Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add documentation, diff table headers, implement #324, and refactor logic into function #325

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
5 changes: 3 additions & 2 deletions cmd/gedcom/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ func runDiffCommand() {
"The Google Analytics ID, like 'UA-78454410-2'.")

flag.BoolVar(&optionProgress, "progress", false, "Show progress bar.")
//No reference because it is currently being accessed in SimpleNode.Equals(), so the only way to pass it to Equals() would be to pass it in every single SimpleNode.
flag.Bool("ancestry-source-matching", false, "Match Ancestry.com sources by Ancestry Source ID (_APID) instead of default matching algorithm (GEDCOM SOUR).")

flag.IntVar(&optionJobs, "jobs", 1, util.CLIDescription(`Number of jobs to run in
parallel. If you are comparing large trees this will make the process
Expand Down Expand Up @@ -279,8 +281,7 @@ func runDiffCommand() {
diffProgress := make(chan gedcom.Progress)

page := html.NewDiffPage(comparisons, filterFlags, optionGoogleAnalyticsID,
optionShow, optionSort, diffProgress, compareOptions, html.LivingVisibilityShow)

optionShow, optionSort, diffProgress, compareOptions, html.LivingVisibilityShow, optionLeftGedcomFile, optionRightGedcomFile)
go func() {
_, err = page.WriteHTMLTo(out)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions html/core/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func (c *Page) WriteHTMLTo(w io.Writer) (int64, error) {
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/octicons/4.4.0/font/octicons.css"/>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.1/js/bootstrap.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<div class="container">`)
Expand Down
2 changes: 1 addition & 1 deletion html/core/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func NewTable(tableClass string, content ...Component) *Table {
}

func (c *Table) WriteHTMLTo(w io.Writer) (int64, error) {
n := appendSprintf(w, `<table class="table %s">`, c.tableClass)
n := appendSprintf(w, `<table class="table table-responsive %s">`, c.tableClass)
n += appendComponent(w, NewComponents(c.content...))
n += appendString(w, "</table>")

Expand Down
80 changes: 57 additions & 23 deletions html/diff_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ type DiffPage struct {
progress chan gedcom.Progress
compareOptions *gedcom.IndividualNodesCompareOptions
visibility LivingVisibility
leftGedcomPath string
rightGedcomPath string
}

func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.FilterFlags, googleAnalyticsID string, show, sort string, progress chan gedcom.Progress, compareOptions *gedcom.IndividualNodesCompareOptions, visibility LivingVisibility) *DiffPage {
func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.FilterFlags, googleAnalyticsID string, show, sort string, progress chan gedcom.Progress, compareOptions *gedcom.IndividualNodesCompareOptions, visibility LivingVisibility, leftGedcomPath string, rightGedcomPath string) *DiffPage {
return &DiffPage{
comparisons: comparisons,
filterFlags: filterFlags,
Expand All @@ -45,6 +47,8 @@ func NewDiffPage(comparisons gedcom.IndividualComparisons, filterFlags *gedcom.F
progress: progress,
compareOptions: compareOptions,
visibility: visibility,
leftGedcomPath: leftGedcomPath,
rightGedcomPath: rightGedcomPath,
}
}

Expand Down Expand Up @@ -178,51 +182,63 @@ func (c *DiffPage) WriteHTMLTo(w io.Writer) (int64, error) {
}

// The index at the top of the page.
rows := []core.Component{}
var rows []core.Component
numOnlyLeft := 0
numOnlyRight := 0
numSimilar := 0
numEqual := 0
for _, comparison := range precalculatedComparisons {
weightedSimilarity := c.weightedSimilarity(comparison.comparison)

leftClass := ""
rightClass := ""

switch {
case comparison.comparison.Left != nil && comparison.comparison.Right == nil:
case comparison.comparison.Left != nil && comparison.comparison.Right == nil: //right is missing
leftClass = "bg-warning"
numOnlyLeft++

case comparison.comparison.Left == nil && comparison.comparison.Right != nil:
case comparison.comparison.Left == nil && comparison.comparison.Right != nil: //left is missing
rightClass = "bg-primary"
numOnlyRight++

case weightedSimilarity < 1:
case weightedSimilarity < 1: //neither are missing, but they aren't identical
leftClass = "bg-info"
rightClass = "bg-info"
numSimilar++

case c.filterFlags.HideEqual:
case c.filterFlags.HideEqual: //are identical, but user said to hide equals
numEqual++
continue
default:
numEqual++
}
rows = append(rows, c.getRow(comparison, leftClass, rightClass, weightedSimilarity))
}

leftNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Left, c.visibility, "")
rightNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Right, c.visibility, "")

left := core.NewTableCell(leftNameAndDates).Class(leftClass)
right := core.NewTableCell(rightNameAndDates).Class(rightClass)

middle := core.NewTableCell(core.NewText(""))
if weightedSimilarity != 0 {
similarityString := fmt.Sprintf("%.2f%%", weightedSimilarity*100)
middle = core.NewTableCell(core.NewText(similarityString)).
Class("text-center " + leftClass)
}

tableRow := core.NewTableRow(left, middle, right)

rows = append(rows, tableRow)
leftHeader := fmt.Sprint(c.leftGedcomPath, " (", numOnlyLeft, " only in left)")
rightHeader := fmt.Sprint(c.rightGedcomPath, " (", numOnlyRight, " only in right)")
class := "text-center"
attr := map[string]string{}
headerTag := "h5"
wereHidden := ""
if c.filterFlags.HideEqual {
wereHidden = " - were hidden"
}
middleHeader := fmt.Sprint("Similarity score", " (", numSimilar, " similar, and ", numEqual, " equal", wereHidden, ")")
header := []core.Component{core.NewTableRow(
core.NewTableCell(
core.NewTag(headerTag, attr, core.NewText(leftHeader))).Class(class),
core.NewTableCell(
core.NewTag(headerTag, attr, core.NewText(middleHeader))).Class(class),
core.NewTableCell(
core.NewTag(headerTag, attr, core.NewText(rightHeader))).Class(class))}

// Individual pages
components := []core.Component{
core.NewSpace(),
core.NewCard(core.NewText("Individuals"), core.CardNoBadgeCount,
core.NewTable("", rows...)),
core.NewTable("", append(header, rows...)...)),
core.NewSpace(),
}
for _, comparison := range precalculatedComparisons {
Expand All @@ -236,6 +252,24 @@ func (c *DiffPage) WriteHTMLTo(w io.Writer) (int64, error) {
).WriteHTMLTo(w)
}

func (c *DiffPage) getRow(comparison *IndividualCompare, leftClass string, rightClass string, weightedSimilarity float64) *core.TableRow {

leftNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Left, c.visibility, "")
rightNameAndDates := NewIndividualNameAndDatesLink(comparison.comparison.Right, c.visibility, "")

left := core.NewTableCell(leftNameAndDates).Class(leftClass)
right := core.NewTableCell(rightNameAndDates).Class(rightClass)

middle := core.NewTableCell(core.NewText(""))
if weightedSimilarity != 0 {
similarityString := fmt.Sprintf("%.2f%%", weightedSimilarity*100)
middle = core.NewTableCell(core.NewText(similarityString)).
Class("text-center " + leftClass)
}

return core.NewTableRow(left, middle, right)
}

func (c *DiffPage) shouldSkip(comparison *IndividualCompare) bool {
switch c.show {
case DiffPageShowAll:
Expand Down
2 changes: 1 addition & 1 deletion html/diff_page_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func TestDiffPage_WriteHTMLTo(t *testing.T) {
compareOptions := gedcom.NewIndividualNodesCompareOptions()
component := html.NewDiffPage(comparisons, filterFlags, googleAnalyticsID,
html.DiffPageShowAll, html.DiffPageSortHighestSimilarity, nil,
compareOptions, html.LivingVisibilityPlaceholder)
compareOptions, html.LivingVisibilityPlaceholder, "left", "right")

buf := bytes.NewBuffer(nil)
component.WriteHTMLTo(buf)
Expand Down
4 changes: 1 addition & 3 deletions node_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ type NodeDiff struct {
// R 2 DATE Aft. 2001
// R 2 PLAC Surry, England
// R 1 NAME J. /Smith/
//
func CompareNodes(left, right Node) *NodeDiff {
result := &NodeDiff{}

Expand Down Expand Up @@ -263,7 +262,6 @@ func (nd *NodeDiff) String() string {
// LR 1 DEAT | true
// LR 2 PLAC England | true
// R 1 NAME J. /Smith/ | false
//
func (nd *NodeDiff) IsDeepEqual() bool {
leftIsNil := IsNil(nd.Left)
rightIsNil := IsNil(nd.Right)
Expand Down Expand Up @@ -345,7 +343,7 @@ func (nd *NodeDiff) LeftNode() Node {

// RightNode returns the flattening Node value that favors the right side.
//
// To favor means to return the Left value when both the Left and Right are set.
// To favor means to return the Right value when both the Left and Right are set.
func (nd *NodeDiff) RightNode() Node {
n := nd.Right

Expand Down
19 changes: 18 additions & 1 deletion simple_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package gedcom
import (
"bytes"
"encoding/json"
"flag"
"fmt"
"sync"
)
Expand Down Expand Up @@ -61,7 +62,7 @@ func (node *SimpleNode) Identifier() string {
if node == nil {
return ""
}

return fmt.Sprintf("@%s@", node.pointer)
}

Expand All @@ -88,6 +89,22 @@ func (node *SimpleNode) Equals(node2 Node) bool {
return false
}

useAncestrySourceMatching := flag.Lookup("ancestry-source-matching").Value.String() //indexes a map CommandLine.formal
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot lookup flags like this. While this might work when using it through the CLI, this can also be used a library. It's also just a leaky abstraction to write it this way. You have three choices:

  1. Make the extra comparison logic permanent. I'm fine with that - but there are still caveats to consider below.
  2. If you want/need to control the logic you will need to provide that through another Equals method with options, for example:
type SimpleNodeEqualsOptions struct {
    // UseAncestrySourceMatching ...
    UseAncestrySourceMatching bool
}

func (node *SimpleNode) Equals(node2 Node) bool {
    return node.EqualsOptions(node2, SimpleNodeEqualsOptions{})
}

func (node *SimpleNode) EqualsOptions(node2 Node, o SimpleNodeEqualsOptions) bool {
    // ...

    if o.UseAncestrySourceMatching && node.Tag().String() == "Source" && tag.String() == "Source" {
    }
}
  1. This is probably the best option for you, use a FilterFunction (
    type FilterFunction func(node Node) (newNode Node, traverseChildren bool)
    ) with a CLI argument.

SimpleNode.Equals only will only apply to nodes that do not have a custom type with an Equals method of their own. This is an important consideration because this is generic logic that you may want to apply to all nodes on top of any logic.

If you want this to apply globally (and be controlled from the CLI) you might find it easier to do preprocessing first instead. That is, based on a CLI flag, you preprocess the files so when the comparison happens the logic has already been normalized.

We do in this already using FilterFunctions. For example the -only-official CLI flag (https://github.com/elliotchance/gedcom/blob/master/filter_flags.go#L57-L58) triggers the files to remove non-official GEDCOM tags so they don't exist for the comparison:

func OfficialTagFilter() FilterFunction {
. That file also contains some other filters examples.

//if both Ancestry sources, only check if their _APID is the same
if useAncestrySourceMatching == "true" && node.Tag().String() == "Source" && tag.String() == "Source" {
if node.Value() == node2.Value() { //if they have the same source id, then no need to check the apid
return true
}
for _, leftNode := range node.Nodes() {
for _, rightNode := range node2.Nodes() {
if leftNode.Tag().String() == "_APID" &&
rightNode.Tag().String() == "_APID" &&
rightNode.Value() == leftNode.Value() {
return true
}
}
}
}
value := node2.Value()
if node.value != value {
return false
Expand Down
45 changes: 45 additions & 0 deletions simple_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,51 @@ func TestSimpleNode_Equals(t *testing.T) {
}
}

func TestAncestryNode_Equals(t *testing.T) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This test name implies that there is a type called AncestryNode with a method called Equals. So, its easy to miss when jumping between production code and tests. It's best that you move this test to a subtest to the function above:

func TestSimpleNode_Equals(t *testing.T) {
  // ...

  t.Run("AncestryNode", func(t *testing.T) {
	//test when source ids are the same
	original := GetAncestryIndividual("@S291470533@", "@S291470520@", "@S291470520@", "@S291470520@", "@S291470520@")
	assert.True(t, gedcom.DeepEqualNodes(gedcom.newDocumentFromString(original).Nodes(), gedcom.newDocumentFromString(original).Nodes()))

	//test when source ids are different, but _apid stays the same
	left := gedcom.newDocumentFromString(GetAncestryIndividual("@S222222222@", "@S444444444@", "@S666666666@", "@S888888888@", "@S111111111@"))
	right := gedcom.newDocumentFromString(GetAncestryIndividual("@S333333333@", "@S555555555@", "@S777777777@", "@S999999999@", "@S000000000@"))
	assert.True(t, gedcom.DeepEqualNodes(left.Nodes(), right.Nodes()))
  })
}

//test when source ids are the same
original := GetAncestryIndividual("@S291470533@", "@S291470520@", "@S291470520@", "@S291470520@", "@S291470520@")
assert.True(t, gedcom.DeepEqualNodes(gedcom.newDocumentFromString(original).Nodes(), gedcom.newDocumentFromString(original).Nodes()))

//test when source ids are different, but _apid stays the same
left := gedcom.newDocumentFromString(GetAncestryIndividual("@S222222222@", "@S444444444@", "@S666666666@", "@S888888888@", "@S111111111@"))
right := gedcom.newDocumentFromString(GetAncestryIndividual("@S333333333@", "@S555555555@", "@S777777777@", "@S999999999@", "@S000000000@"))
assert.True(t, gedcom.DeepEqualNodes(left.Nodes(), right.Nodes()))
}

// This is an actual gedcom entry, hope that's ok.
func GetAncestryIndividual(source1 string, source2 string, source3 string, source4 string, source5 string) string {
return "0 @I152151456706@ INDI" +
"\n1 NAME Jacob /Yourow/" +
"\n2 GIVN Jacob" +
"\n2 SURN Yourow" +
"\n2 SOUR " + source1 +
"\n3 PAGE New York City Municipal Archives; New York, New York; Borough: Manhattan; Volume Number: 13" +
"\n3 _APID 1,61406::6159341" +
"\n2 SOUR " + source2 +
"\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" +
"\n3 _APID 1,6224::30826480" +
"\n1 SEX M" +
"\n1 FAMS @F89@" +
"\n1 BIRT" +
"\n2 DATE abt 1888" +
"\n2 PLAC Russia" +
"\n2 SOUR " + source3 +
"\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" +
"\n3 _APID 1,6224::30826480" +
"\n1 EVEN" +
"\n2 TYPE Arrival" +
"\n2 DATE 1905" +
"\n2 SOUR " + source4 +
"\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" +
"\n3 _APID 1,6224::30826480" +
"\n1 RESI Marital Status: Married; Relation to Head: Head" +
"\n2 DATE 1930" +
"\n2 PLAC Bronx, Bronx, New York, USA" +
"\n2 SOUR " + source5 +
"\n3 PAGE Year: 1930; Census Place: Bronx, Bronx, New York; Page: 42A; Enumeration District: 0430; FHL microfilm: 2341213" +
"\n3 _APID 1,6224::30826480"
}

func TestSimpleNode_Tag(t *testing.T) {
Tag := tf.Function(t, (*gedcom.SimpleNode).Tag)

Expand Down