In [1]:
import networkx as nx
import matplotlib.pyplot as plt

# Load Countries

In [2]:
with open('source.txt') as file:
    lines = file.readlines()

names = list(map(lambda line: line.split('\t')[1], lines))
print(f"Total number of countries: {len(names)}")

Total number of countries: 195


# Get start and end sets

In [3]:
def getEndSets(words):
    starts = {}
    finishes = {}

    for name in words:
        s = name[0].lower()
        f = name[-1].lower()
        starts[s] = starts.get(s, []) + [name]
        finishes[f] = finishes.get(f, []) + [name]

    return starts, finishes

starts, finishes = getEndSets(names)

# Build Networkx graph from countries and sets

In [4]:
def buildGraph(countries):
    G = nx.DiGraph()
    starts, finsihes = getEndSets(countries)
    
    # Add nodes
    for name in countries:
        G.add_node(name)
    
    # Add edges
    for curr in names:
        f = curr[-1].lower()
        children = starts.get(f, [])
        for child in children:
            if child != curr:
                G.add_edge(curr, child)
    
    return G

In [5]:
G = buildGraph(names)
print(G)

DiGraph with 195 nodes and 2011 edges


In [10]:
sources = [node for node, deg in G.in_degree() if deg == 0]
sinks = [node for node, deg in G.out_degree() if deg == 0]
print(f"Total sources : {len(sources)}")
print(f"Total sinks   : {len(sinks)}")

Total sources : 38
Total sinks   : 0


# Get grand-child set
This could potentially be used for pruning.

Depths guide:
1. Children
2. Grand-children
3. Great-grand-children

In [46]:
def family(graph, node, depth, familyset=None):
#     print(f"Getting familiy for: {node}")
    if depth == 0:
        return familyset
    
    if familyset == None:
        familyset = set([node])
    
    children = list(graph.successors(node))
    familyset.update(children)
    
    for child in children:
#         print(f"About to recurse to child: {child}")
        family(graph, child, depth-1, familyset=familyset)

    return familyset

In [47]:
ggrand = family(G, 'Albania', 3)
print(ggrand)
print(f"Albania has {len(ggrand)} great-grand-children")

{'Seychelles', 'Niger', 'Nigeria', 'Denmark', 'Somalia', 'Djibouti', 'Laos', 'Sao Tome and Principe', 'Russia', 'Solomon Islands', 'Angola', 'Lebanon', 'Sweden', 'Austria', 'Luxembourg', 'Saint Kitts and Nevis', 'Uruguay', 'Sierra Leone', 'United Arab Emirates', 'Slovenia', 'South Korea', 'Albania', 'Nauru', 'Syria', 'Slovakia', 'Sri Lanka', 'Libya', 'Lithuania', 'Romania', 'Saint Vincent and the Grenadines', 'Senegal', 'Singapore', 'Argentina', 'Uganda', 'Norway', 'Uzbekistan', 'Saudi Arabia', 'Nicaragua', 'Liberia', 'Andorra', 'Lesotho', 'Antigua and Barbuda', 'Spain', 'Sudan', 'Algeria', 'Namibia', 'New Zealand', 'North Korea', 'North Macedonia', 'United Kingdom', 'Liechtenstein', 'Switzerland', 'Samoa', 'San Marino', 'Saint Lucia', 'Afghanistan', 'Yemen', 'Serbia', 'South Africa', 'Rwanda', 'United States of America', 'Latvia', 'Democratic Republic of the Congo', 'Dominica', 'Netherlands', 'South Sudan', 'Dominican Republic', 'Suriname', 'Australia', 'Nepal', 'Armenia', 'Azerbaijan

In [48]:
grandChildSizes = {node: len(family(G, node, 3)) for node in names}
sortedNames = sorted(names, key=lambda n: grandChildSizes[n])
for name in sortedNames:
    print(grandChildSizes[name], name)

16 Iraq
25 Niger
26 Ecuador
26 El Salvador
26 Madagascar
26 Myanmar
26 Qatar
73 Albania
73 Algeria
73 Andorra
73 Angola
73 Antigua and Barbuda
73 Argentina
73 Armenia
73 Australia
73 Austria
73 Dominica
73 Latvia
73 Liberia
73 Libya
73 Lithuania
73 Namibia
73 Nicaragua
73 Nigeria
73 North Korea
73 North Macedonia
73 Norway
73 Romania
73 Russia
73 Rwanda
73 Saint Lucia
73 Samoa
73 Saudi Arabia
73 Serbia
73 Slovakia
73 Slovenia
73 Somalia
73 South Africa
73 South Korea
73 Sri Lanka
73 Syria
73 Uganda
73 United States of America
73 Uruguay
74 Bolivia
74 Bosnia and Herzegovina
74 Botswana
74 Bulgaria
74 Cambodia
74 Canada
74 China
74 Colombia
74 Costa Rica
74 Croatia
74 Cuba
74 Czechia
74 Democratic Republic of the Congo
74 Equatorial Guinea
74 Eritrea
74 Estonia
74 Ethiopia
74 Gambia
74 Georgia
74 Germany
74 Ghana
74 Grenada
74 Guatemala
74 Guinea
74 Guyana
74 Hungary
74 India
74 Indonesia
74 Italy
74 Jamaica
74 Kenya
74 Lesotho
74 Malaysia
74 Malta
74 Mauritania
74 Micronesia
74 Moldova


# Use great-grand-child sets as heuristic
Recursively try the first $n$ paths available as ranked by largest family size.

In [70]:
def findLong(graph, famSizes, node, path=[], longest=[], N=3):
    if len(path) > len(longest):
        longest = path.copy()

    children = list(filter(
        lambda c: c not in path, graph.successors(node)
    ))
    
    children = list(sorted(
        children,
        key=lambda n: famSizes[n],
        reverse=True
    ))[:N]
    # print(f"From {node} pruned to these: {children}")
    for child in children:
        if child not in path:
            newpath = path + [child]
            if len(newpath) > len(longest):
                longest = newpath.copy()
                print(f"new longest len: {len(longest)}")
            findLong(graph, famSizes, child, path=newpath, longest=longest, N=N)

    return longest

In [71]:
l = []
longest = findLong(G, grandChildSizes, 'Albania', longest=l, N=1)
print(longest)
print(l)

new longest len: 1
new longest len: 2
new longest len: 3
new longest len: 4
new longest len: 5
new longest len: 6
new longest len: 7
new longest len: 8
new longest len: 9
new longest len: 10
new longest len: 11
new longest len: 12
new longest len: 13
new longest len: 14
new longest len: 15
new longest len: 16
new longest len: 17
new longest len: 18
new longest len: 19
new longest len: 20
new longest len: 21
new longest len: 22
['Afghanistan']
[]


In [63]:
print(longest)
print(l)

['Afghanistan']
[]
