In [1]:
from pathlib import Path

import networkx as nx
import pandas as pd

In [5]:
# Read the two source tables; each is expected to carry a name column plus
# 'StartsWith' / 'EndsWith' letter columns.
countries = pd.read_csv('../../data/countries.csv', header=0)
cities = pd.read_csv('../../data/cities.csv', header=0)

# Stack countries and cities into one table that shares a single 'Name' column
combined = pd.concat(
    [
        frame[[name_col, 'StartsWith', 'EndsWith']].rename(columns={name_col: 'Name'})
        for frame, name_col in ((countries, 'Country'), (cities, 'City'))
    ],
    ignore_index=True,
)

In [6]:
# Preview the loaded countries table (rendered as the cell's output).
# NOTE(review): the 'Unnamed: 0' column in the output is presumably the row
# index that was written into the CSV -- confirm before relying on it.
countries

Unnamed: 0,Country,StartsWith,EndsWith
0,Afghanistan,a,n
1,Albania,a,a
2,Algeria,a,a
3,Andorra,a,a
4,Angola,a,a
...,...,...,...
190,Venezuela,v,a
191,Vietnam,v,m
192,Yemen,y,n
193,Zambia,z,a


In [7]:
# One empty directed graph per data set: countries, cities, and both combined
countries_network, cities_network, combined_network = (nx.DiGraph() for _ in range(3))

In [8]:
# Register every name as a node of its graph
for network, names in (
    (countries_network, countries['Country']),
    (cities_network, cities['City']),
    (combined_network, combined['Name']),
):
    network.add_nodes_from(names)

In [10]:
# Build edges based on matching start and end letters

def add_chain_edges(network, frame, name_col):
    """Link every name to the names that may follow it in a word chain.

    A directed edge ``A -> B`` is added whenever the ``EndsWith`` letter of
    ``A`` equals the ``StartsWith`` letter of ``B`` and the two names differ.

    Args:
        network: graph that receives the edges (modified in place).
        frame: table holding ``name_col``, ``StartsWith`` and ``EndsWith``.
        name_col: column of ``frame`` that contains the node names.
    """
    starters = {}  # letter -> names that start with it, in table order
    enders = {}    # letter -> names that end with it, in table order
    for name, first, last in zip(frame[name_col], frame['StartsWith'], frame['EndsWith']):
        # Only real strings take part; missing values (NaN) never match anything.
        if isinstance(first, str):
            starters.setdefault(first, []).append(name)
        if isinstance(last, str):
            enders.setdefault(last, []).append(name)

    # Letters come from the data instead of a fixed a-z string, so names that
    # start or end with any other character are not silently left unconnected.
    # Sorting keeps the insertion order of an a-z loop for plain ASCII data.
    for letter in sorted(starters.keys() & enders.keys()):
        network.add_edges_from(
            (tail, head)
            for tail in enders[letter]
            for head in starters[letter]
            if tail != head  # no self-loops / same-name links
        )


add_chain_edges(countries_network, countries, 'Country')
add_chain_edges(cities_network, cities, 'City')
add_chain_edges(combined_network, combined, 'Name')

In [11]:
# Give every node a 'label' attribute equal to its own name
for network in (countries_network, cities_network, combined_network):
    own_names = {node: node for node in network.nodes()}
    nx.set_node_attributes(network, own_names, 'label')

In [15]:
# Store each name's first and last letter on its node in every network
for network, frame, name_col in (
    (countries_network, countries, 'Country'),
    (cities_network, cities, 'City'),
    (combined_network, combined, 'Name'),
):
    for name, first, last in zip(frame[name_col], frame['StartsWith'], frame['EndsWith']):
        node_data = network.nodes[name]
        node_data['StartsWith'] = first
        node_data['EndsWith'] = last

In [16]:
# The writers below do not create missing folders, so make sure the output
# directories exist; otherwise a fresh checkout fails with FileNotFoundError.
for out_dir in ('dot', 'graphml'):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

# Save the networks as dot files for simplicity
nx.nx_agraph.write_dot(countries_network, 'dot/countries_network.dot')
nx.nx_agraph.write_dot(cities_network, 'dot/cities_network.dot')
nx.nx_agraph.write_dot(combined_network, 'dot/combined_network.dot')

# Save the networks as GraphML files for compatibility
nx.write_graphml(countries_network, 'graphml/countries_network.graphml')
nx.write_graphml(cities_network, 'graphml/cities_network.graphml')
nx.write_graphml(combined_network, 'graphml/combined_network.graphml')

## Letter Networks

In [21]:
# Empty directed graphs whose nodes will be letters rather than names,
# one each for countries, cities, and the combined table
countries_letter_net, cities_letter_net, combined_letter_net = (
    nx.DiGraph() for _ in range(3)
)

In [None]:
# Every row contributes one edge: first letter -> last letter of that name.
# Rows sharing the same letter pair collapse into a single edge.
for letter_net, frame in (
    (countries_letter_net, countries),
    (cities_letter_net, cities),
    (combined_letter_net, combined),
):
    letter_net.add_edges_from(zip(frame['StartsWith'], frame['EndsWith']))

In [32]:
# The writers below do not create missing folders, so make sure the output
# directories exist; otherwise a fresh checkout fails with FileNotFoundError.
for out_dir in ("dot", "graphml"):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

# Letter networks as DOT files
nx.nx_agraph.write_dot(countries_letter_net, "dot/countries_letter_net.dot")
nx.nx_agraph.write_dot(cities_letter_net, "dot/cities_letter_net.dot")
nx.nx_agraph.write_dot(combined_letter_net, "dot/combined_letter_net.dot")

# Letter networks as GraphML files
nx.write_graphml(countries_letter_net, "graphml/countries_letter_net.graphml")
nx.write_graphml(cities_letter_net, "graphml/cities_letter_net.graphml")
nx.write_graphml(combined_letter_net, "graphml/combined_letter_net.graphml")



***