In [2]:
import os

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import pygraphviz as pgv
from networkx.drawing.nx_agraph import graphviz_layout

Matplotlib is building the font cache; this may take a moment.


In [10]:
# Load both source tables; the first row of each CSV supplies the column names.
countrys = pd.read_csv('../../data/countries.csv', header=0)
cities = pd.read_csv('../../data/cities.csv', header=0)

# Stack countries and cities into one frame that shares a single 'Name'
# column, renumbering the rows so the index runs continuously.
letter_cols = ['StartsWith', 'EndsWith']
country_part = countrys[['Country'] + letter_cols].rename(columns={'Country': 'Name'})
city_part = cities[['City'] + letter_cols].rename(columns={'City': 'Name'})
combined = pd.concat([country_part, city_part], ignore_index=True)

In [11]:
# Display the merged country + city table as a quick sanity check.
combined

Unnamed: 0,Name,StartsWith,EndsWith
0,Afghanistan,a,n
1,Albania,a,a
2,Algeria,a,a
3,Andorra,a,a
4,Angola,a,a
...,...,...,...
690,Goyang-si,g,i
691,Yulin,y,n
692,Jodhpur,j,r
693,Gwalior,g,r


In [12]:
# One empty directed graph per dataset: countries, cities, and both combined.
# Each graph is a separate DiGraph instance, so they never share nodes or edges.
countrys_network, cities_network, combined_network = (
    nx.DiGraph() for _ in range(3)
)

In [14]:
# Build edges based on matching start and end letters

def add_letter_chain_edges(graph, frame, name_column,
                           letters='abcdefghijklmnopqrstuvwxyz'):
    """Add an edge from each name to every name starting with its end letter.

    For each letter, rows whose 'EndsWith' equals the letter become edge
    sources and rows whose 'StartsWith' equals the letter become edge targets.
    A name is never linked to itself.

    Parameters
    ----------
    graph : networkx graph
        Graph that receives the edges; it is modified in place.
    frame : pandas.DataFrame
        Table containing `name_column`, 'StartsWith' and 'EndsWith' columns.
    name_column : str
        Column whose values are used as node labels.
    letters : iterable of str, optional
        Letters to match on. Rows whose 'StartsWith'/'EndsWith' value is not
        one of these produce no edges. Defaults to lowercase a-z.

    Returns
    -------
    None
    """
    for letter in letters:
        # Pull the plain name lists once per letter rather than iterating
        # rows with iterrows(), which builds a Series for every row pair.
        enders = frame.loc[frame['EndsWith'] == letter, name_column].tolist()
        starters = frame.loc[frame['StartsWith'] == letter, name_column].tolist()

        # Same visiting order as a nested loop (sources outer, targets inner),
        # so nodes and edges are inserted in a stable, predictable order.
        graph.add_edges_from(
            (tail, head)
            for tail in enders
            for head in starters
            if head != tail  # skip self-loops
        )


add_letter_chain_edges(countrys_network, countrys, 'Country')
add_letter_chain_edges(cities_network, cities, 'City')
add_letter_chain_edges(combined_network, combined, 'Name')

In [15]:
# Map each output file stem to the graph it holds.
networks = {
    'countrys_network': countrys_network,
    'cities_network': cities_network,
    'combined_network': combined_network,
}

# Create the output folders up front so the writes below do not fail with
# FileNotFoundError when the notebook runs in a fresh checkout.
os.makedirs('dot', exist_ok=True)
os.makedirs('graphml', exist_ok=True)

# Save the networks as dot files for simplicity
for label, network in networks.items():
    nx.nx_agraph.write_dot(network, f'dot/{label}.dot')

# Save the networks as GraphML files for compatibility
for label, network in networks.items():
    nx.write_graphml(network, f'graphml/{label}.graphml')

***