In [76]:
# Import necessary libraries
import pandas as pd
import numpy as np
import networkx as nx

In [77]:
# Load the cleaned dataset
file_path = "cleaned_cities.csv"
df = pd.read_csv(file_path)

# View the first few rows of the dataset
print(df.head())

  Country              City        AccentCity Region  Population   Latitude  \
0      ad  andorra la vella  Andorra la Vella     07     20430.0  42.500000   
1      ad           canillo           Canillo     02      3292.0  42.566667   
2      ad            encamp            Encamp     03     11224.0  42.533333   
3      ad        la massana        La Massana     04      7211.0  42.550000   
4      ad      les escaldes      Les Escaldes     08     15854.0  42.500000   

   Longitude  
0   1.516667  
1   1.600000  
2   1.583333  
3   1.516667  
4   1.533333  


In [78]:
# Print list of available cities
print("Available cities:", df["City"].unique())

Available cities: ['andorra la vella' 'canillo' 'encamp' ... 'shurugwi' 'victoria falls'
 'zvishavane']


In [79]:
# Create a graph using NetworkX
G = nx.Graph()

# Add cities as nodes to the graph
for _, row in df.iterrows():
    G.add_node(row["City"], 
               lat=row["Latitude"], 
               lon=row["Longitude"], 
               population=row["Population"], 
               country=row["Country"])

# Print the number of nodes in the graph
print(f"Number of nodes in the graph: {G.number_of_nodes()}")


Number of nodes in the graph: 43182


In [None]:
# Function to calculate travel time
def calculate_travel_time(city1, city2, G):
    """
    Calculate the travel time between two cities.
    """
    data1, data2 = G.nodes[city1], G.nodes[city2]
    
    # Base travel time based on proximity
    distance = np.sqrt((data1["lat"] - data2["lat"])**2 + (data1["lon"] - data2["lon"])**2)
    base_time = 2 if distance < 1 else 4 if distance < 3 else 8
    
    # Additional conditions
    extra_time = 0
    if data1["country"] != data2["country"]:
        extra_time += 2
    if data2["population"] > 200000:
        extra_time += 2
    
    return base_time + extra_time


In [81]:
city1 = "london"
city2 = "moscow"

# Calculate and print the travel time between the two cities
travel_time = calculate_travel_time(city1, city2, G)
print(f"Travel time from {city1} to {city2}: {travel_time} hours")


Travel time from london to moscow: 10 hours


In [None]:
from scipy.spatial import KDTree
import numpy as np

# Extract coordinates and build KDTree
coords = np.array([(G.nodes[city]["lat"], G.nodes[city]["lon"]) for city in G.nodes])
city_names = list(G.nodes)
tree = KDTree(coords)

# Add edges using KDTree
for i, city1 in enumerate(city_names):
    distances, indices = tree.query(coords[i], k=4)  # Find 3 nearest neighbors
    for j in range(1, 4):  # Skip the city itself (index 0)
        city2 = city_names[indices[j]]
        travel_time = calculate_travel_time(city1, city2, G)
        G.add_edge(city1, city2, weight=travel_time)

# Print the number of edges in the graph
print(f"Number of edges in the graph: {G.number_of_edges()}")


Number of edges in the graph: 82874


In [83]:
import pickle

# Save graph
with open("city_graph.gpickle", "wb") as f:
    pickle.dump(G, f)

print("Graph saved to 'city_graph.gpickle'.")


Graph saved to 'city_graph.gpickle'.


In [84]:
# Load the graph
with open("city_graph.gpickle", "rb") as f:
    G = pickle.load(f)

# Check the graph
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print("Nodes:", list(G.nodes)[:10])  # Print first 10 nodes
print("Edges:", list(G.edges(data=True))[:10])  # Print first 10 edges with weights


Number of nodes: 43182
Number of edges: 82874
Nodes: ['andorra la vella', 'canillo', 'encamp', 'la massana', 'les escaldes', 'ordino', 'sant julia de loria', 'abu dhabi', 'dubai', 'sharjah']
Edges: [('andorra la vella', 'les escaldes', {'weight': 2}), ('andorra la vella', 'sant julia de loria', {'weight': 2}), ('andorra la vella', 'la massana', {'weight': 2}), ('andorra la vella', 'ordino', {'weight': 2}), ('canillo', 'encamp', {'weight': 2}), ('canillo', 'ordino', {'weight': 2}), ('canillo', 'la massana', {'weight': 2}), ('canillo', 'foix', {'weight': 4}), ('encamp', 'ordino', {'weight': 2}), ('encamp', 'les escaldes', {'weight': 2})]
