In [21]:
# Importing needed libraries

import os
from pathlib import Path

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

In [3]:
# Loading dataset
# The data directory defaults to the original thesis folder, but it can be
# redirected by setting the THESIS_DATA_DIR environment variable so the notebook
# also runs on machines that do not share this exact folder layout.
DATA_DIR = Path(os.environ.get(
    "THESIS_DATA_DIR",
    "/Users/jangaydoul/Desktop/Copenhagen Business School/4. Semester :: Thesis/03_Data/csvs",
))
df_final = pd.read_csv(DATA_DIR / "df_final.csv")
df_final.head()

Unnamed: 0,BookingId,SubBookingName,Customer Name,StartRequestedDate,EndRequestedDate,FromLocation,ToLocation,FromLatitude,FromLongitude,FromCity,FromCountry,ToLatitude,ToLongitude,ToCity,ToCountry,DomesticDelivery,route_distance,FullLoadIndicator,EmptyBookingIndicator,GrossWeight,Temperature,frozen_load
0,7399574,A,2966,2020-12-31,2020-12-31,33917,32289,53.51131,-1.1254,YORKSHIRE,United Kingdom,53.60475,-0.65636,SCUNTHORPE,United Kingdom,1,46.346,1.0,0.0,0.0,,0
1,7399575,A,8183,2020-12-31,2020-12-31,9443,57716,53.50446,-2.84867,Liverpool,United Kingdom,53.04612,-2.92787,Wrexham,United Kingdom,1,78.192,1.0,0.0,0.0,,0
2,7399576,A,8183,2020-12-31,2020-12-31,9443,57716,53.50446,-2.84867,Liverpool,United Kingdom,53.04612,-2.92787,Wrexham,United Kingdom,1,78.192,1.0,0.0,0.0,,0
3,7399577,A,4737,2020-12-31,2020-12-31,5977,9831,53.77629,-1.52585,Leeds,United Kingdom,53.71874,-1.41235,YORKSHIRE,United Kingdom,1,17.733,0.0,0.0,0.0,,0
4,7399578,A,4737,2020-12-31,2020-12-31,5977,3182,53.77629,-1.52585,Leeds,United Kingdom,55.87784,-3.65235,Bathgate,United Kingdom,1,369.34,1.0,0.0,0.0,,0


In [23]:
# Keep only domestic UK bookings: both endpoints must lie in the United Kingdom
starts_in_uk = df_final['FromCountry'].eq('United Kingdom')
ends_in_uk = df_final['ToCountry'].eq('United Kingdom')
df_final_UK = df_final.loc[starts_in_uk & ends_in_uk]

len(df_final_UK)

1482429

In [24]:
# Restrict the UK bookings to routes with route_distance below 300
is_short_route = df_final_UK['route_distance'].lt(300)
df_final_UK_300 = df_final_UK.loc[is_short_route]

len(df_final_UK_300)

906595

In [25]:
### 1) Building the graph for UK bookings with route_distance < 300 (df_final_UK_300)

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph.
# itertuples() yields plain tuples in row order, which avoids the per-row Series
# that iterrows() constructs. Row order is preserved, so when a route occurs
# several times the weight of its last booking is kept (add_edge() overwrites
# the attributes of an existing edge).
edge_columns = ['FromLocation', 'ToLocation', 'route_distance']
for from_location, to_location, route_distance in (
    df_final_UK_300[edge_columns].itertuples(index=False, name=None)
):
    G.add_edge(from_location, to_location, weight=route_distance)

# Calculate node attributes for pick up and drop off rates
pickup_counts = df_final_UK_300['FromLocation'].value_counts()
dropoff_counts = df_final_UK_300['ToLocation'].value_counts()

# Plain dicts keep the per-node lookups in the loop below cheap
pickups_by_node = pickup_counts.to_dict()
dropoffs_by_node = dropoff_counts.to_dict()

for node in G.nodes():
    pickups = pickups_by_node.get(node, 0)
    dropoffs = dropoffs_by_node.get(node, 0)
    total = pickups + dropoffs

    # Every node stems from at least one booking, so total is normally >= 1.
    # The guard only covers keys that value_counts() leaves out (missing values).
    pickup_rate = pickups / total if total else 0.0
    dropoff_rate = dropoffs / total if total else 0.0

    G.nodes[node]['pickup_rate'] = pickup_rate
    G.nodes[node]['dropoff_rate'] = dropoff_rate
    G.nodes[node]['size'] = total

# Calculate edge attributes for frequency (number of bookings per directed route)
route_counts = df_final_UK_300.groupby(['FromLocation', 'ToLocation']).size()
frequency_by_route = route_counts.to_dict()  # keys are (FromLocation, ToLocation) tuples

for edge in G.edges():
    G.edges[edge]['frequency'] = frequency_by_route.get(edge, 0)

In [26]:
# Visualize the graph
pos = nx.spring_layout(G, seed=42)

# Raw booking counts are far too large to be used directly as marker areas
# (points^2) or line widths (points), so they are rescaled relative to the
# busiest node / route into a readable range.
MIN_NODE_SIZE, MAX_NODE_SIZE = 5, 600
MIN_EDGE_WIDTH, MAX_EDGE_WIDTH = 0.2, 5

# `or 1` avoids a division by zero for an empty graph or all-zero counts
largest_node = max((G.nodes[node]['size'] for node in G.nodes()), default=0) or 1
busiest_route = max((G.edges[edge]['frequency'] for edge in G.edges()), default=0) or 1

node_sizes = [
    MIN_NODE_SIZE + (MAX_NODE_SIZE - MIN_NODE_SIZE) * G.nodes[node]['size'] / largest_node
    for node in G.nodes()
]
node_colors = [G.nodes[node]['pickup_rate'] for node in G.nodes()]
edge_widths = [
    MIN_EDGE_WIDTH + (MAX_EDGE_WIDTH - MIN_EDGE_WIDTH) * G.edges[edge]['frequency'] / busiest_route
    for edge in G.edges()
]

fig, ax = plt.subplots(figsize=(12, 12))
# vmin/vmax pin the colour scale to [0, 1] so the node colours agree with the
# colorbar below; without them the colours are normalised to the data min/max.
nx.draw_networkx_nodes(
    G, pos, ax=ax, node_size=node_sizes, node_color=node_colors,
    cmap=plt.cm.coolwarm, vmin=0, vmax=1,
)
nx.draw_networkx_edges(G, pos, ax=ax, width=edge_widths, edge_color="gray", alpha=0.5)
nx.draw_networkx_labels(G, pos, ax=ax, font_size=8)

ax.set_title("Route Network")
# The ScalarMappable is not attached to any Axes, so the Axes to take space
# from has to be passed explicitly.
fig.colorbar(
    plt.cm.ScalarMappable(cmap=plt.cm.coolwarm, norm=plt.Normalize(0, 1)),
    ax=ax,
    label="Pickup Rate",
)
ax.axis("off")

# Save the plot to a file
fig.savefig("route_network_UK_300.jpg", format="jpg", dpi=300, bbox_inches="tight")

# Show the plot in the notebook (optional)
plt.show()

KeyboardInterrupt: 

In [30]:
# Trying folium library

import pandas as pd
import networkx as nx
import folium
from matplotlib import cm, colors

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph.
# itertuples() yields plain tuples in row order, which avoids the per-row Series
# that iterrows() constructs. Row order is preserved, so repeated locations and
# routes end up with the attributes of their last booking (add_node()/add_edge()
# overwrite existing attributes).
route_columns = [
    'FromLocation', 'ToLocation', 'route_distance',
    'FromLatitude', 'FromLongitude', 'ToLatitude', 'ToLongitude',
]
for (from_location, to_location, route_distance,
     from_lat, from_lon, to_lat, to_lon) in (
    df_final_UK_300[route_columns].itertuples(index=False, name=None)
):
    G.add_node(from_location, latitude=from_lat, longitude=from_lon)
    G.add_node(to_location, latitude=to_lat, longitude=to_lon)
    G.add_edge(from_location, to_location, weight=route_distance)

# Calculate node attributes for pick up and drop off rates
pickup_counts = df_final_UK_300['FromLocation'].value_counts()
dropoff_counts = df_final_UK_300['ToLocation'].value_counts()

# Plain dicts keep the per-node lookups in the loop below cheap
pickups_by_node = pickup_counts.to_dict()
dropoffs_by_node = dropoff_counts.to_dict()

for node in G.nodes():
    pickups = pickups_by_node.get(node, 0)
    dropoffs = dropoffs_by_node.get(node, 0)
    total = pickups + dropoffs

    # Every node stems from at least one booking, so total is normally >= 1.
    # The guard only covers keys that value_counts() leaves out (missing values).
    pickup_rate = pickups / total if total else 0.0
    dropoff_rate = dropoffs / total if total else 0.0

    G.nodes[node]['pickup_rate'] = pickup_rate
    G.nodes[node]['dropoff_rate'] = dropoff_rate
    G.nodes[node]['size'] = int(total)

# 1) Fix the node coloring
def get_node_color(pickup_rate, dropoff_rate):
    """Return a hex color that encodes a node's pickup/dropoff balance.

    The pickup rate is mapped through the 'Blues' colormap and the dropoff rate
    through the 'Reds' colormap; the two RGBA colors are then combined channel
    by channel.

    Parameters
    ----------
    pickup_rate : float
        Value looked up in the 'Blues' colormap (the caller passes a share in [0, 1]).
    dropoff_rate : float
        Value looked up in the 'Reds' colormap (the caller passes a share in [0, 1]).

    Returns
    -------
    str
        Hex color string of the form '#rrggbb'.
    """
    # cm.get_cmap() was removed in Matplotlib 3.9; the colormaps are still
    # available as attributes of matplotlib.cm.
    # float() matters because colormaps interpret integer inputs as lookup-table
    # indices rather than as positions in [0, 1].
    pickup_color = cm.Blues(float(pickup_rate))
    dropoff_color = cm.Reds(float(dropoff_rate))

    # Both colormaps run from near-white (low) to a dark hue (high). Taking the
    # channel-wise maximum lets the near-white colour of the low rate win, so
    # pure pickup and pure dropoff nodes both came out almost white. The
    # channel-wise minimum keeps the darker (dominant) hue instead.
    blended_color = [min(p, d) for p, d in zip(pickup_color, dropoff_color)]

    return colors.to_hex(blended_color)

# Create a map centered at the average latitude and longitude of the pickup locations
map_center = df_final_UK_300[['FromLatitude', 'FromLongitude']].mean().tolist()
m = folium.Map(location=map_center, zoom_start=5)

# 2) Change node size based on the 'size' attribute
# CircleMarker's radius is given in pixels, so the raw booking count cannot be
# used directly (busy locations would cover the whole map). The radius is scaled
# into [MIN_MARKER_RADIUS, MAX_MARKER_RADIUS] relative to the busiest node.
MIN_MARKER_RADIUS = 2
MAX_MARKER_RADIUS = 25
# `or 1` avoids a division by zero for an empty graph or all-zero sizes
largest_node = max((attrs['size'] for _, attrs in G.nodes(data=True)), default=0) or 1

for node, attrs in G.nodes(data=True):
    pickup_rate = attrs['pickup_rate']
    dropoff_rate = attrs['dropoff_rate']
    node_color = get_node_color(pickup_rate, dropoff_rate)

    # Square-root scaling makes the marker *area* grow with the booking count
    radius = MIN_MARKER_RADIUS + (MAX_MARKER_RADIUS - MIN_MARKER_RADIUS) * (
        attrs['size'] / largest_node
    ) ** 0.5

    folium.CircleMarker(
        location=[attrs['latitude'], attrs['longitude']],
        radius=radius,  # scaled from the 'size' attribute
        color=node_color,
        fill=True,
        fill_color=node_color,
        fill_opacity=0.7,
        popup=f"{node}: {pickup_rate:.2f}"
    ).add_to(m)


# Save the map to an HTML file
m.save('map_uk_300.html')


