In [21]:
# Importing needed libraries

import folium
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import networkx as nx
import pandas as pd

In [3]:
# Loading dataset
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where this exact file exists.
DF_FINAL_CSV = "/Users/jangaydoul/Desktop/Copenhagen Business School/4. Semester :: Thesis/03_Data/csvs/df_final.csv"

df_final = pd.read_csv(DF_FINAL_CSV)
df_final.head()

Unnamed: 0,BookingId,SubBookingName,Customer Name,StartRequestedDate,EndRequestedDate,FromLocation,ToLocation,FromLatitude,FromLongitude,FromCity,FromCountry,ToLatitude,ToLongitude,ToCity,ToCountry,DomesticDelivery,route_distance,FullLoadIndicator,EmptyBookingIndicator,GrossWeight,Temperature,frozen_load
0,7399574,A,2966,2020-12-31,2020-12-31,33917,32289,53.51131,-1.1254,YORKSHIRE,United Kingdom,53.60475,-0.65636,SCUNTHORPE,United Kingdom,1,46.346,1.0,0.0,0.0,,0
1,7399575,A,8183,2020-12-31,2020-12-31,9443,57716,53.50446,-2.84867,Liverpool,United Kingdom,53.04612,-2.92787,Wrexham,United Kingdom,1,78.192,1.0,0.0,0.0,,0
2,7399576,A,8183,2020-12-31,2020-12-31,9443,57716,53.50446,-2.84867,Liverpool,United Kingdom,53.04612,-2.92787,Wrexham,United Kingdom,1,78.192,1.0,0.0,0.0,,0
3,7399577,A,4737,2020-12-31,2020-12-31,5977,9831,53.77629,-1.52585,Leeds,United Kingdom,53.71874,-1.41235,YORKSHIRE,United Kingdom,1,17.733,0.0,0.0,0.0,,0
4,7399578,A,4737,2020-12-31,2020-12-31,5977,3182,53.77629,-1.52585,Leeds,United Kingdom,55.87784,-3.65235,Bathgate,United Kingdom,1,369.34,1.0,0.0,0.0,,0


In [23]:
# Keep only bookings whose pickup AND drop-off country are both the United Kingdom.
starts_in_uk = df_final['FromCountry'].eq('United Kingdom')
ends_in_uk = df_final['ToCountry'].eq('United Kingdom')
df_final_UK = df_final[starts_in_uk & ends_in_uk]

len(df_final_UK)

1482429

In [24]:
# Keep only UK bookings with a route distance strictly below 300 km.
# Rows with a missing route_distance compare False and are dropped.
is_under_300 = df_final_UK['route_distance'].lt(300)
df_final_UK_300 = df_final_UK[is_under_300]

len(df_final_UK_300)

906595

In [58]:
# Trying folium library

import pandas as pd
import networkx as nx
import folium


# Create a directed graph: one node per location id, one edge per
# (FromLocation, ToLocation) pair that occurs in the data.
G = nx.DiGraph()

# Only these columns are needed to build the graph.
route_columns = [
    'FromLocation', 'ToLocation', 'route_distance',
    'FromLatitude', 'FromLongitude', 'ToLatitude', 'ToLongitude',
]

# Add nodes and edges to the graph.
# itertuples(index=False, name=None) yields plain tuples in row order and avoids
# building a Series per row as iterrows() does.
# Rows are still visited in their original order, so when a location or route
# appears more than once the attributes of its last occurrence are kept
# (add_node / add_edge overwrite existing attributes).
for (from_location, to_location, route_distance,
     from_lat, from_lon, to_lat, to_lon) in df_final_UK_300[route_columns].itertuples(index=False, name=None):
    G.add_node(from_location, latitude=from_lat, longitude=from_lon)
    G.add_node(to_location, latitude=to_lat, longitude=to_lon)
    G.add_edge(from_location, to_location, weight=route_distance)

# Number of bookings that start (pickup) and end (drop-off) at each location.
pickup_counts = df_final_UK_300['FromLocation'].value_counts()
dropoff_counts = df_final_UK_300['ToLocation'].value_counts()

# Annotate every node with its share of pickups vs. drop-offs and its total
# activity. G.nodes(data=True) yields the live attribute dict of each node, so
# updating it writes straight into the graph.
for node, node_attrs in G.nodes(data=True):
    n_pickups = pickup_counts.get(node, 0)
    n_dropoffs = dropoff_counts.get(node, 0)
    n_total = n_pickups + n_dropoffs

    node_attrs.update(
        pickup_rate=n_pickups / n_total,
        dropoff_rate=n_dropoffs / n_total,
        size=n_total,
    )

# Colormaps used to tint the nodes, looked up once instead of on every call.
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# the matplotlib.colormaps registry (available since Matplotlib 3.5) replaces it.
PICKUP_COLORMAP = matplotlib.colormaps['Blues']
DROPOFF_COLORMAP = matplotlib.colormaps['Reds']


def get_node_color(pickup_rate, dropoff_rate):
    """Return the hex colour used to draw a node.

    The two rates are first mixed into a single value
    (0.7 * pickup_rate + 0.3 * dropoff_rate). That value is looked up in both
    the 'Blues' and the 'Reds' colormap, and the result takes, per RGB channel,
    the larger of the two colours.

    Parameters
    ----------
    pickup_rate : float
        Share of the node's bookings that are pickups.
    dropoff_rate : float
        Share of the node's bookings that are drop-offs.

    Returns
    -------
    str
        Colour as '#rrggbb'. No alpha is encoded: colors.to_hex drops the alpha
        channel by default, so transparency must be set on the marker itself.
    """
    blended_rate = 0.7 * pickup_rate + 0.3 * dropoff_rate

    # Calling a colormap with a scalar returns an (r, g, b, a) tuple.
    pickup_rgb = PICKUP_COLORMAP(blended_rate)[:3]
    dropoff_rgb = DROPOFF_COLORMAP(blended_rate)[:3]

    blended_rgb = tuple(max(p, d) for p, d in zip(pickup_rgb, dropoff_rgb))
    return colors.to_hex(blended_rgb)


# Create a map centered at the average pickup (From*) latitude and longitude.
# NOTE(review): newer folium releases no longer ship the built-in
# "Stamen Terrain" tiles; confirm against the installed folium version.
map_center = df_final_UK_300[['FromLatitude', 'FromLongitude']].mean().tolist()
m = folium.Map(location=map_center, zoom_start=5, tiles="Stamen Terrain")

# Number of bookings per (FromLocation, ToLocation) route. Computed once and
# reused for both the lookup dictionary and the top-routes ranking.
route_sizes = df_final_UK_300.groupby(['FromLocation', 'ToLocation']).size()
route_counts = route_sizes.to_dict()

# The top 10% most used routes (by number of bookings).
threshold = int(len(route_counts) * 0.1)
top_routes = route_sizes.nlargest(threshold).index.tolist()

# Every location that is the start or end of at least one top-10% route.
top_route_nodes = {node for route in top_routes for node in route}

# Draw the map: a circle marker for every node that belongs to a top-10% route
# and a line for every top-10% route.

# Minimum number of appearances for an edge to be considered "frequent".
edge_weight_threshold = 10

# top_routes is a list; a set gives constant-time membership checks inside the
# per-edge loop.
top_route_set = set(top_routes)

for node in G.nodes():
    latitude = G.nodes[node]['latitude']
    longitude = G.nodes[node]['longitude']
    pickup_rate = G.nodes[node]['pickup_rate']
    dropoff_rate = G.nodes[node]['dropoff_rate']
    node_color = get_node_color(pickup_rate, dropoff_rate)

    if node in top_route_nodes:
        folium.CircleMarker(
            location=[latitude, longitude],
            radius=0.25,
            color=node_color,
            fill=True,
            fill_color=node_color,
            fill_opacity=0.7,
            popup=f"{node}: {pickup_rate:.2f}"
        ).add_to(m)

    # Styling and drawing are done per outgoing edge, inside this loop, so that
    # every route of the node is evaluated and a node without outgoing edges
    # draws nothing.
    for _, to_node in G.out_edges(node):
        if (node, to_node) not in top_route_set:
            continue

        edge_weight = route_counts.get((node, to_node), 0)
        if edge_weight > edge_weight_threshold:
            edge_color = '#444444'
        else:
            edge_color = '#CCCCCC'

        to_location = [G.nodes[to_node]['latitude'], G.nodes[to_node]['longitude']]
        folium.PolyLine(
            locations=[[latitude, longitude], to_location],
            color=edge_color,
            weight=1,
            opacity=0.5
        ).add_to(m)

# Save the map to an HTML file
m.save('map_uk_300.html')


