In [None]:
from sklearn.preprocessing import MinMaxScaler
from tqdm.notebook import tqdm
import networkx as nx
import pandas as pd
import numpy as np
from tabulate import tabulate
import folium

In [None]:
# File stems (no extension) for every monthly data set:
# all twelve months of 2020-2022 followed by January-April 2023.
year_months = [(year, month) for year in range(2020, 2023) for month in range(1, 13)]
year_months += [(2023, month) for month in range(1, 5)]

# Months are zero-padded to two digits, e.g. "traffic_density_202001".
files = ["traffic_density_{}{:02d}".format(year, month) for year, month in year_months]

files

In [None]:
# Fraction of each graph's nodes that survive in its largest connected component,
# one entry per file, in the order of `files`.
node_conservation = []

for file in tqdm(files):
    # Each file stem has a raw-records CSV and a GEXF graph next to it.
    traffic_data = pd.read_csv(file + ".csv", encoding="utf-8")
    G = nx.read_gexf(file + ".gexf")

    actual = G.number_of_nodes()

    # Copy the largest connected component into a standalone, mutable graph.
    G_l = nx.Graph(G.subgraph(max(nx.connected_components(G), key=len)))

    traffic_data = traffic_data[["GEOHASH", "LATITUDE", "LONGITUDE"]].set_index("GEOHASH")

    # Only the first record seen for each geohash supplies its coordinates.
    # NOTE(review): "LAT" is filled from the LONGITUDE column and "LONG" from
    # LATITUDE; presumably the source CSV has the two columns interchanged -
    # confirm against the data.
    node_loc = {}
    for geohash, record in traffic_data.iterrows():
        if geohash in node_loc:
            continue
        node_loc[geohash] = {"LAT": record["LONGITUDE"], "LONG": record["LATITUDE"]}

    nx.set_node_attributes(G_l, node_loc)
    largest = G_l.number_of_nodes()

    node_conservation.append(largest / actual)

    nx.write_gexf(G_l, file + "_largest.gexf")

    # Edge table is built from the full graph G (not just the largest component);
    # the stored weight is negated when the edge is flagged "isNegative".
    data = [
        [source, target, attrs["dist"], attrs["weight"] * (-1 if attrs["isNegative"] else 1)]
        for source, target, attrs in G.edges(data=True)
    ]

    df = pd.DataFrame(data, columns=["GEOHASH_1", "GEOHASH_2", "Distance", "Correlation"])
    df.to_csv(file + "_correlations.csv", index=False, encoding="utf-8")

In [None]:
# Per-file ratio: nodes in the largest connected component / nodes in the full graph.
node_conservation

In [None]:
# Average share of nodes retained in the largest connected component across all files.
np.mean(node_conservation)

In [None]:
# Bounds of the range that betweenness centralities are rescaled into
# (passed as MinMaxScaler's feature_range in the next cell).
# NOTE(review): the origin of this hard-coded upper bound is not shown here;
# presumably it was taken from an earlier run - confirm and derive it in code.
max_val = 0.21540647846367683
min_val = 0

In [None]:
# between_nodes maps node id -> [running mean of scaled betweenness, number of graphs seen in].
# The mean is kept as a 1-element array (a row of the scaler output); later cells
# rely on that shape when they index it with [0][0].
between_nodes = {}
scaler = MinMaxScaler(feature_range = (min_val, max_val))

for file in tqdm(files):
    G = nx.read_gexf(file + ".gexf")
    btw_nodes = nx.betweenness_centrality(G)
    keys = list(btw_nodes.keys())
    btw_centr = np.array(list(btw_nodes.values()))

    # The scaler is re-fitted for every graph, so each file is rescaled
    # independently into [min_val, max_val].
    btw_centr_scaled = scaler.fit_transform(btw_centr.reshape(-1, 1))
    btw_centr_scaled_list = [scaled_row[0] for scaled_row in btw_centr_scaled]

    for node, scaled in zip(keys, btw_centr_scaled):
        if node in between_nodes:
            # Fold this graph's value into the running mean for the node.
            prev_val, prev_count = between_nodes[node]
            new_count = prev_count + 1
            new_val = (scaled + prev_val * prev_count) / (new_count)
            between_nodes[node] = [new_val, new_count]
        else:
            between_nodes[node] = [scaled, 1]

In [None]:
# node id -> [mean scaled betweenness (1-element array), number of graphs containing the node]
between_nodes

In [None]:
# Same mapping as between_nodes, with the 1-element mean array unwrapped to a scalar:
# node id -> [mean scaled betweenness, number of graphs containing the node].
btw_centr = {
    loc: [mean_and_count[0][0], mean_and_count[1]]
    for loc, mean_and_count in between_nodes.items()
}

btw_centr

In [None]:
# Top five nodes by mean scaled betweenness, restricted to nodes that appear in
# at least 24 of the monthly graphs, plotted on a map.

# Select the keys directly instead of matching on values afterwards: value matching
# could pick up nodes below the count threshold and could add a key more than once
# when two nodes share the same value.
eligible = {key: entry[0] for key, entry in btw_centr.items() if entry[1] >= 24}
top_5_keys = sorted(eligible, key=eligible.get, reverse=True)[:5]
top_5 = [eligible[key] for key in top_5_keys]

top_5_dict = {key: btw_centr[key] for key in top_5_keys}

btw_centralities = [btw_centr[key][0] for key in top_5_keys]

counts = [btw_centr[key][1] for key in top_5_keys]

# Coordinates are looked up in a single reference month.
file = "traffic_density_202005.csv"

df = pd.read_csv(file)

org_df = df[df["GEOHASH"].isin(top_5_keys)]

# The string "mean" avoids the deprecated callable form of groupby aggregation.
new_df = org_df.groupby("GEOHASH").agg({"LATITUDE": "mean", "LONGITUDE": "mean"}).reset_index()

# NOTE(review): the aggregated LATITUDE/LONGITUDE columns are relabelled in swapped
# order, as before; presumably the source CSV has them interchanged - confirm.
new_df.columns = ["GEOHASH", "LONGITUDE", "LATITUDE"]

# Attach centralities by geohash. A positional assignment would mislabel rows,
# because the groupby output is ordered by GEOHASH rather than by top_5_keys,
# and would fail outright if a key were absent from this month's CSV.
new_df["CENTRALITY"] = new_df["GEOHASH"].map(eligible)

new_df = new_df.sort_values("CENTRALITY", ascending = False).reset_index(drop = True)

my_map = folium.Map()

# Add markers for each coordinate
for _, row in new_df.iterrows():
    folium.Marker([row["LATITUDE"], row["LONGITUDE"]]).add_to(my_map)

my_map

In [None]:
# Top-centrality geohashes with their mean coordinates, sorted by CENTRALITY descending.
new_df

In [None]:
# Rebuild the marker map from the current new_df; nothing is displayed by this cell.
my_map = folium.Map()

# One marker per row, positioned at (LATITUDE, LONGITUDE).
for lat, lon in zip(new_df["LATITUDE"], new_df["LONGITUDE"]):
    folium.Marker([lat, lon]).add_to(my_map)

In [None]:
# For every monthly graph, record the highest betweenness centrality (maxes) and
# the node id(s) that attain it (max_hash). Ties contribute every tied node, so
# max_hash can hold more than one entry per file.
max_hash = []
maxes = []

for file in tqdm(files):
    G = nx.read_gexf(file + ".gexf")
    btw_nodes = nx.betweenness_centrality(G)

    # Deliberately does not reuse the names `max_val` or `btw_centr`: those hold the
    # scaler's upper bound and the averaged-centrality dict from earlier cells, and
    # overwriting them here would break those cells when they are re-run.
    file_max = max(btw_nodes.values())
    maxes.append(file_max)

    max_hash.extend(key for key, score in btw_nodes.items() if score == file_max)

max_hash

In [None]:
# Summarise the nodes that had the highest betweenness in at least one month:
# mean coordinates from a reference month plus how many months each node was the maximum.
file = "traffic_density_202005.csv"

df = pd.read_csv(file)

org_df = df[df["GEOHASH"].isin(max_hash)]

# The string "mean" avoids the deprecated callable form of groupby aggregation.
new_df = org_df.groupby("GEOHASH").agg({"LATITUDE": "mean", "LONGITUDE": "mean"}).reset_index()

# NOTE(review): the aggregated LATITUDE/LONGITUDE columns are relabelled in swapped
# order, as before; presumably the source CSV has them interchanged - confirm.
new_df.columns = ["GEOHASH", "LONGITUDE", "LATITUDE"]

# Number of monthly graphs in which each geohash was the top-betweenness node.
counts = [max_hash.count(geohash) for geohash in new_df["GEOHASH"]]

new_df["BTW_COUNTS"] = counts

# Graph holding one attributed node per top-betweenness geohash (no edges are added).
G = nx.Graph()

for geohash, lat, long, count in zip(
    new_df["GEOHASH"], new_df["LATITUDE"], new_df["LONGITUDE"], new_df["BTW_COUNTS"]
):
    G.add_node(geohash, lat = lat, long = long, count = count)

# The former loop over df rows was removed: it referenced an undefined name
# (`max_hashes`), compared list.index() with -1 although list.index() raises
# ValueError for missing items, and its body was only `pass`.

print(G)

In [None]:
# node id -> mean scaled betweenness as a scalar (unwrapped from its 1-element array).
new_dict = {loc: mean_and_count[0][0] for loc, mean_and_count in between_nodes.items()}
values = [*new_dict.values()]

In [None]:
# node id -> mean scaled betweenness centrality (scalar).
new_dict

In [None]:
# Mean scaled betweenness per node, in between_nodes' iteration order.
vals = list(between_nodes.values())
values = [entry[0][0] for entry in vals]

In [None]:
# Node id with the highest mean scaled betweenness (first one on ties).
highest = max(values)
idx = values.index(highest)
list(between_nodes)[idx]

In [None]:
# Current contents of new_df (as left by whichever cell last assigned it).
new_df

In [None]:
# Build a fresh map with one marker per row of new_df.
my_map = folium.Map()

for lat, lon in zip(new_df["LATITUDE"], new_df["LONGITUDE"]):
    marker = folium.Marker([lat, lon])
    marker.add_to(my_map)

# Display the map inline as the cell output (nothing is written to disk here).
my_map