In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from geopy.distance import vincenty
from sklearn import preprocessing
from scipy import interpolate
from scipy.interpolate import interp1d
from sklearn.linear_model import LinearRegression
import networkx as nx

# Display setting: let pandas show up to 100 columns when a frame is rendered.
pd.set_option('display.max_columns', 100)


## Weekly travel-time extracts (paths are given without the '.csv' suffix).
## The list below covers Jan 2016 - Feb 2016; download those files to use more data.
# list_of_df_names = ['fahrzeiten_soll_ist_20160103_20160109', 'fahrzeiten_soll_ist_20160110_20160116',
#                     'fahrzeiten_soll_ist_20160117_20160123', 'fahrzeiten_soll_ist_20160124_20160130',
#                     'fahrzeiten_soll_ist_20160131_20160206', 'fahrzeiten_soll_ist_20160207_20160213',
#                     'fahrzeiten_soll_ist_20160214_20160220', 'fahrzeiten_soll_ist_20160221_20160227']

# Only the first two weeks are loaded here.
list_of_df_names = [
    './travel_times_2016/fahrzeiten_soll_ist_20160103_20160109',
    './travel_times_2016/fahrzeiten_soll_ist_20160110_20160116',
]

# Read every weekly file, then stack them into one frame with a fresh 0..n-1 index.
list_of_df = [pd.read_csv(base_name + '.csv') for base_name in list_of_df_names]
df_1 = pd.concat(list_of_df, ignore_index=True)
# Empty the list so it no longer holds references to the per-week frames.
list_of_df.clear()


####################
df_haltepunkt = pd.read_csv("./travel_times_2016/haltepunkt.csv")
# Fix the gps
# print(type(df_haltepunkt['GPS_Latitude'].apply(lambda x: float(x.replace(',', '.')))))
# df_haltepunkt['GPS_Latitude'] = df_haltepunkt['GPS_Latitude'].apply(lambda x: float(x.replace(',', '.')))
# df_haltepunkt['GPS_Longitude'] = df_haltepunkt['GPS_Longitude'].apply(lambda x: float(x.replace(',', '.')))


# Bus Station
df_haltestelle = pd.read_csv("./travel_times_2016/haltestelle.csv")
####################

def find_dist(lat1, lon1, lat_2, lon2):
    """Return the distance in kilometres between two GPS points.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point.
    lat_2, lon2 : latitude and longitude of the second point.

    Returns
    -------
    The ``.km`` value of the geopy ``vincenty`` distance between the two points.
    """
    c1 = (lat1, lon1)
    c2 = (lat_2, lon2)
    # Only the name `vincenty` is imported at the top of the notebook; the package
    # name `geopy` is not, so `geopy.distance.vincenty(...)` raised NameError.
    return vincenty(c1, c2).km
####################
# Some useful atts #
####################

# Scheduled (target) arrival at the "from" stop, in seconds
target_arrival_from = "soll_an_von"

# Actual arrival at the "from" stop, in seconds
actual_arrival_from = "ist_an_von"

# Scheduled (target) departure from the "from" stop, in seconds
target_departure_from = "soll_ab_von"

# Actual departure from the "from" stop, in seconds
actual_departure_from = "ist_ab_von"
#######################################

#######################################
# Scheduled (target) arrival at the "to" stop, in seconds
target_arrival_to = "soll_an_nach"

# Actual arrival at the "to" stop, in seconds
# (note the trailing "1" - presumably this matches the CSV header; verify against the data)
actual_arrival_to = "ist_an_nach1"

# Scheduled (target) departure from the "to" stop, in seconds
target_departure_to = "soll_ab_nach"

# Actual departure from the "to" stop, in seconds
actual_departure_to = "ist_ab_nach"
#####################################

#####################################


In [None]:
# Usually there are no NaNs, but remove any just in case.
# Both coordinates are checked: the coordinates are parsed into floats afterwards
# and a stop with either one missing cannot be placed on the map.
df_haltepunkt = df_haltepunkt.dropna(axis=0, how='any', subset=['GPS_Latitude', 'GPS_Longitude'])

In [None]:
# Fix the gps: the raw values use a decimal comma, so swap it for a dot and parse
# the result as float.
# Going through astype(str) keeps the cell re-runnable: values that are already
# floats are turned back into text first instead of failing on a missing `.replace`.
for gps_column in ('GPS_Latitude', 'GPS_Longitude'):
    df_haltepunkt[gps_column] = (
        df_haltepunkt[gps_column]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

In [None]:
# number of records in df_1
len(df_1)

In [None]:
# Merge the halt punkt from

df_1['halt_punkt_id'] = df_1['halt_punkt_id_von']
df_1 = df_1.merge(df_haltepunkt, on=['halt_punkt_id'])

In [None]:
# Merge punkt to

df_1['halt_punkt_id'] = df_1['halt_punkt_id_nach']
df_1 = df_1.merge(df_haltepunkt, on='halt_punkt_id')

In [None]:
df_1['halt_id'] = df_1['halt_id_x']
df_1 = df_1.merge(df_haltestelle, on='halt_id')

In [None]:
df_1['halt_id'] = df_1['halt_id_y']
df_1 = df_1.merge(df_haltestelle, on='halt_id')

In [None]:
df_1.info()

In [None]:
# df_1.sort_values(by = ['betriebsdatum', 'soll_ab_von', 'fahrt_id', 'seq_von'], inplace=True)
df_1 = df_1[df_1['betriebsdatum'] == df_1['datum_nach']]

In [None]:
# Time-of-day window on the scheduled arrival at the "to" stop (values are in seconds):
# 18000 s = 18000/60/60 = 05:00 and 86400 s = 24:00; both bounds are exclusive.
df_1 = df_1[(df_1['soll_an_nach'] > 18000) & (df_1['soll_an_nach'] < 86400)]

In [None]:
# Names of the derived signal columns
target_stationary = 'target_stationary'
target_travel_time = 'target_travel_time'
delay = 'delay'

actual_stationary = 'actual_stationary'
actual_travel_time = 'actual_travel_time'

# Scheduled dwell at the "from" stop: scheduled departure minus scheduled arrival
df_1[target_stationary] = df_1[target_departure_from] - df_1[target_arrival_from]
# Scheduled travel time: scheduled arrival at the "to" stop minus scheduled departure at the "from" stop
df_1[target_travel_time] = df_1[target_arrival_to] - df_1[target_departure_from]
# Arrival delay at the "from" stop: actual arrival minus scheduled arrival
df_1[delay] = df_1[actual_arrival_from] - df_1[target_arrival_from]


# Actual dwell at the "from" stop: actual departure minus actual arrival
df_1[actual_stationary] = df_1[actual_departure_from] - df_1[actual_arrival_from]
# Actual travel time: actual arrival at the "to" stop minus actual departure at the "from" stop
df_1[actual_travel_time] = df_1[actual_arrival_to] - df_1[actual_departure_from]

# Create A Graph

In [None]:
# Creating a pair of gps locations and calculating the signal at that time instance:

# Average the coordinates of all rows that share a halt_id, separately for the
# "from" side (_x columns) and the "to" side (_y columns).
# The columns are selected with a list ([[...]]): the tuple form
# groupby(...)['a', 'b'] is deprecated and no longer accepted by current pandas.
df_new_x = df_1.groupby(['halt_id_x'], as_index=False)[['GPS_Latitude_x', 'GPS_Longitude_x']].mean()
df_new_y = df_1.groupby(['halt_id_y'], as_index=False)[['GPS_Latitude_y', 'GPS_Longitude_y']].mean()
# Merge the averaged coordinates back; the merge suffixes produce *_x_x / *_y_x for the
# original values and *_x_y / *_y_y for the per-halt_id averages.
final = df_1.merge(df_new_x, on=['halt_id_x'])
final = final.merge(df_new_y, on=['halt_id_y'])

In [None]:
final.rename(columns={'GPS_Latitude_x_y':'GPS_LAN_STELLE_FROM', 'GPS_Longitude_x_y':'GPS_LON_STELLE_FROM',
                      'GPS_Latitude_y_y':'GPS_LAN_STELLE_TO', 'GPS_Longitude_y_y':'GPS_LON_STELLE_TO'}, inplace=True)
# final

In [None]:
# final

In [None]:
final['Road_Segment_From'] = list(zip(final.GPS_Latitude_x_x, final.GPS_Longitude_x_x))
final['Road_Segment_To'] = list(zip(final.GPS_Latitude_y_x, final.GPS_Longitude_y_x))
final['Road_Segment_Node'] = list(zip(final.Road_Segment_From, final.Road_Segment_To))
final['Route_Node_id_tuple'] = list(zip(final.halt_punkt_id_von, final.halt_punkt_id_nach))


### the above uses the punkts for the graph; uncomment below for stelles
# final['Road_Segment_From'] = list(zip(final.GPS_LAN_STELLE_FROM, final.GPS_LON_STELLE_FROM))
# final['Road_Segment_To'] = list(zip(final.GPS_LAN_STELLE_TO, final.GPS_LON_STELLE_TO))
# final['Road_Segment_Node'] = list(zip(final.Road_Segment_From, final.Road_Segment_To))
# final['Route_Node_id_tuple'] = list(zip(final.halt_id_x, final.halt_id_y))
# # final

In [None]:
# drop these columns to be easier on the memory
# columns_to_drop = ['halt_diva_von', 'halt_punkt_diva_von', 'halt_diva_nach', 'halt_punkt_diva_nach', 'fw_no', 'fw_typ', 'fw_kurz']
# final.drop(columns=columns_to_drop, inplace=True)

# Construct the graph:

    Dealing with granularity issues:

In [None]:
# Day-of-week signal derived from the operating date (strings parsed as dd.mm.yy).
d = pd.to_datetime(final['betriebsdatum'], format='%d.%m.%y', dayfirst=True)
# Monday == 0 … Sunday == 6
final['day_of_week'] = d.dt.weekday.tolist()

In [None]:
final.fw_typ.value_counts()

In [None]:
# the fw_typ 2 is dropped since it does not correspond to a tram or a bus
final = final[final.fw_typ != 2]

In [None]:
# show the dist of fw_no
final.fw_no.value_counts()[:5]

In [None]:
list(set(zip(final.GPS_Latitude_x_x, final.GPS_Longitude_x_x)))[:5]

In [None]:
# Encode every (from id, to id) pair as a single integer route id.
# The pair is first flattened into the string "<from>_<to>"; the label encoder then
# assigns one integer label per distinct string.
final['Route_Node_id_str'] = ['{}_{}'.format(pair[0], pair[1]) for pair in final.Route_Node_id_tuple]
le = preprocessing.LabelEncoder()
final['Route_Node_id'] = le.fit_transform(final.Route_Node_id_str)

In [None]:
final = final[((final.day_of_week != 5) & (final.day_of_week != 6))]
final = final[(final['soll_an_von'] >= 21600) & (final['soll_an_von'] <= 72000)]

In [None]:
final.fw_no.value_counts()[:5]

In [None]:
len(set(final.Road_Segment_Node))

In [None]:
# two kinds of signal
final['actual_travel_time_n_target'] = final.actual_travel_time.div(final.target_travel_time, axis=0)
final['actual_travel_time_n_target_mean'] = final.actual_travel_time.div(final.target_travel_time.mean(), axis=0)

In [None]:
# Width of one time bin in seconds (600 s = 10 minutes).
time_period = 600
# First bin edge in seconds (21600 s = 06:00; an earlier version started at 5 am).
# Starting at 21600 is meant to maximize the number of graph nodes.
starting_time = 21600
# Number of whole bins between the start and 72000 s.
n_bins = int((72000 - starting_time) // time_period)
# One integer label per bin: 0 .. n_bins - 1.
labels = list(range(n_bins))
# Bin edges: n_bins + 1 equally spaced boundaries beginning at starting_time.
list_of_bins = [starting_time + time_period * step for step in range(n_bins + 1)]
# Assign each record to a bin according to its scheduled arrival at the "from" stop.
final['intervals'] = pd.cut(final[target_arrival_from], bins=list_of_bins, retbins=False, labels=labels, right=True, include_lowest=True)

In [None]:
len(labels)

In [None]:
final.Route_Node_id.value_counts()[:5]

In [None]:
# final['actual_travel_time_n_target'] = final.actual_travel_time / final.target_travel_time
# final.actual_travel_time_n_target = final.actual_travel_time_n_target - final.actual_travel_time_n_target.mean()
# final.actual_travel_time_n_target_mean = final.actual_travel_time_n_target_mean - final.actual_travel_time_n_target_mean.mean()

In [None]:

# Inspect one route segment (id 1158) on one operating day: plot both normalized
# travel-time signals, averaged per time bin.
a = final[(final.Route_Node_id == 1158) & (final.betriebsdatum == '05.01.16')]
# numeric_only=True: `final` also holds string and tuple columns, which cannot be
# averaged and make a plain .mean() fail on current pandas.
a = a.groupby(by='intervals', as_index=False).mean(numeric_only=True)
bin_labels = a.intervals.values.tolist()
plt.plot(bin_labels, a.actual_travel_time_n_target.values.tolist(),
         bin_labels, a.actual_travel_time_n_target_mean.values.tolist())

In [None]:
# Same route segment and day as above: one line per fahrweg_id, plus the per-bin mean.
a = final[(final.Route_Node_id == 1158) & (final.betriebsdatum == '05.01.16')]
for i in set(a.fahrweg_id):
    b = a[a.fahrweg_id == i]
    # Skip fahrweg_ids with fewer than 50 records (checked before the sort to avoid wasted work).
    if b.shape[0] < 50:
        continue
    # sort_values returns a new frame; sorting the slice in place triggers
    # pandas' SettingWithCopyWarning.
    b = b.sort_values(by=['intervals', target_departure_from])
    plt.plot(b[target_departure_from], b.actual_travel_time_n_target)
# numeric_only=True: string/tuple columns of `final` cannot be averaged.
a = a.groupby(by='intervals', as_index=False).mean(numeric_only=True)
a = a.sort_values(by=['intervals', target_departure_from])
plt.title('Signal')
plt.xlabel('Times of the day')
plt.ylabel('')
# Convert bin labels back to seconds using the configured start of the first bin
# (starting_time) instead of a second hard-coded 21600.
plt.scatter([time_period * i + starting_time for i in a.intervals.values.tolist()], np.array(a.actual_travel_time_n_target.values.tolist()), marker='^', s=1000)

In [None]:
final.Route_Node_id.value_counts()[580:600]

In [None]:
# Find the nodes with granularity problems:
# depending on the parameters chosen above, some road segments have time bins without
# any record; those segments may have to be dropped so that every remaining node has a
# signal value for every time stamp.

list_road_seg = final.Route_Node_id.value_counts()
list_road_seg_id = []          # every road segment id, in value_counts order
list_empty_bins_index = []     # number of empty bins for the segment at the same position
list_to_drop = []              # segments with at least one empty bin

for index in list_road_seg.index:
    # Records per bin for this segment; bins without records show up with a count of 0.
    records_per_bin = final.loc[final.Route_Node_id == index, 'intervals'].value_counts()
    n_empty = int((records_per_bin == 0).sum())
    list_road_seg_id.append(index)
    list_empty_bins_index.append(n_empty)
    if n_empty:
        list_to_drop.append(index)
list_empty_bins_index = np.array(list_empty_bins_index)

In [None]:
len(set(final.Route_Node_id))

In [None]:
# Histogram of the number of empty time bins per road segment.
# The x axis of a histogram carries the binned value (empty bins per segment) and the
# y axis the frequency, so the labels are set accordingly.
pd.Series(list_empty_bins_index).hist()
plt.title('Distribution of empty and non empty bins - 10 mins - Start at 7 - 10')
plt.xlabel('Number of empty bins')
plt.ylabel('Number of road segments')

In [None]:
# %matplotlib
# Plot the halts vs. empty bins
plt.scatter(list_road_seg_id, list_empty_bins_index)
plt.title('Scatter of empty and non empty bins - 10 mins  - Start at 7 - 10')
plt.xlabel('Road ID')
plt.ylabel('Number of empty bins')


In [None]:
# how many nodes are to be kept if one allows for up to 5 interpolated points, vs. no interpolation
print(np.count_nonzero(list_empty_bins_index < 6))
print(np.count_nonzero(list_empty_bins_index == 0))

# A Clear Issue with granularity
This is addressed by limiting the start time of the analysis window and visualizing the resulting distribution of data points per bin.


In [None]:
# rev_dist_bins = pd.Series(index = final.intervals.value_counts().values, data=final.intervals.value_counts().index.values)
plt.scatter(final.intervals.value_counts().index.values, final.intervals.value_counts().values)
plt.title('scatter - bin number vs data count - start at 7, end at 12 - 10 mins')
plt.xlabel('Bin number')
plt.ylabel('Count of Data points')

In [None]:
final.intervals.value_counts()[:5]

In [None]:
np.array(final.intervals.value_counts().index.values)

In [None]:
# Drop those datapoints:
print(np.count_nonzero(list_empty_bins_index != 0))

In [None]:
set(list_empty_bins_index)

In [None]:
final = final[~final.Route_Node_id.isin(list_to_drop)]

In [None]:
len(final.Road_Segment_Node)

In [None]:
final.Route_Node_id.value_counts()

# Define new signal: Normalized travel time - close to TTI - Version 1, normalize by target travel time

In [None]:
final['actual_travel_time_n_target'] = final.actual_travel_time.div(final.target_travel_time, axis=0)
final.head()

# Define new signal: Normalized travel time - Version 2, normalize by mean of target in that interval

In [None]:
# Actual travel time divided by the mean target travel time.
# NOTE(review): `final.target_travel_time.mean()` is one scalar taken over every row of
# `final`, so this normalizes by the overall mean, whereas the section heading speaks
# of the mean "in that interval" - confirm which normalization is intended.
final['actual_travel_time_n_target_mean'] = final.actual_travel_time.div(final.target_travel_time.mean(), axis=0)
final.head()

# Create Signal First!

In [None]:
# Creating Signals for all of the nodes:
List_of_Nodes = [str(i) for i in list(set(final.Route_Node_id))]
# List_of_Nodes

In [None]:
len(List_of_Nodes)

In [None]:
# which signal to use? actual_travel_time_n_target or actual_travel_time_n_target_mean
signal_column = 'actual_travel_time_n_target'

# For every node: average the signal per time bin for each operating day, then
# concatenate the days in chronological order into one long series.
List_of_head_data = {}
for i in List_of_Nodes:
    temp_df = final[final.Route_Node_id == int(i)]
    # Sort the dd.mm.yy strings as real dates; comparing month and day characters only
    # would ignore the year.
    operating_days = sorted(set(temp_df.betriebsdatum),
                            key=lambda day: pd.to_datetime(day, format='%d.%m.%y'))
    time_series_for_node = []
    for j in operating_days:
        time_series = temp_df[temp_df.betriebsdatum == j]
        # observed=False keeps bins without records (as NaN rows) so the days stay
        # aligned bin by bin; numeric_only=True skips the string/tuple columns of
        # `final`, which cannot be averaged.
        time_series = time_series.groupby(by='intervals', as_index=False, observed=False).mean(numeric_only=True)
        time_series_for_node.append(time_series[signal_column])
    # NOTE(review): a node without any record on some day contributes no rows for that
    # day, so its series would be shorter than the others - confirm this cannot occur.
    final_series = pd.concat(time_series_for_node, ignore_index=True)
    List_of_head_data[i] = final_series

In [None]:
# Number of missing (NaN) samples in each node's concatenated time series.
new_dict = {node_key: series.isna().sum() for node_key, series in List_of_head_data.items()}

In [None]:
# new_dict

In [None]:
# Finding dicts with distribution
nan_dist = pd.Series(new_dict)
nan_dist.hist()

In [None]:
# print(len(final_data_df.columns))
final_data_df = pd.DataFrame.from_dict(List_of_head_data)
# print(sorted(final_data_df.isna().sum()))
final_data_df.dropna(thresh=len(final_data_df) - 15, axis=1, inplace=True)
# print(len(final_data_df.columns))

In [None]:
# print(sorted(final_data_df.isna().sum()))

In [None]:
final_data_df.isna().sum().sum()

In [None]:
final_data_df.head()

In [None]:
# Fill the remaining gaps by linear interpolation down each column.
# limit_direction='both' also fills gaps at the very start of a series; the default
# only fills forward and leaves leading NaNs in place, which would then propagate
# into np.corrcoef and into the exported CSV files.
final_data_df = final_data_df.interpolate(limit_direction='both')

In [None]:
# 726     656
# 769     581
sig_1 = final_data_df['24']
sig_2 = final_data_df['23']

In [None]:
plt.plot(final_data_df.index, sig_1, final_data_df.index, sig_2)

In [None]:
np.corrcoef(sig_1,sig_2)

# Signal is Defined, Create the graphs in two ways! Using the signal DF!

## Create The graph - V1

In [None]:
final['Mid_point_GPS_data'] = final.Road_Segment_Node.apply(lambda x: (((x[0][0] + x[1][0])/2) , ((x[0][1] + x[1][1])/2)))

In [None]:
len(final_data_df.columns.values.tolist())

In [None]:
list_of_nodes_Road = list(set(final_data_df.columns.values.tolist()))
# list_of_nodes = list(set(zip(final.Route_Node_id, final.Mid_point_GPS_data)))
(list_of_nodes_Road)[:5]

In [None]:
from geopy.distance import vincenty
from geopy.distance import geodesic
from networkx.drawing.nx_pydot import write_dot

In [None]:
list_of_nodes_Road_toKeep = [int(i) for i in list_of_nodes_Road]
final = final[final.Route_Node_id.isin(list_of_nodes_Road_toKeep)]
# final

In [None]:
# Create List of dicts:
new_final = final.groupby(by='Route_Node_id', as_index=False).min()

In [None]:
Dict_List = pd.Series(new_final.Mid_point_GPS_data.values,index=new_final.Route_Node_id).to_dict()
len(Dict_List)

In [None]:
new_final.head()

In [None]:
# Find edges and weights:
list_of_edges = []

# For each road segment (represented by its midpoint), look at every other segment and
# add an edge when the two midpoints are less than 1500 m apart.
for i in list_of_nodes_Road:
    node_id = int(i)
    for key, value in Dict_List.items():
        # Identical coordinates are skipped. This excludes the segment itself, but also
        # any other segment whose midpoint is exactly the same.
        # NOTE(review): confirm that skipping by coordinates (rather than by id) is intended.
        if Dict_List[node_id] == value:
            continue
        if vincenty(Dict_List[node_id], value).meters < 1500:
            list_of_edges.append((node_id, key))

In [None]:
zlst = list(zip(*list_of_edges))
edge_series = pd.Series(zlst[1], index = zlst[0])
edge_series.value_counts()[:5]

In [None]:
# Define Graph
G = nx.Graph()

In [None]:
G.add_nodes_from(list_of_nodes_Road_toKeep)

In [None]:
len(set(list_of_edges))

In [None]:
G.add_edges_from(list_of_edges)

In [None]:
# Visualize Graph
nx.draw_networkx(G)

In [None]:
len(list(G.nodes))

In [None]:
# Row/column order of the adjacency matrix follows the column order of the signal frame.
a = [int(i) for i in final_data_df.columns.values.tolist()]

# to_numpy_array returns a plain ndarray; nx.to_numpy_matrix was removed in NetworkX 3.0.
adj_mat = nx.to_numpy_array(G, nodelist=a)

# MAP animation

In [None]:
# The following data is needed for the next notebook for time evolution map
dict_gps = pd.Series(final.Mid_point_GPS_data.values,index=final.Route_Node_id).to_dict()
# Keep only the retained nodes. A new dict is built because deleting entries while
# iterating over the same dict raises
# "RuntimeError: dictionary changed size during iteration".
nodes_to_keep = set(list_of_nodes_Road_toKeep)
dict_gps = {node_id: midpoint for node_id, midpoint in dict_gps.items() if node_id in nodes_to_keep}


In [None]:
d = pd.DataFrame(dict_gps)
d.to_csv('gps_10min.csv')

In [None]:
final_data_df.to_csv('final_10min.csv')

# Finding correlation coefficient

In [None]:
# Per-bin mean of route segment 740 on 07.01.16.
sig1 = final[(final.Route_Node_id == 740) & (final.betriebsdatum == '07.01.16')]
# sort_values returns a new frame; sorting the slice in place triggers
# pandas' SettingWithCopyWarning.
sig1 = sig1.sort_values(by='soll_an_von')
# numeric_only=True: string/tuple columns of `final` cannot be averaged.
sig1 = sig1.groupby(by='intervals', as_index=False).mean(numeric_only=True)

In [None]:
# Per-bin mean of route segment 454 on 07.01.16.
sig2 = final[(final.Route_Node_id == 454) & (final.betriebsdatum == '07.01.16')]
# sort_values returns a new frame; sorting the slice in place triggers
# pandas' SettingWithCopyWarning.
sig2 = sig2.sort_values(by='soll_an_von')
# numeric_only=True: string/tuple columns of `final` cannot be averaged.
sig2 = sig2.groupby(by='intervals', as_index=False).mean(numeric_only=True)

In [None]:
sig1_X = np.array(sig1.intervals)
sig1_y = np.array(sig1.actual_travel_time_n_target)
sig1_y_zeromean = sig1_y - sig1_y.mean()

sig2_X = np.array(sig2.intervals)
sig2_y = np.array(sig2.actual_travel_time_n_target)
sig2_y_zeromean = sig2_y - sig2_y.mean()

plt.plot(sig1_X, sig1_y, sig2_X, sig2_y)

In [None]:
plt.plot(sig1_X, sig1_y_zeromean, sig2_X, sig2_y_zeromean)

In [None]:
# pearsonr is exposed by the public scipy.stats namespace; `scipy.stats.stats` is a
# private module path that is deprecated in recent SciPy releases.
from scipy.stats import pearsonr
# Correlation matrix of the two zero-mean signals.
np.corrcoef(sig1_y_zeromean, sig2_y_zeromean)

In [None]:
pearsonr_coef = pearsonr(sig1_y, sig2_y)
pearsonr_coef

In [None]:
# For the entire network - all signals connected to node 726:
# len(edge_series[726])

In [None]:
edge_series.value_counts()[:5]

In [None]:
# 1283    218
# 1498    218
# 1497    212
# 611     212
# 622     212

# %matplotlib
sig1 = np.array(final_data_df[str(213)])

list_of_node_crosscoef = []

for i in edge_series[213]:
    sig2 = np.array(final_data_df[str(i)])
    list_of_node_crosscoef.append((i, np.corrcoef(sig1, sig2)[0][1]))

zlst = list(zip(*list_of_node_crosscoef))
list_of_node_crosscoef = pd.Series(zlst[1], index = zlst[0])
list_of_node_crosscoef = list_of_node_crosscoef.dropna().sort_values()

plt.scatter(list_of_node_crosscoef.index.values.tolist(), list_of_node_crosscoef.values.tolist())

In [None]:
# Autocorrelation
def autocorr(x, t=1):
    """Correlate a sequence with a copy of itself shifted by ``t`` samples.

    Returns the 2x2 matrix produced by ``np.corrcoef``; its off-diagonal entries
    hold the lag-``t`` correlation coefficient.
    """
    leading = x[:-t]   # everything except the last t samples
    lagged = x[t:]     # everything except the first t samples
    return np.corrcoef(leading, lagged)
autocorr(sig2_y_zeromean)

In [None]:
# %matplotlib
# import seaborn as sns
# corr = final_data_df.corr()
# sns.heatmap(corr, 
#             xticklabels=corr.columns.values,
#             yticklabels=corr.columns.values)

In [None]:
ix = final_data_df.corr().sort_values('213', ascending=False).index
df_sorted = final_data_df.loc[:, ix]

In [None]:
# Show the connected components of the graph. This is an important step: make sure
# that the graph is connected and that every node has at least degree 1.
# nx.connected_component_subgraphs was removed in NetworkX 2.4; the subgraphs are
# built from nx.connected_components instead.
[G.subgraph(component).copy() for component in nx.connected_components(G)]

In [None]:
# final_data_df

In [None]:
# Row/column order of the adjacency matrix follows the column order of the signal frame.
a = [int(i) for i in final_data_df.columns.values.tolist()]

# to_numpy_array returns a plain ndarray; nx.to_numpy_matrix was removed in NetworkX 3.0.
adj_mat = nx.to_numpy_array(G, nodelist=a)

np.savetxt("output_weights.csv", adj_mat, delimiter=",")

In [None]:
final_data_df.to_csv('output_values.csv', index=False)

In [None]:
# [str(i) for i in list(list(nx.connected_component_subgraphs(G))[0].nodes())]

In [None]:
# Adjacency matrix of G with rows/columns ordered by the node list `a`.
# to_numpy_array returns a plain ndarray; nx.to_numpy_matrix was removed in NetworkX 3.0.
adj_mat = nx.to_numpy_array(G, nodelist=a)
adj_mat

# Create the graph another way! V2

In [None]:
Nodes = list(set(new_final.Route_Node_id))

In [None]:
# Map every route id to its (from id, to id) tuple.
Dict_List_halt_ids = pd.Series(new_final.Route_Node_id_tuple.values,index=new_final.Route_Node_id).to_dict()
# Show the size of the dict that was just built (the cell previously displayed the
# length of the unrelated Dict_List).
len(Dict_List_halt_ids)

In [None]:
# Create the new graph:
# two road segments are linked when they share the same "from" id or the same "to" id;
# the edge weight is the inverse of the distance (in km) between their midpoints.

# GPS midpoints are in Dict_List
Edges = []
segment_items = list(Dict_List_halt_ids.items())
for key, value in segment_items:
    for key_2, value_2 in segment_items:
        if key == key_2:
            continue
        shares_endpoint = value[0] == value_2[0] or value[1] == value_2[1]
        if not shares_endpoint:
            continue
        separation_km = vincenty(Dict_List[int(key)], Dict_List[int(key_2)]).kilometers
        # NOTE(review): two different segments with identical midpoints give a distance
        # of 0 and make this division fail - confirm that cannot happen in the data.
        Edges.append((int(key), int(key_2), 1/(separation_km)))
    


In [None]:
len(Edges)

In [None]:
New_G = nx.Graph()
New_G.add_nodes_from(list_of_nodes_Road_toKeep)
New_G.add_weighted_edges_from(Edges)

In [None]:
list(New_G.nodes)

In [None]:
nx.draw_networkx(New_G)

In [None]:
# # Get Line graph:
# New_G_line = nx.line_graph(New_G)

In [None]:
# nx.draw(New_G_line)

In [None]:
# print(len(list(New_G_line.nodes())))
# print(len(set(new_final.Route_Node_id)))

# print(len(list(New_G_line.edges())))
# # print(len(set(new_final.Route_Node_id)))

In [None]:
# Route ids present in new_final but missing from the graph.
# `New_G.nodes` is the accessor used elsewhere in this notebook; the `.node` alias was
# removed in NetworkX 2.4.
set1 = set(New_G.nodes)
set2 = set(new_final.Route_Node_id)
set2 - set1

# Remove nodes with no connections


In [None]:
len(New_G.nodes)

In [None]:
# Keep only the largest connected component.
# nx.connected_component_subgraphs was removed in NetworkX 2.4, so the largest node
# set is picked from nx.connected_components and turned into a subgraph copy.
largest_component = max(nx.connected_components(New_G), key=len)
Connected_New_G = New_G.subgraph(largest_component).copy()
nx.draw_networkx(Connected_New_G)

In [None]:
from datetime import timedelta
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import pandas as pd

import folium
import folium.plugins as plugins
import numpy as np

# Signal values exported above (at most the first 252 rows). The first CSV column is
# the saved row index; reading it as the index keeps it out of the interpolation and
# normalization below (it used to be treated as data and dropped only afterwards).
df = pd.read_csv('final_10min.csv', nrows=252, index_col=0)

# Fill remaining gaps from neighbouring columns (axis=1).
# NOTE(review): axis=1 interpolates across nodes rather than along time - confirm intended.
df.interpolate(inplace=True, axis=1)

# Number of values still missing after interpolation.
print(df.isna().sum().sum())

# Node midpoints exported above: one column per node, rows are the two coordinates.
df_2 = pd.read_csv('gps_10min.csv', index_col=0)

# Per-column centring and scaling (the std is raised to the power 1.2).
df = (df - df.mean()) / (df.std() ** 1.2)

# Outer list: one entry per time step, each holding [coord, coord, value] per node.
list_df_gps = df_2.columns.values.tolist()
# The coordinates do not change over time, so look them up once per node.
node_coords = {j: df_2[j].values.tolist() for j in list_df_gps}
OuterList = []
for index, row in df.iterrows():
    OuterList.append([node_coords[j] + [row[j]] for j in list_df_gps])

m = folium.Map([47.36179377478453, 8.572671632662107], tiles='stamentoner', zoom_start=12)

# (first coordinate, second coordinate) pairs of every node, drawn as small circles.
list_of_id = df_2.values
list_of_id = list(zip(list_of_id[0], list_of_id[1]))

for j in list_of_id:
    folium.Circle(j, popup='<strong>Location One</strong>', radius=20).add_to(m)

# One time-of-day label per row of df: bin numbers cycle 0 .. n_bins - 1.
# Iterating a set of the observed intervals has no guaranteed order and need not yield
# exactly df.shape[0] labels, so the labels are derived from the bin configuration.
index = [str(timedelta(seconds=(step % n_bins) * time_period + starting_time)) for step in range(df.shape[0])]

hm = plugins.HeatMapWithTime(OuterList, index=index)

hm.add_to(m)

m.save('index.html')

In [None]:
# Visualize the graph with the heatmap

## Note about this visualization!

As of Oct. 2020 there is a bug in folium's `HeatMapWithTime` class. It is explained in detail here:

https://github.com/python-visualization/folium/issues/1221

If that page is no longer available, the workaround is to replace:

"https://rawcdn.githack.com/socib/Leaflet.TimeDimension/master/dist/leaflet.timedimension.min.js"

in the index.html file (it is referenced in a script tag; you can open index.html in Sublime Text or any other text editor and find it with Ctrl+F) with:

"https://cdn.jsdelivr.net/npm/leaflet-timedimension@1.1.0/dist/leaflet.timedimension.min.js"

Be sure to save changes, and reload the page.

# Save Files

In [None]:
# Row/column order of the adjacency matrix follows the column order of the signal frame.
a = [int(i) for i in final_data_df.columns.values.tolist()]

# to_numpy_array returns a plain ndarray; nx.to_numpy_matrix was removed in NetworkX 3.0.
# NOTE(review): this is built from G (the V1 graph) although the result is saved as
# output_v2_weights.csv - confirm whether New_G (the V2 graph) was meant.
adj_mat = nx.to_numpy_array(G, nodelist=a)

In [None]:
final_data_df.to_csv('output_v2_values.csv', header=False, index=False)

In [None]:
np.savetxt("output_v2_weights.csv", adj_mat, delimiter=",")

In [None]:
# New_G.edges(data=True)