# Creation of Different Network Models

In [2]:
import numpy as np
import pandas as pd
import datetime as dt
import networkx as nx
from itertools import combinations
import math

cleaned_data_path = "cleaned_data"
networks_path = "networks"

### Reading the data

In [3]:
stopTimes = pd.read_csv(f"{cleaned_data_path}/stop_times.csv", parse_dates=['arrival_time'])
routes = pd.read_csv(f"{cleaned_data_path}/routes.csv")
stops = pd.read_csv(f"{cleaned_data_path}/stops.csv")
trips = pd.read_csv(f"{cleaned_data_path}/trips.csv")
stopsRoutes = pd.read_csv(f"{cleaned_data_path}/stops_routes.csv")
stopTypes = pd.read_csv(f"{cleaned_data_path}/stop_types_df.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'cleaned_data/stop_types.csv'

In [3]:
routes.head()

Unnamed: 0,route_id,route_short_name,route_long_name,route_type
0,2,2,Unutrašnji Krug - Spoljašnji Krug,0
1,5,5,Kalemegdan /Donji Grad/ - Ustanička,0
2,6,6,Tašmajdan - Ustanička,0
3,7,7,Ustanička - Blok 45,0
4,9,9,Banjica - Blok 45,0


## BGNetL
Stops are represented as nodes, an two nodes (stops) are connected if they are consecutive stops withn one line.<br>
Since we're want to perserve the information about the consecutive stops, the graph should be **directed**.

In [4]:
# adding nodes and their attributes
BGNetL = nx.DiGraph()

stops_list = np.array(stops.stop_id.values, dtype=str)
BGNetL.add_nodes_from(stops_list)
node_attributes = stops.set_index('stop_id').to_dict('index')
node_attributes_str = {str(key): value for key, value in node_attributes.items()}
nx.set_node_attributes(BGNetL, node_attributes_str)

In [5]:
# saving the network
nx.write_gml(BGNetL, f"{networks_path}/BGNetL.gml")

In [6]:
BGNetL.nodes(data=True)

NodeDataView({'1': {'stop_name': 'Kalemegdan (Donji Grad)', 'stop_lat': 44.8268096, 'stop_lon': 20.4532741, 'zone_id': 1}, '2': {'stop_name': 'Tadeuša Košćuška', 'stop_lat': 44.8259171, 'stop_lon': 20.455243, 'zone_id': 1}, '3': {'stop_name': 'Tadeuša Košćuška', 'stop_lat': 44.8262534, 'stop_lon': 20.4549667, 'zone_id': 1}, '4': {'stop_name': 'Kalemegdan', 'stop_lat': 44.8203288, 'stop_lon': 20.4532518, 'zone_id': 1}, '5': {'stop_name': 'Kalemegdan', 'stop_lat': 44.8208843, 'stop_lon': 20.4540765, 'zone_id': 1}, '6': {'stop_name': 'Pristanište', 'stop_lat': 44.8186623, 'stop_lon': 20.4494119, 'zone_id': 1}, '7': {'stop_name': 'Pristanište', 'stop_lat': 44.8189915, 'stop_lon': 20.4495147, 'zone_id': 1}, '8': {'stop_name': 'Brankov Most', 'stop_lat': 44.814955, 'stop_lon': 20.4507714, 'zone_id': 1}, '9': {'stop_name': 'Brankov Most', 'stop_lat': 44.8145626, 'stop_lon': 20.4511267, 'zone_id': 1}, '10': {'stop_name': 'Ekonomski Fakultet', 'stop_lat': 44.8108839, 'stop_lon': 20.4541239, 'zo

In [7]:
BGNetL.nodes()

NodeView(('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '180', '181', '182', '183

In [8]:
# utility dataframes
# for simplicity, only work week data will me used, and night lines will be discarded
routes_daily = routes[~(routes.route_short_name.str.contains('N'))]
trips_work_week = trips[trips.service_id.str.contains('Radni Dan')]
routs_trips = trips_work_week.merge(routes_daily)[['route_id', 'trip_id', 'direction_id']]
routs_trips.drop_duplicates(subset=['route_id', 'direction_id'], inplace=True)
routs_trips_stops = routs_trips.merge(stopTimes).sort_values(by=['route_id', 'direction_id', 'stop_sequence'])
routs_trips_stops['stop_id'] = routs_trips_stops['stop_id'].astype(str)
routs_trips_stops = routs_trips_stops.assign(concatenated=lambda row: list(zip(row['stop_id'], (row['stop_id'].shift(-1)))))
routs_trips_stops['stops'] = routs_trips_stops['concatenated'].where(routs_trips_stops['route_id'] == routs_trips_stops['route_id'].shift(-1))
routs_trips_stops.drop(['concatenated'], axis=1, inplace=True)
grouped_result = (
    routs_trips_stops
    .groupby(['route_id'])['stop_id']
    .agg(last_stop='last', first_stop='first')
    .reset_index(drop=True)
)
grouped_result['new_column'] = grouped_result.apply(lambda row: (row['last_stop'], row['first_stop']), axis=1)
routs_trips_stops.loc[routs_trips_stops.stops.isnull(), 'stops'] = grouped_result['new_column'].values

routs_trips_stops.head()

Unnamed: 0,route_id,trip_id,direction_id,arrival_time,stop_id,stop_sequence,stops
0,2,1423806,0,2023-08-23 04:10:00,7,1,"(7, 5)"
1,2,1423806,0,2023-08-23 04:14:00,5,2,"(5, 71)"
2,2,1423806,0,2023-08-23 04:18:00,71,3,"(71, 73)"
3,2,1423806,0,2023-08-23 04:20:00,73,4,"(73, 52)"
4,2,1423806,0,2023-08-23 04:22:00,52,5,"(52, 54)"


In [9]:
# adding edges: weight of an edge will be incrementally increased with the number of different lines connecting two stops
cons_stops = routs_trips_stops.stops.value_counts()
for i in range(len(cons_stops)):
    index = cons_stops.index[i]
    value = cons_stops.values[i]
    BGNetL.add_edge(index[0], index[1], weight=str(value))
    

In [10]:
# check for nodes that have 0 edges (since we only looked at daily routes and work weeks)
nodes_with_no_edges = list(nx.isolates(BGNetL))
print("nodes_with_no_edges: ",nodes_with_no_edges)
# delete all nodes with no edges
BGNetL.remove_nodes_from(list(nx.isolates(BGNetL)))

nodes_with_no_edges:  ['1128', '2331', '2333', '2442', '2444', '2756', '3735', '3737', '3757']


In [11]:
# BGNetL.nodes(data=True)

In [12]:
# Remove loops
loops = []
for node in BGNetL.nodes():
    if BGNetL.has_edge(node, node):
        BGNetL.remove_edge(node, node)
        loops.append(node)

print("Removed loops from nodes:", loops)

Removed loops from nodes: ['84', '228', '333', '334', '425', '490', '545', '546', '547', '552', '603', '630', '638', '654', '674', '709', '744', '750', '780', '937', '1005', '1027', '1045', '1065', '1143', '1198', '1215', '1235', '1315', '1330', '1349', '1368', '1411', '1426', '1496', '1544', '1562', '1576', '1595', '1597', '1707', '1739', '1777', '1802', '1835', '1836', '1896', '1900', '1912', '1937', '1978', '2032', '2051', '2076', '2090', '2102', '2110', '2124', '2129', '2188', '2209', '2276', '2285', '2294', '2301', '2327', '2351', '2357', '2480', '2486', '2497', '2579', '2691', '2719', '2720', '2749', '2766', '2782', '2794', '2803', '2804', '2838', '2846', '2856', '2898', '2908', '2924', '2977', '2988', '2997', '3036', '3056', '3096', '3133', '3882']


In [13]:
# saving the network
nx.write_gml(BGNetL, f"{networks_path}/BGNetL.gml")

## BGNetP
Stops are represented as nodes, an two nodes (stops) are connected if a line is connecting them.<br>
Since the order in which stops appear within the line is not important, the graph should be **undirected**.

In [13]:
routs_trips_stops.head()

Unnamed: 0,route_id,trip_id,direction_id,arrival_time,stop_id,stop_sequence,stops
0,2,1423806,0,2023-08-11 04:10:00,7,1,"(7, 5)"
1,2,1423806,0,2023-08-11 04:14:00,5,2,"(5, 71)"
2,2,1423806,0,2023-08-11 04:18:00,71,3,"(71, 73)"
3,2,1423806,0,2023-08-11 04:20:00,73,4,"(73, 52)"
4,2,1423806,0,2023-08-11 04:22:00,52,5,"(52, 54)"


In [14]:
# adding nodes and their attributes
BGNetP = nx.Graph()

stops_list = np.array(stops.stop_id.values, dtype=str)
BGNetP.add_nodes_from(stops_list)
node_attributes = stops.set_index('stop_id').to_dict('index')
node_attributes_str = {str(key): value for key, value in node_attributes.items()}
nx.set_node_attributes(BGNetP, node_attributes_str)

In [15]:
route_ids = routs_trips_stops.route_id.unique()
combinations_ = []
for r in route_ids:
    values = routs_trips_stops[routs_trips_stops.route_id==r].stop_id.unique()
    combinations_without_order = list(combinations(values, 2))
    ombinations_without_order_str = [tuple(map(str, c)) for c in combinations_without_order]
    for c in ombinations_without_order_str:
        combinations_.append(c)


In [16]:
# adding edges: weight of an edge will be incrementally increased with the number of different lines connecting two stops
combinations_series =  pd.Series(combinations_)
edge_pairs = combinations_series.value_counts()
for i in range(len(edge_pairs)):
    index = edge_pairs.index[i]
    value = edge_pairs.values[i]
    BGNetP.add_edge(index[0], index[1], weight=str(value))

In [17]:
# check for nodes that have 0 edges (since we only looked at daily routes and work weeks)
nodes_with_no_edges = list(nx.isolates(BGNetP))
print("nodes_with_no_edges: ",nodes_with_no_edges)
# delete all nodes with no edges
BGNetP.remove_nodes_from(list(nx.isolates(BGNetP)))

nodes_with_no_edges:  ['1128', '2331', '2333', '2442', '2444', '2756', '3735', '3737', '3757']


In [18]:
BGNetP.number_of_nodes()

3121

In [19]:
BGNetP.number_of_edges()

163683

In [20]:
# saving the network
nx.write_gml(BGNetP, f"{networks_path}/BGNetP.gml")

#### provere

In [21]:
for edge in BGNetP.edges(data=True):
    source, target, attributes = edge
    weight = attributes.get('weight')
    print(f"Edge: ({source}, {target}), Weight: {weight}")

Edge: (1, 2), Weight: 3
Edge: (1, 3), Weight: 3
Edge: (1, 59), Weight: 2
Edge: (1, 61), Weight: 2
Edge: (1, 57), Weight: 2
Edge: (1, 55), Weight: 2
Edge: (1, 53), Weight: 2
Edge: (1, 74), Weight: 2
Edge: (1, 72), Weight: 2
Edge: (1, 71), Weight: 2
Edge: (1, 56), Weight: 2
Edge: (1, 54), Weight: 2
Edge: (1, 73), Weight: 2
Edge: (1, 52), Weight: 2
Edge: (1, 58), Weight: 2
Edge: (1, 743), Weight: 2
Edge: (1, 94), Weight: 1
Edge: (1, 96), Weight: 1
Edge: (1, 89), Weight: 1
Edge: (1, 92), Weight: 1
Edge: (1, 102), Weight: 1
Edge: (1, 104), Weight: 1
Edge: (1, 98), Weight: 1
Edge: (1, 100), Weight: 1
Edge: (1, 85), Weight: 1
Edge: (1, 87), Weight: 1
Edge: (1, 63), Weight: 1
Edge: (1, 86), Weight: 1
Edge: (1, 88), Weight: 1
Edge: (1, 108), Weight: 1
Edge: (1, 90), Weight: 1
Edge: (1, 64), Weight: 1
Edge: (1, 62), Weight: 1
Edge: (1, 93), Weight: 1
Edge: (1, 105), Weight: 1
Edge: (1, 103), Weight: 1
Edge: (1, 106), Weight: 1
Edge: (1, 107), Weight: 1
Edge: (1, 101), Weight: 1
Edge: (1, 95), We

Edge: (397, 427), Weight: 1
Edge: (397, 456), Weight: 1
Edge: (397, 458), Weight: 1
Edge: (397, 460), Weight: 1
Edge: (397, 462), Weight: 1
Edge: (397, 463), Weight: 1
Edge: (397, 813), Weight: 1
Edge: (397, 1964), Weight: 1
Edge: (397, 1223), Weight: 1
Edge: (397, 1236), Weight: 1
Edge: (397, 1224), Weight: 1
Edge: (397, 1222), Weight: 1
Edge: (397, 1963), Weight: 1
Edge: (397, 812), Weight: 1
Edge: (397, 814), Weight: 1
Edge: (397, 772), Weight: 1
Edge: (397, 434), Weight: 1
Edge: (397, 844), Weight: 1
Edge: (397, 409), Weight: 1
Edge: (397, 653), Weight: 1
Edge: (397, 406), Weight: 1
Edge: (397, 411), Weight: 1
Edge: (397, 423), Weight: 1
Edge: (397, 410), Weight: 1
Edge: (397, 413), Weight: 1
Edge: (397, 415), Weight: 1
Edge: (397, 417), Weight: 1
Edge: (397, 419), Weight: 1
Edge: (397, 421), Weight: 1
Edge: (397, 425), Weight: 1
Edge: (397, 412), Weight: 1
Edge: (397, 424), Weight: 1
Edge: (397, 422), Weight: 1
Edge: (397, 420), Weight: 1
Edge: (397, 418), Weight: 1
Edge: (397, 41

In [22]:
BGNetP.get_edge_data(u='1', v='4')

{'weight': '1'}

In [23]:
BGNetP.is_directed()

False

## BGNetC
Lines are represented as nodes, an two nodes are connected if they share at least 1 common stop.<br>
This network is also **undirected**, and it's kind of inverse network to the BGNetP.

In [24]:
routs_trips_stops.head()

Unnamed: 0,route_id,trip_id,direction_id,arrival_time,stop_id,stop_sequence,stops
0,2,1423806,0,2023-08-11 04:10:00,7,1,"(7, 5)"
1,2,1423806,0,2023-08-11 04:14:00,5,2,"(5, 71)"
2,2,1423806,0,2023-08-11 04:18:00,71,3,"(71, 73)"
3,2,1423806,0,2023-08-11 04:20:00,73,4,"(73, 52)"
4,2,1423806,0,2023-08-11 04:22:00,52,5,"(52, 54)"


In [25]:
# adding nodes and their attributes
BGNetC = nx.Graph()

stops_list = np.array(stops.stop_id.values, dtype=str)
BGNetP.add_nodes_from(stops_list)
node_attributes = stops.set_index('stop_id').to_dict('index')
node_attributes_str = {str(key): value for key, value in node_attributes.items()}
nx.set_node_attributes(BGNetC, node_attributes_str)

In [26]:
unique_stop_ids_per_route = routs_trips_stops.groupby('route_id')['stop_id'].unique()
combinations_without_order = list(combinations(unique_stop_ids_per_route.index.values, 2))
# adding edges
for c in combinations_without_order:
    route1 = c[0]
    route2 = c[1]
    stops1 = unique_stop_ids_per_route[route1]
    stops2 = unique_stop_ids_per_route[route2]
    intersection = np.intersect1d(stops1, stops2)
    intersection_len = intersection.shape[0]
    if intersection_len > 0:
        BGNetC.add_edge(str(route1), str(route2), weight=str(intersection_len)) 

In [27]:
# check for nodes that have 0 edges (since we only looked at daily routes and work weeks)
nodes_with_no_edges = list(nx.isolates(BGNetC))
print("nodes_with_no_edges: ",nodes_with_no_edges)
# delete all nodes with no edges
BGNetP.remove_nodes_from(list(nx.isolates(BGNetC)))

nodes_with_no_edges:  []


In [28]:
# saving the network
nx.write_gml(BGNetC, f"{networks_path}/BGNetC.gml")

#### provere

In [29]:
BGNetC.number_of_nodes()

178

In [30]:
BGNetC.number_of_edges()

1989

In [31]:
for edge in BGNetC.edges(data=True):
    source, target, attributes = edge
    weight = attributes.get('weight')
    print(f"Edge: ({source}, {target}), Weight: {weight}")

Edge: (2, 5), Weight: 18
Edge: (2, 6), Weight: 1
Edge: (2, 7), Weight: 3
Edge: (2, 9), Weight: 4
Edge: (2, 10), Weight: 15
Edge: (2, 11), Weight: 7
Edge: (2, 12), Weight: 5
Edge: (2, 14), Weight: 2
Edge: (2, 24), Weight: 6
Edge: (2, 25), Weight: 1
Edge: (2, 26), Weight: 5
Edge: (2, 27), Weight: 1
Edge: (2, 65), Weight: 4
Edge: (2, 74), Weight: 1
Edge: (2, 77), Weight: 2
Edge: (2, 79), Weight: 14
Edge: (2, 10025), Weight: 1
Edge: (2, 40101), Weight: 1
Edge: (2, 90001), Weight: 1
Edge: (5, 6), Weight: 22
Edge: (5, 7), Weight: 22
Edge: (5, 10), Weight: 17
Edge: (5, 11), Weight: 3
Edge: (5, 12), Weight: 2
Edge: (5, 14), Weight: 22
Edge: (5, 20), Weight: 3
Edge: (5, 24), Weight: 6
Edge: (5, 25), Weight: 1
Edge: (5, 26), Weight: 5
Edge: (5, 27), Weight: 1
Edge: (5, 46), Weight: 3
Edge: (5, 65), Weight: 4
Edge: (5, 74), Weight: 1
Edge: (5, 77), Weight: 2
Edge: (5, 79), Weight: 19
Edge: (5, 309), Weight: 2
Edge: (5, 10025), Weight: 1
Edge: (5, 40101), Weight: 1
Edge: (6, 7), Weight: 24
Edge: (

## BGNetLWalk
Routes and stops differ by the type of transportation (e.g. buses and trams are not having the same routes and stops), but their stops are often spatially very close. <br>
For this putpose, we can merge stops that are spatially very close. To determine how close they need to be, we first should do some data analysis to determine the threshold. <br>
To calculate the distance between two stops, we will use **haversine** distance.

### Distance and threshold calculation 

In [None]:
stopTypes

In [32]:
stops#.head()

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,zone_id
0,1,Kalemegdan (Donji Grad),44.826810,20.453274,1
1,2,Tadeuša Košćuška,44.825917,20.455243,1
2,3,Tadeuša Košćuška,44.826253,20.454967,1
3,4,Kalemegdan,44.820329,20.453252,1
4,5,Kalemegdan,44.820884,20.454076,1
...,...,...,...,...,...
3125,7941,Simić - R,44.626497,20.429264,3
3126,7942,Simić - R,44.626713,20.429340,3
3127,7968,Stara Lipovica - Okretnica,44.613962,20.430680,3
3128,7969,Manastir,44.616600,20.430119,3


In [33]:
r = 6371000 # Earth radius in meters
def haversine_distance(lat1, lon1, lat2, lon2):
  t1 = math.pow(math.sin(math.radians(lat1 - lat2) / 2), 2)
  t2 = math.pow(math.sin(math.radians(lon1 - lon2) / 2), 2)
  d = 2 * r * math.asin(math.sqrt(t1 + t2 * math.cos(math.radians(lat1)) * math.cos(math.radians(lat2))))
  return d

In [34]:
stops[stops.stop_id==combinations_without_order[0][0]].stop_lat.values

array([44.8259171])

In [35]:
values = stops.stop_id.unique()
combinations_without_order = list(combinations(values, 2))

In [45]:
BGNetL.number_of_edges()

3946

In [46]:
not_connected_pairs = [(u, v) for u in BGNetL.nodes() for v in BGNetL.nodes() if not BGNetL.has_edge(u, v)]
not_connected_pairs
# Find pairs of nodes that are not connected by an edge
# not_connected_pairs = [pair for pair in all_node_pairs if not BGNetL.has_edge(u, v)]


9736695

In [70]:
combinations_without_order = list(combinations(stops.stop_id.values, 2))
pairs_to_calc_distance = []
for pair in combinations_without_order:
    # first check if they share a route
    routes_stop_0 = routs_trips_stops[routs_trips_stops.stop_id==pair[0]].route_id.values
    routes_stop_1 = routs_trips_stops[routs_trips_stops.stop_id==pair[1]].route_id.values
    intersection = np.intersect1d(routes_stop_0, routes_stop_1)
    intersection_len = intersection.shape[0]
    if (intersection_len == 0) & (routes_stop_0.shape[0]!=0) & (routes_stop_1.shape[0]!=0):
        # check if they have the same direction
        stop_dir_0 = routs_trips_stops[routs_trips_stops.stop_id==pair[0]].direction_id.values#[0]
        stop_dir_1 = routs_trips_stops[routs_trips_stops.stop_id==pair[1]].direction_id.values[0]
        if stop_dir_0 == stop_dir_1:
            pairs_to_calc_distance.append(pair)


In [None]:

np.save(f'pairs_to_calc_distance.npy', pairs_to_calc_distance)

In [None]:
pairs_to_calc_distance

In [68]:
not_connected_pairs[0][0]

'1'

In [66]:
routs_trips_stops[routs_trips_stops.stop_id==not_connected_pairs[0][0]].direction_id.values#[0]

array([1, 1, 1])

In [50]:
routs_trips_stops

Unnamed: 0,route_id,trip_id,direction_id,arrival_time,stop_id,stop_sequence,stops
0,2,1423806,0,2023-08-11 04:10:00,7,1,"(7, 5)"
1,2,1423806,0,2023-08-11 04:14:00,5,2,"(5, 71)"
2,2,1423806,0,2023-08-11 04:18:00,71,3,"(71, 73)"
3,2,1423806,0,2023-08-11 04:20:00,73,4,"(73, 52)"
4,2,1423806,0,2023-08-11 04:22:00,52,5,"(52, 54)"
...,...,...,...,...,...,...,...
8421,90004,1325238,1,2023-08-11 07:47:00,688,22,"(688, 690)"
8422,90004,1325238,1,2023-08-11 07:48:00,690,23,"(690, 692)"
8423,90004,1325238,1,2023-08-11 07:49:00,692,24,"(692, 694)"
8424,90004,1325238,1,2023-08-11 07:49:00,694,25,"(694, 2199)"


In [None]:
BGNetL.nodes()

In [42]:
for edge in BGNetL.edges(data=True):
    source, target, attributes = edge
    weight = attributes.get('weight')
    print(f"Edge: ({source}, {target}), Weight: {weight}")

Edge: (1, 2), Weight: 3
Edge: (2, 71), Weight: 2
Edge: (2, 4), Weight: 1
Edge: (3, 1), Weight: 3
Edge: (4, 6), Weight: 2
Edge: (5, 3), Weight: 1
Edge: (5, 71), Weight: 1
Edge: (6, 8), Weight: 2
Edge: (6, 7), Weight: 1
Edge: (7, 5), Weight: 2
Edge: (7, 6), Weight: 1
Edge: (8, 14), Weight: 1
Edge: (8, 70), Weight: 1
Edge: (9, 7), Weight: 2
Edge: (10, 1218), Weight: 4
Edge: (10, 70), Weight: 2
Edge: (11, 9), Weight: 2
Edge: (14, 16), Weight: 2
Edge: (15, 11), Weight: 1
Edge: (15, 70), Weight: 1
Edge: (16, 18), Weight: 2
Edge: (17, 15), Weight: 2
Edge: (18, 20), Weight: 2
Edge: (19, 17), Weight: 2
Edge: (20, 22), Weight: 2
Edge: (21, 19), Weight: 2
Edge: (22, 3029), Weight: 1
Edge: (22, 177), Weight: 1
Edge: (23, 21), Weight: 2
Edge: (52, 54), Weight: 4
Edge: (52, 530), Weight: 2
Edge: (53, 74), Weight: 6
Edge: (54, 56), Weight: 4
Edge: (55, 53), Weight: 4
Edge: (56, 58), Weight: 4
Edge: (57, 55), Weight: 4
Edge: (58, 743), Weight: 4
Edge: (59, 57), Weight: 4
Edge: (60, 2097), Weight: 1
Ed

In [43]:
BGNetL.edges()

OutEdgeView([('1', '2'), ('2', '71'), ('2', '4'), ('3', '1'), ('4', '6'), ('5', '3'), ('5', '71'), ('6', '8'), ('6', '7'), ('7', '5'), ('7', '6'), ('8', '14'), ('8', '70'), ('9', '7'), ('10', '1218'), ('10', '70'), ('11', '9'), ('14', '16'), ('15', '11'), ('15', '70'), ('16', '18'), ('17', '15'), ('18', '20'), ('19', '17'), ('20', '22'), ('21', '19'), ('22', '3029'), ('22', '177'), ('23', '21'), ('52', '54'), ('52', '530'), ('53', '74'), ('54', '56'), ('55', '53'), ('56', '58'), ('57', '55'), ('58', '743'), ('59', '57'), ('60', '2097'), ('60', '1192'), ('61', '59'), ('61', '311'), ('62', '61'), ('63', '586'), ('63', '109'), ('63', '65'), ('63', '86'), ('64', '553'), ('64', '62'), ('64', '654'), ('65', '146'), ('65', '67'), ('66', '86'), ('66', '64'), ('67', '69'), ('68', '66'), ('68', '146'), ('69', '115'), ('69', '14'), ('69', '11'), ('70', '68'), ('70', '114'), ('71', '73'), ('72', '2111'), ('72', '3'), ('72', '4'), ('73', '52'), ('74', '72'), ('75', '1044'), ('76', '347'), ('76', '8

In [40]:
all_node_pairs

[('1', '1'),
 ('1', '3'),
 ('1', '4'),
 ('1', '5'),
 ('1', '6'),
 ('1', '7'),
 ('1', '8'),
 ('1', '9'),
 ('1', '10'),
 ('1', '11'),
 ('1', '14'),
 ('1', '15'),
 ('1', '16'),
 ('1', '17'),
 ('1', '18'),
 ('1', '19'),
 ('1', '20'),
 ('1', '21'),
 ('1', '22'),
 ('1', '23'),
 ('1', '52'),
 ('1', '53'),
 ('1', '54'),
 ('1', '55'),
 ('1', '56'),
 ('1', '57'),
 ('1', '58'),
 ('1', '59'),
 ('1', '60'),
 ('1', '61'),
 ('1', '62'),
 ('1', '63'),
 ('1', '64'),
 ('1', '65'),
 ('1', '66'),
 ('1', '67'),
 ('1', '68'),
 ('1', '69'),
 ('1', '70'),
 ('1', '71'),
 ('1', '72'),
 ('1', '73'),
 ('1', '74'),
 ('1', '75'),
 ('1', '76'),
 ('1', '77'),
 ('1', '78'),
 ('1', '79'),
 ('1', '80'),
 ('1', '81'),
 ('1', '82'),
 ('1', '83'),
 ('1', '84'),
 ('1', '85'),
 ('1', '86'),
 ('1', '87'),
 ('1', '88'),
 ('1', '89'),
 ('1', '90'),
 ('1', '91'),
 ('1', '92'),
 ('1', '93'),
 ('1', '94'),
 ('1', '95'),
 ('1', '96'),
 ('1', '97'),
 ('1', '98'),
 ('1', '99'),
 ('1', '100'),
 ('1', '101'),
 ('1', '102'),
 ('1', '103

In [None]:
BGNetL.has_edge('1', '24')

False

In [None]:
nx.non_edges(BGNetL)

<generator object non_edges at 0x7ff5229a83c0>

In [None]:
len(not_connected_pairs)

9733669

In [None]:
unique_stop_ids_dir0 = routs_trips_stops[routs_trips_stops.direction_id==0].stop_id.unique()
unique_stop_ids_dir1 = routs_trips_stops[routs_trips_stops.direction_id==1].stop_id.unique()
combinations_dir0 = list(combinations(unique_stop_ids_dir0, 2)) 

In [None]:
unique_stop_ids_dir0.shape[0]

4289041

In [None]:
len(combinations_dir0)

2143485

In [None]:
unique_stop_ids_per_route_dir0 = routs_trips_stops[routs_trips_stops.direction_id==0].groupby(['route_id'])['stop_id'].unique()
unique_stop_ids_per_route_dir1 = routs_trips_stops[routs_trips_stops.direction_id==1].groupby(['route_id'])['stop_id'].unique()

In [None]:
unique_stop_ids_dir0

route_id
2        [7, 5, 71, 73, 52, 54, 56, 58, 743, 85, 63, 65...
5        [2, 71, 73, 52, 54, 56, 58, 743, 85, 63, 86, 8...
6        [84, 66, 86, 87, 89, 92, 94, 96, 98, 100, 102,...
7        [107, 105, 103, 101, 99, 97, 95, 93, 90, 88, 1...
9        [170, 172, 542, 174, 168, 166, 164, 160, 158, ...
                               ...                        
40102    [529, 2362, 3140, 82, 193, 195, 197, 199, 925,...
90001    [480, 222, 220, 218, 216, 214, 212, 210, 208, ...
90002    [1026, 1024, 1022, 1020, 1018, 1016, 1014, 527...
90003    [637, 708, 634, 470, 627, 625, 623, 621, 619, ...
90004    [2200, 695, 693, 691, 689, 685, 680, 683, 681,...
Name: stop_id, Length: 178, dtype: object

In [54]:
combinations_without_order = list(combinations(stops.stop_id.values, 2))
len(combinations_without_order)

4896885

In [None]:
combinations_without_order = list(combinations(stops.stop_id.values, 2))
for c in combinations_without_order:
    comb_dict = {}
    lat_1 = stops[stops.stop_id==c[0]].stop_lat.values
    lat_2 = stops[stops.stop_id==c[1]].stop_lat.values
    lon_1 = stops[stops.stop_id==c[0]].stop_lon.values
    lon_2 = stops[stops.stop_id==c[1]].stop_lon.values
    dist = haversine_distance(lat_1, lon_1, lat_2, lon_2)
    # comb_dict.update({c: dist})


KeyboardInterrupt: 

In [None]:
comb_dict

array([   0,    1,    2, ..., 3127, 3128, 3129])