In [1]:
import pandas as pd
import networkx as nx
import my_funcs as fs
from my_funcs import *

# Load Data

In [2]:
stations = fs.stations
ridership = fs.ridership

In [3]:
stations.head()

Unnamed: 0,Complex ID,Stop Name,Borough
0,1,Astoria-Ditmars Blvd,Q
1,2,Astoria Blvd,Q
2,3,30 Av,Q
3,4,Broadway,Q
4,5,36 Av,Q


In [4]:
ridership.head()

Unnamed: 0,Origin Station Complex ID,Origin Station Complex Name,Destination Station Complex ID,Destination Station Complex Name,Estimated Average Ridership,Day of Week,Hour of Day
0,26,"DeKalb Av (B,Q,R)",355,"Winthrop St (2,5)",0.5556,Monday,1
1,231,"Grand St (B,D)",284,Nassau Av (G),0.3068,Monday,1
2,313,"72 St (1,2,3)",71,8 Av (N),0.3012,Monday,1
3,320,23 St (1),309,103 St (1),0.9,Monday,1
4,399,68 St-Hunter College (6),618,"14 St (A,C,E)/8 Av (L)",0.294,Monday,1


# Question 1: Find the top 5 origin sub-way stations from where most riders took subway ride

## Question 1A: Across each borough (Manhattan, Brooklyn, Queens, Bronx, State Island)

In [5]:
boroughs = ['M', 'Bk', 'Q', 'Bx', 'SI']

top_5_origin_stations = []
for borough in boroughs:
    in_borough_id = stations[stations['Borough'] == borough]['Complex ID'].unique()
    
    top_5_origin_station = computeSumOutDegreeEdgeWeights(ridership[ridership['Origin Station Complex ID'].isin(in_borough_id)])
    print(f"Top 5 origin stations in {borough}:", top_5_origin_station)
    top_5_origin_stations.append(top_5_origin_station)

Top 5 origin stations in M: Times Sq-42 St, 14 St-Union Sq, Grand Central-42 St, 34 St-Herald Sq, Fulton St
Top 5 origin stations in Bk: Atlantic Av-Barclays Ctr, Jay St-MetroTech, Bedford Av, Borough Hall, Myrtle-Wyckoff Avs
Top 5 origin stations in Q: 74 St-Broadway, Flushing-Main St, Sutphin Blvd-Archer Av-JFK Airport, Jamaica Center-Parsons/Archer, Court Sq
Top 5 origin stations in Bx: 161 St-Yankee Stadium, 3 Av-149 St, 149 St-Grand Concourse, Parkchester, Fordham Rd
Top 5 origin stations in SI: No stations found


## Question 1B: On Monday, Tuesday, Wednesday (combined)

In [6]:
question_1b = computeSumOutDegreeEdgeWeights(ridership[ridership['Day of Week'].isin(['Monday', 'Tuesday', 'Wednesday'])])
question_1b

'Times Sq-42 St, Grand Central-42 St, 14 St-Union Sq, 34 St-Herald Sq, Fulton St'

## Question 1C: On Saturday and Sunday (combined)

In [7]:
question_1c = computeSumOutDegreeEdgeWeights(ridership[ridership['Day of Week'].isin(['Saturday', 'Sunday'])])
question_1c

'Times Sq-42 St, 14 St-Union Sq, 34 St-Herald Sq, Grand Central-42 St, Fulton St'

## Question 1D: Between 1am-5am across all days and boroughs

In [8]:
question_1d = computeSumOutDegreeEdgeWeights(ridership[ridership['Hour of Day'].isin([1, 2, 3, 4])])
question_1d

'Times Sq-42 St, 34 St-Herald Sq, 14 St-Union Sq, W 4 St-Wash Sq, 74 St-Broadway'

## Question 1E: Between 6am-9am across all days and boroughs

In [9]:
question_1e = computeSumOutDegreeEdgeWeights(ridership[ridership['Hour of Day'].isin([6, 7, 8])])
question_1e

'Times Sq-42 St, 74 St-Broadway, Grand Central-42 St, Fulton St, Flushing-Main St'

# Question 2: Find the top 5 destination sub-way stations from where most riders took subway ride

## Question 2A: Across each borough (Manhattan, Brooklyn, Queens, Bronx, State Island)

In [10]:
boroughs = ['M', 'Bk', 'Q', 'Bx', 'SI']

top_5_dest_stations = []
for borough in boroughs:
    in_borough_id = stations[stations['Borough'] == borough]['Complex ID'].unique()
    
    top_5_dest_station = computeSumInDegreeEdgeWeights(ridership[ridership['Destination Station Complex ID'].isin(in_borough_id)])
    print(f"Top 5 destination stations in {borough}:", top_5_dest_station)
    top_5_dest_stations.append(top_5_dest_station)

Top 5 destination stations in M: Times Sq-42 St, Grand Central-42 St, 34 St-Herald Sq, 14 St-Union Sq, Fulton St
Top 5 destination stations in Bk: Atlantic Av-Barclays Ctr, Jay St-MetroTech, Bedford Av, Borough Hall, Crown Hts-Utica Av
Top 5 destination stations in Q: 74 St-Broadway, Flushing-Main St, Sutphin Blvd-Archer Av-JFK Airport, Jamaica Center-Parsons/Archer, Court Sq
Top 5 destination stations in Bx: 161 St-Yankee Stadium, 3 Av-149 St, 149 St-Grand Concourse, Parkchester, Fordham Rd
Top 5 destination stations in SI: No stations found


## Question 2B: On Thursday and Friday (combined)

In [11]:
question_2b = computeSumInDegreeEdgeWeights(ridership[ridership['Day of Week'].isin(['Monday', 'Tuesday', 'Wednesday'])])
question_2b

'Times Sq-42 St, Grand Central-42 St, 34 St-Herald Sq, 14 St-Union Sq, Fulton St'

## Question 2C: On Saturday only

In [12]:
question_2c = computeSumInDegreeEdgeWeights(ridership[ridership['Day of Week'] == 'Saturday'])
question_2c

'Times Sq-42 St, 34 St-Herald Sq, 14 St-Union Sq, Grand Central-42 St, 34 St-Penn Station'

## Question 2D: Between 12am-5am across all days and boroughs

In [13]:
question_2d = computeSumInDegreeEdgeWeights(ridership[ridership['Hour of Day'].isin([0, 1, 2, 3, 4])])
question_2d

'Times Sq-42 St, Grand Central-42 St, 34 St-Herald Sq, 74 St-Broadway, 34 St-Penn Station'

## Question 2E: Between 6pm-9pm across all days and boroughs

In [14]:
question_2e = computeSumInDegreeEdgeWeights(ridership[ridership['Hour of Day'].isin([18, 19, 20])])
question_2e

'Times Sq-42 St, 14 St-Union Sq, 34 St-Herald Sq, Grand Central-42 St, Fulton St'

# Question 3: Find the top 10 congested source-destination sub-way stations pair

In [15]:
# Get the unique station IDs for each borough
in_brooklyn = stations[stations['Borough'] == 'Bk']['Complex ID'].unique()
in_queens = stations[stations['Borough'] == 'Q']['Complex ID'].unique()
in_manhattan = stations[stations['Borough'] == 'M']['Complex ID'].unique()
in_bronx = stations[stations['Borough'] == 'Bx']['Complex ID'].unique()
in_si = stations[stations['Borough'] == 'SI']['Complex ID'].unique()

## Question 3A: On Monday between 1pm-2pm

In [16]:
question_3a = computeSumEdgeWeights(ridership[(ridership['Day of Week'] == 'Monday') & (ridership['Hour of Day'] == 13)])
print(question_3a)

14 St-Union Sq - Grand Central-42 St
Flushing-Main St - Junction Blvd
Grand Central-42 St - Fulton St
Grand Central-42 St - Times Sq-42 St
Flushing-Main St - 74 St-Broadway
86 St - Grand Central-42 St
Flushing-Main St - 103 St-Corona Plaza
14 St-Union Sq - Times Sq-42 St
Times Sq-42 St - 59 St-Columbus Circle
Times Sq-42 St - Fulton St


## Question 3B: On Queens borough, on Fridays between 6pm-9pm

In [17]:
subset = ridership[(ridership['Origin Station Complex ID'].isin(in_queens)
                    & ridership['Destination Station Complex ID'].isin(in_queens)) 
                    & (ridership['Day of Week'] == 'Friday') 
                    & (ridership['Hour of Day'].isin([18, 19, 20]))]

question_3b = computeSumEdgeWeights(subset)
print(question_3b)

Flushing-Main St - Junction Blvd
Flushing-Main St - 74 St-Broadway
Flushing-Main St - 103 St-Corona Plaza
Flushing-Main St - 90 St-Elmhurst Av
Flushing-Main St - 82 St-Jackson Hts
Kew Gardens-Union Tpke - 74 St-Broadway
Jamaica-179 St - 74 St-Broadway
Junction Blvd - 74 St-Broadway
Forest Hills-71 Av - 74 St-Broadway
Sutphin Blvd-Archer Av-JFK Airport - 74 St-Broadway


## Question 3C: On Brooklyn borough, Ridership between 1am-5am

In [18]:
subset = ridership[(ridership['Origin Station Complex ID'].isin(in_brooklyn))
                    & (ridership['Destination Station Complex ID'].isin(in_brooklyn)) 
                    & (ridership['Hour of Day'].isin([1, 2, 3, 4]))]

question_3c = computeSumEdgeWeights(subset)
print(question_3c)

Bedford Av - Myrtle-Wyckoff Avs
Bedford Av - Jefferson St
Bedford Av - DeKalb Av
36 St - Atlantic Av-Barclays Ctr
Lorimer St - Myrtle-Wyckoff Avs
Jefferson St - Lorimer St
Jefferson St - Myrtle-Wyckoff Avs
DeKalb Av - Lorimer St
Crown Hts-Utica Av - Atlantic Av-Barclays Ctr
Flatbush Av-Brooklyn College - Atlantic Av-Barclays Ctr


## Question 3D: Source is Brooklyn, Destination is Manhattan, Monday-Thursday 6am-7am

In [19]:
subset = ridership[(ridership['Origin Station Complex ID'].isin(in_brooklyn))
                    & (ridership['Destination Station Complex ID'].isin(in_manhattan)) 
                    & (ridership['Day of Week'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday']))
                    & (ridership['Hour of Day'] == 6)]

question_3d = computeSumEdgeWeights(subset)
print(question_3d)

Atlantic Av-Barclays Ctr - Bowling Green
Crown Hts-Utica Av - Grand Central-42 St
Flatbush Av-Brooklyn College - Grand Central-42 St
Kings Hwy - 34 St-Herald Sq
Crown Hts-Utica Av - Fulton St
Flatbush Av-Brooklyn College - Fulton St
Crown Hts-Utica Av - 86 St
Kings Hwy - 47-50 Sts-Rockefeller Ctr
Kings Hwy - 72 St
Crown Hts-Utica Av - 14 St-Union Sq


## Question 3E: Where source is Bronx, Destination is Manhattan, Monday-Thursday 6am-7am

In [20]:
subset = ridership[(ridership['Origin Station Complex ID'].isin(in_bronx))
                    & (ridership['Destination Station Complex ID'].isin(in_manhattan)) 
                    & (ridership['Day of Week'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday']))
                    & (ridership['Hour of Day'] == 6)]   

question_3e = computeSumEdgeWeights(subset)
print(question_3e)

Parkchester - Grand Central-42 St
Parkchester - 125 St
Parkchester - 68 St-Hunter College
Parkchester - 86 St
Parkchester - Fulton St
Parkchester - 51 St
Woodlawn - 86 St
3 Av-149 St - Grand Central-42 St
Parkchester - Brooklyn Bridge-City Hall
161 St-Yankee Stadium - 34 St-Herald Sq


## Question 3F: Where source is Staten Island, Destination is Manhattan, Monday-Thursday 6am-7am

In [21]:
subset = ridership[(ridership['Origin Station Complex ID'].isin(in_si))
                    & (ridership['Destination Station Complex ID'].isin(in_manhattan)) 
                    & (ridership['Day of Week'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday']))
                    & (ridership['Hour of Day'] == 6)]

question_3f = computeSumEdgeWeights(subset)
question_3f

'No pairs found'