In [None]:
# make sure to install these packages before running:
!pip install pandas
!pip install sodapy
!pip install python-dotenv



In [None]:
import pandas as pd
from sodapy import Socrata
from dotenv import load_dotenv
import os

In [None]:
# Load credentials from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Each value is None when the corresponding variable is not defined.
api_token, email, password = (
    os.getenv(name) for name in ("API_TOKEN", "EMAIL", "PASSWORD")
)

In [46]:
# Authenticated Socrata client for the data.ny.gov portal.
# The same client is reused for every dataset request further down.
client = Socrata(
    domain="data.ny.gov",
    app_token=api_token,
    username=email,
    password=password,
)

In [122]:
# DATASET 1 - "MTA Subway Trains Delayed: Beginning 2020" (resource g937-7k7c)
# Fetch the 2024 rows. get_all() pages through the resource and returns a one-shot
# generator, so materialize it into a list: otherwise re-running the DataFrame
# cell below without re-fetching would silently build an empty frame.
results = list(
    client.get_all(
        "g937-7k7c",
        where="month >= '2024-01' AND month <= '2024-12'",
    )
)

In [123]:
# DATASET 1 - "MTA Subway Trains Delayed: Beginning 2020"
# Build a DataFrame from the fetched records, then display it.
delayed_df = pd.DataFrame.from_records(data=results)
delayed_df

Unnamed: 0,month,division,line,day_type,incidents,reporting_category
0,2024-01-01T00:00:00.000,A DIVISION,1,1,0,
1,2024-01-01T00:00:00.000,A DIVISION,1,1,31,Crew Availability
2,2024-01-01T00:00:00.000,A DIVISION,1,1,2,External Factors
3,2024-01-01T00:00:00.000,A DIVISION,1,1,92,Infrastructure & Equipment
4,2024-01-01T00:00:00.000,A DIVISION,1,1,86,Operating Conditions
...,...,...,...,...,...,...
3761,2024-12-01T00:00:00.000,B DIVISION,S Rock,2,16,External Factors
3762,2024-12-01T00:00:00.000,B DIVISION,S Rock,2,6,Infrastructure & Equipment
3763,2024-12-01T00:00:00.000,B DIVISION,S Rock,2,1,Operating Conditions
3764,2024-12-01T00:00:00.000,B DIVISION,S Rock,2,1,Planned ROW Work


In [116]:
# DATASET 2 - "MTA Subway Customer Journey-Focused Metrics: 2020-2024" (resource 4apg-4kt9)
# Fetch the 2024 rows. get_all() pages through the resource and returns a one-shot
# generator, so materialize it into a list: otherwise re-running the DataFrame
# cell below without re-fetching would silently build an empty frame.
results = list(
    client.get_all(
        "4apg-4kt9",
        where="month >= '2024-01' AND month <= '2024-12'",
    )
)

In [117]:
# DATASET 2 - "MTA Subway Customer Journey-Focused Metrics: 2020-2024"
# Build a DataFrame from the fetched records, then display it.
metrics_df = pd.DataFrame.from_records(data=results)
metrics_df

Unnamed: 0,month,division,line,period,num_passengers,additional_platform_time,additional_train_time,total_apt,total_att,over_five_mins,over_five_mins_perc,customer_journey_time
0,2024-01-01T00:00:00.000,A DIVISION,1,offpeak,4873143.5,1.656052,0.79665726,8070179.0,3882225.2,763100.9,0.15659314,0.84340686
1,2024-01-01T00:00:00.000,A DIVISION,1,peak,4765373.0,2.115036,0.8733,10078935.0,4161600.2,743524.6,0.15602654,0.84397346
2,2024-01-01T00:00:00.000,A DIVISION,2,offpeak,2878163.8,1.6068327,0.50131786,4624727.5,1442874.9,487204.1,0.16927601,0.830724
3,2024-01-01T00:00:00.000,A DIVISION,2,peak,3092725.5,2.0733223,0.5617463,6412216.5,1737327.1,460791.2,0.14899196,0.85100806
4,2024-01-01T00:00:00.000,A DIVISION,3,offpeak,2125353.8,1.2491903,0.3281006,2654971.2,697329.8,265963.7,0.12513857,0.8748614
...,...,...,...,...,...,...,...,...,...,...,...,...
571,2024-12-01T00:00:00.000,B DIVISION,S Fkln,peak,194189.34,0.5707303,0.057167325,110829.74,11101.285,9461.589,0.048723523,0.9512765
572,2024-12-01T00:00:00.000,B DIVISION,S Rock,offpeak,26513.352,1.2623241,-0.90806454,33468.44,-24075.834,3445.474,0.12995242,0.87004757
573,2024-12-01T00:00:00.000,B DIVISION,S Rock,peak,19257.482,0.8787027,-1.183044,16921.602,-22782.447,1812.6135,0.09412515,0.90587485
574,2024-12-01T00:00:00.000,B DIVISION,W,offpeak,1265461.6,0.9385583,0.2576316,1187709.5,326022.94,142330.02,0.112472795,0.8875272


In [114]:
# DATASET 3 - "MTA Subway Service Delivered: 2020-2024" (resource bg59-42xi)
# Fetch the 2024 rows. get_all() pages through the resource and returns a one-shot
# generator, so materialize it into a list: otherwise re-running the DataFrame
# cell below without re-fetching would silently build an empty frame.
results = list(
    client.get_all(
        "bg59-42xi",
        where="month >= '2024-01' AND month <= '2024-12'",
    )
)

In [115]:
# DATASET 3 - "MTA Subway Service Delivered: 2020-2024"
# Build a DataFrame from the fetched records, then display it.
delivered_df = pd.DataFrame.from_records(data=results)
delivered_df

Unnamed: 0,month,division,line,day_type,num_sched_trains,num_actual_trains,service_delivered
0,2024-01-01T00:00:00.000,A DIVISION,1,1,1696,1499,0.8838443
1,2024-01-01T00:00:00.000,A DIVISION,1,2,864,819,0.9479167
2,2024-01-01T00:00:00.000,A DIVISION,2,1,2244,2027,0.90329766
3,2024-01-01T00:00:00.000,A DIVISION,2,2,1117,1067,0.95523727
4,2024-01-01T00:00:00.000,A DIVISION,3,1,2069,1937,0.93620104
...,...,...,...,...,...,...,...
549,2024-12-01T00:00:00.000,B DIVISION,Q,2,1756,1714,0.976082
550,2024-12-01T00:00:00.000,B DIVISION,R,1,2163,1611,0.7447989
551,2024-12-01T00:00:00.000,B DIVISION,R,2,1360,1308,0.9617647
552,2024-12-01T00:00:00.000,B DIVISION,W,1,819,749,0.9145299


In [112]:
# DATASET 4 - "MTA Service Alerts: Beginning April 2020" (resource 7kct-peq7)
# Fetch alerts dated within 2024 (half-open range: start inclusive, end exclusive).
# get_all() pages through the resource and returns a one-shot generator, so
# materialize it into a list: otherwise re-running the DataFrame cell below
# without re-fetching would silently build an empty frame.
results = list(
    client.get_all(
        "7kct-peq7",
        where="date >= '2024-01-01T00:00:00' AND date < '2025-01-01T00:00:00'",
    )
)

In [113]:
# DATASET 4 - "MTA Service Alerts: Beginning April 2020"
# Build a DataFrame from the fetched records, then display it.
alerts_df = pd.DataFrame.from_records(data=results)
alerts_df

Unnamed: 0,alert_id,event_id,update_number,date,agency,status_label,affected,header,description
0,281016,134282,2,2024-01-01T00:01:00.000,NYCT Bus,delays,SIM1C,The following SIM1C trips will not run:\n\nFro...,We’re running as much service as we can with t...
1,281017,134319,0,2024-01-01T00:11:00.000,NYCT Subway,delays,J,Jamaica Center-Parsons/Archer-bound J trains a...,
2,281018,134320,0,2024-01-01T00:12:00.000,NYCT Subway,delays,A,Northbound A trains are running with delays af...,
3,281019,134321,0,2024-01-01T00:20:00.000,NYCT Subway,local-to-express,C,Northbound C trains are running express from 1...,"For service to/from 135 St, take a southbound ..."
4,281020,134316,5,2024-01-01T00:24:00.000,LIRR,some-delays,City Terminal Zone,There are scattered delays along the City Term...,
...,...,...,...,...,...,...,...,...,...
92281,379670,184992,2,2024-12-31T23:33:00.000,BT,some-delays,Bronx-Whitestone Bridge,BWB: Disabled vehicle; Queens bound cleared.,
92282,379671,184993,0,2024-12-31T23:48:00.000,NYCT Subway,delays,4,Northbound 4 trains are delayed while we reque...,
92283,379672,184994,0,2024-12-31T23:53:00.000,NYCT Subway,delays,2 | 5,Northbound 2/5 trains are delayed while we req...,
92284,379673,184993,1,2024-12-31T23:55:00.000,NYCT Subway,delays,4,Northbound 4 trains are running with delays af...,
