<a href="https://colab.research.google.com/github/rutevicente/edit-g2-final-project/blob/get_historical_data/EDA_gtfs_endpoints.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**General Documentation:**
* https://gtfs.org/documentation/schedule/reference/

#gtfs (zipped folder) - endpoint

In [2]:
import os
import requests
import zipfile
import pandas as pd
pd.set_option('display.max_columns', None)  # Show all columns

# Step 1: Download the GTFS ZIP file
url = "https://api.carrismetropolitana.pt/gtfs"
zip_file = "carris_metropolitana_gtfs.zip"
extract_folder = "carris_metropolitana_gtfs"

# Download the file
response = requests.get(url)
if response.status_code == 200:
    with open(zip_file, "wb") as f:
        f.write(response.content)
    print("GTFS ZIP file downloaded successfully.")
else:
    raise Exception(f"Failed to download the GTFS file. Status code: {response.status_code}")

# Step 2: Extract the ZIP file
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)
    print(f"Extracted contents to {extract_folder}")

# Step 3: List the names of the files in the extracted folder
file_names = [file_name for file_name in os.listdir(extract_folder) if file_name.endswith(".txt")]
print("\nList of files in the extracted folder:")
for file_name in file_names:
    print(file_name)

# Step 4: Read the text files as pandas DataFrames and create independent variables
for file_name in file_names:
    file_path = os.path.join(extract_folder, file_name)
    df_name = file_name.replace(".txt", "")  # Remove .txt for DataFrame name
    df = pd.read_csv(file_path)  # Load the text file as a DataFrame

    # Dynamically create a variable with the name of the DataFrame
    globals()[df_name] = df
    print(f"Created variable for DataFrame {df_name}")

GTFS ZIP file downloaded successfully.
Extracted contents to carris_metropolitana_gtfs

List of files in the extracted folder:
agency.txt
periods.txt
fare_attributes.txt
stops.txt
stop_times.txt
feed_info.txt
fare_rules.txt
municipalities.txt
archives.txt
shapes.txt
dates.txt
calendar_dates.txt
trips.txt
routes.txt
Created variable for agency DataFrame
Created variable for periods DataFrame
Created variable for fare_attributes DataFrame
Created variable for stops DataFrame
Created variable for stop_times DataFrame
Created variable for feed_info DataFrame
Created variable for fare_rules DataFrame
Created variable for municipalities DataFrame
Created variable for archives DataFrame
Created variable for shapes DataFrame
Created variable for dates DataFrame
Created variable for calendar_dates DataFrame
Created variable for trips DataFrame
Created variable for routes DataFrame


  df = pd.read_csv(file_path)  # Load the text file as a DataFrame


# EDA

In [None]:
# Irrelevant:
#feed_info
#agency
#fare_attributes
#fare_rules


#Relevant:
#stops.head()
#routes.head() # line_id, route_id
#trips.head() # route_id
#stop_times

#calendar_dates.head()
#periods.head()
#dates.head()

#municipalities.head() # municipality_id

#archives.head()
#shapes.head()

Métricas Históricas:
* Velocidade média
* nr viagens
* Km percorridos
* Tempo de viagem total



Filtros Gerais do Dashboard:
* Data
* Linha
* Rota
* Direção
* Rotas que param em estação X
* Rotas que servem munícipio X

##Lines - endpoint

In [3]:
# Send the GET request to the API
response_lines = requests.get('https://api.carrismetropolitana.pt/lines')
lines = response_lines.json()

# Convert the JSON data into a DataFrame
df_lines = pd.json_normalize(lines)  # This will flatten nested structures if any

In [4]:
#df_lines
df_lines[df_lines['id'] == '2125']

#Exemple: line id 2125 has 3 routes and 3 patterns; 1 pattern = 1 route? No: documentation says "each route can have at most two patterns"

# column routes (att it is an array) = route_id in dataframes patterns and routes

Unnamed: 0,color,facilities,id,localities,long_name,municipalities,patterns,routes,short_name,text_color
218,#3D85C6,[],2125,"[Mafra, Ericeira]",Urbana da Ericeira 2 | Circular via Outeirinho...,[1109],"[2125_0_3, 2125_1_3, 2125_2_3]","[2125_0, 2125_1, 2125_2]",2125,#FFFFFF


##Patterns - endpoint

In [None]:
# Send the GET request to the API
response_patterns = requests.get('https://api.carrismetropolitana.pt/patterns/2125_0_3') # line 2125 has patterns [2125_0_3, 2125_1_3, 2125_2_3]. This example is just one pattern
patterns = response_patterns.json()

# Convert the JSON data into a DataFrame
df_patterns = pd.json_normalize(patterns)  # This will flatten nested structures if any

In [None]:
df_patterns #2125_0_3

Unnamed: 0,color,direction,facilities,headsign,id,line_id,localities,municipalities,path,route_id,shape_id,short_name,text_color,trips,valid_on
0,#3D85C6,0,[],Urbana da Ericeira 2,2125_0_3,2125,[Mafra],[1109],"[{'allow_drop_off': False, 'allow_pickup': Fal...",2125_0,2125_0_3_5ZVUM,2125,#FFFFFF,"[{'calendar_description': '', 'calendar_id': '...","[20250106, 20250113, 20250120, 20250127, 20250..."


## Routes

In [None]:
# line 2125 has the routes [2125_0, 2125_1, 2125_2]

route_2125_0 = routes[routes['route_id'] == '2125_0']
route_2125_1 = routes[routes['route_id'] == '2125_1']
route_2125_2 = routes[routes['route_id'] == '2125_2']

# Differs just the path_type?

In [None]:
route_2125_0

Unnamed: 0,agency_id,circular,line_id,line_long_name,line_short_name,line_type,path_type,route_color,route_id,route_long_name,route_short_name,route_text_color,route_type,school
683,CM,1,2125,Urbana da Ericeira 2 | Circular via Outeirinho...,2125,,1,3D85C6,2125_0,Urbana da Ericeira 2 | Circular via Outeirinho...,2125,FFFFFF,3,


In [None]:
route_2125_1

Unnamed: 0,agency_id,circular,line_id,line_long_name,line_short_name,line_type,path_type,route_color,route_id,route_long_name,route_short_name,route_text_color,route_type,school
684,CM,1,2125,Urbana da Ericeira 2 | Circular via Outeirinho...,2125,,3,3D85C6,2125_1,Urbana da Ericeira 2 | Circular via Fonte Boa ...,2125,FFFFFF,3,


### Merging Routes with Trips

In [None]:
# 1 line has multiple routes and each route will have multiple trip_ids
merged_routes_trips = pd.merge(routes,trips, on='route_id', how='inner').sort_values(by=['line_id', 'route_id'])

#merged_routes_trips[merged_routes_trips['line_id'] == 2125]
#print(merged_routes_trips.columns)

#Seleccting just a few columns
selected_cols = ['line_id', 'route_id', 'service_id','trip_id','route_type','direction_id', 'pattern_id','shape_id']
df_selected = merged_routes_trips[selected_cols]
df_selected[df_selected['line_id'] == 2125]

Unnamed: 0,line_id,route_id,service_id,trip_id,route_type,direction_id,pattern_id,shape_id
312575,2125,2125_0,1101_5ZVUM,2125_0_3|110|1|0930_5ZVUM,3,0,2125_0_3,2125_0_3_5ZVUM
312576,2125,2125_0,1101_5ZVUM,2125_0_3|110|1|1100_5ZVUM,3,0,2125_0_3,2125_0_3_5ZVUM
312577,2125,2125_0,1101_5ZVUM,2125_0_3|110|1|1430_5ZVUM,3,0,2125_0_3,2125_0_3_5ZVUM
312578,2125,2125_0,1101_5ZVUM,2125_0_3|110|1|1620_5ZVUM,3,0,2125_0_3,2125_0_3_5ZVUM
312579,2125,2125_0,1102_5ZVUM,2125_0_3|110|2|0930_5ZVUM,3,0,2125_0_3,2125_0_3_5ZVUM
...,...,...,...,...,...,...,...,...
312710,2125,2125_2,1503_5ZVUM,2125_2_3|150|3|0845_5ZVUM,3,0,2125_2_3,2125_2_3_5ZVUM
312711,2125,2125_2,1503_5ZVUM,2125_2_3|150|3|1145_5ZVUM,3,0,2125_2_3,2125_2_3_5ZVUM
312712,2125,2125_2,1503_5ZVUM,2125_2_3|150|3|1245_5ZVUM,3,0,2125_2_3,2125_2_3_5ZVUM
312713,2125,2125_2,1503_5ZVUM,2125_2_3|150|3|1510_5ZVUM,3,0,2125_2_3,2125_2_3_5ZVUM


## Trips

In [None]:
trips.head()

Unnamed: 0,calendar_desc,direction_id,pattern_id,route_id,service_id,shape_id,trip_headsign,trip_id
0,,0,1001_0_1,1001_0,1_MPLKH,1_MPLKH,Reboleira (Estação),1001_0_1_0600_0629_0_1_MPLKH
1,,0,1001_0_1,1001_0,14_MPLKH,1_MPLKH,Reboleira (Estação),1001_0_1_0600_0629_0_14_MPLKH
2,,0,1001_0_1,1001_0,4_MPLKH,1_MPLKH,Reboleira (Estação),1001_0_1_0600_0629_0_4_MPLKH
3,,0,1001_0_1,1001_0,7_MPLKH,1_MPLKH,Reboleira (Estação),1001_0_1_0600_0629_0_7_MPLKH
4,,0,1001_0_1,1001_0,1_MPLKH,1_MPLKH,Reboleira (Estação),1001_0_1_0700_0729_0_1_MPLKH


In [5]:
trip_route_2125_0 = trips[trips['route_id'] == '2125_0']
trip_route_2125_1 = trips[trips['route_id'] == '2125_1']
trip_route_2125_2 = trips[trips['route_id'] == '2125_2']

In [None]:
#trip_route_2125_0.head()
trip_route_2125_1
#trip_route_2125_2

Unnamed: 0,calendar_desc,direction_id,pattern_id,route_id,service_id,shape_id,trip_headsign,trip_id
312500,,0,2125_1_3,2125_1,1101_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|110|1|1310_5ZVUM
312501,,0,2125_1_3,2125_1,1102_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|110|2|1310_5ZVUM
312502,,0,2125_1_3,2125_1,1201_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|120|1|1310_5ZVUM
312503,,0,2125_1_3,2125_1,1202_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|120|2|1310_5ZVUM
312504,,0,2125_1_3,2125_1,1301_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|130|1|1310_5ZVUM
312505,,0,2125_1_3,2125_1,1302_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|130|2|1310_5ZVUM
312506,,0,2125_1_3,2125_1,1401_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|140|1|1310_5ZVUM
312507,,0,2125_1_3,2125_1,1402_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|140|2|1310_5ZVUM
312508,,0,2125_1_3,2125_1,1501_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|150|1|1310_5ZVUM
312509,,0,2125_1_3,2125_1,1502_5ZVUM,2125_1_3_5ZVUM,Urbana da Ericeira 2,2125_1_3|150|2|1310_5ZVUM


In [6]:
#trip_route_2125_0['service_id'].unique()

#filt_trip_route_2125_0 = trip_route_2125_0[trip_route_2125_0['service_id'] == '1101_5ZVUM']
#filt_trip_route_2125_0['trip_id'].unique()

#filt_trip_route_2125_0 = trip_route_2125_0[trip_route_2125_0['service_id'] == '1102_5ZVUM']
#filt_trip_route_2125_0['trip_id'].unique()

filt_trip_route_2125_1 = trip_route_2125_1[trip_route_2125_1['service_id'] == '1202_5ZVUM']
filt_trip_route_2125_1['trip_id'].unique()

array(['2125_1_3|120|2|1310_5ZVUM'], dtype=object)

In [8]:
# Are 'trip_id' unique values?
trips['trip_id'].is_unique

True

In [10]:
# One'service_id' has multiple 'route_id'? Yes
service_route_group = trips.groupby('service_id')['route_id'].nunique()

service_route_group[service_route_group > 1]

Unnamed: 0_level_0,route_id
service_id,Unnamed: 1_level_1
1000_AEMLZ,146
100_AEMLZ,81
1100_AEMLZ,143
1101_5ZVUM,151
1102_5ZVUM,106
...,...
SOL17_UYRQQ,2
SOL18_UYRQQ,3
VER_DOM_UYRQQ,101
VER_DU_UYRQQ,145


##Calendar dates (has the column service_id)

In [None]:
#One date will have different service_id
#Eg.20251231   | 15_BDUOM, 12_5ZVUM, 1302_5ZVUM

#calendar_dates.sort_values('date', ascending=True)

#calendar_dates[calendar_dates['date'] == 20251231]
#calendar_dates[calendar_dates['service_id'] == '15_BDUOM'] # Only December 24 and 31
calendar_dates[calendar_dates['service_id'] == '1302_5ZVUM'] # Other days besides December 24 and 31

Unnamed: 0,date,day_type,exception_type,holiday,period,service_id
1979,20250416,1,1,0,2,1302_5ZVUM
2259,20250903,1,1,0,2,1302_5ZVUM
2273,20250910,1,1,0,2,1302_5ZVUM
2483,20251224,1,1,0,2,1302_5ZVUM
2497,20251231,1,1,0,2,1302_5ZVUM


In [12]:
#calendar_dates[calendar_dates['service_id'] == '1101_5ZVUM'] # Ex.: only on mondays
calendar_dates[calendar_dates['service_id'] == '1102_5ZVUM'] # Ex.: only times in a year
#calendar_dates[calendar_dates['service_id'] == '1301_5ZVUM']

Unnamed: 0,date,day_type,exception_type,holiday,period,service_id
2254,20250901,1,1,0,2,1102_5ZVUM
2268,20250908,1,1,0,2,1102_5ZVUM
2478,20251222,1,1,0,2,1102_5ZVUM
2492,20251229,1,1,0,2,1102_5ZVUM


## Stops

In [None]:
stops.head() # stop_id, municipality_id

Unnamed: 0,stop_id,stop_name,stop_name_new,stop_short_name,stop_lat,stop_lon,operational_status,areas,region_id,region_name,district_id,district_name,municipality_id,municipality_name,parish_id,parish_name,locality,jurisdiction,stop_code,tts_stop_name,platform_code,parent_station,location_type,stop_url,has_pole,has_cover,has_shelter,shelter_code,shelter_maintainer,has_mupi,has_bench,has_trash_bin,has_lighting,has_electricity,docking_bay_type,last_infrastructure_maintenance,last_infrastructure_check,has_flag,flag_maintainer,has_pip_static,has_pip_audio,pip_audio_code,has_pip_realtime,pip_realtime_code,has_h2oa_signage,has_schedules,has_tactile_schedules,has_network_map,last_schedules_maintenance,last_schedules_check,last_flag_maintenance,last_flag_check,has_sidewalk,sidewalk_type,has_crossing,has_flat_access,has_wide_access,has_tactile_access,has_abusive_parking,wheelchair_boarding,last_accessibility_maintenance,last_accessibility_check,near_health_clinic,near_hospital,near_university,near_school,near_police_station,near_fire_station,near_shopping,near_historic_building,near_transit_office,near_beach,subway,light_rail,train,boat,airport,bike_sharing,bike_parking,car_parking
0,10001,Rua Carlos Manuel Rodrigues Francisco (Escola),Rua Carlos Manuel Rodrigues Francisco (Escola),R. Carlos Manuel Rodrigues Francisco (Escola),38.754244,-8.959557,ACTIVE,44,PT170,AML,15,Setúbal,1502,Alcochete,,,Alcochete,,10001,Rua Carlos Manuel Rodrigues Francisco ( Escola ),,,0,https://on.carrismetropolitana.pt/stops/010001,0,UNKNOWN,UNKNOWN,,,,UNKNOWN,UNKNOWN,,,,,,UNKNOWN,,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,,,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,0,,,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1,10002,R Carlos M. Francisco 229 (Escola Monte Novo),Rua Carlos Manuel Rodrigues Francisco (Frente ...,R. Carlos Manuel Rodrigues Francisco (Ft. Esco...,38.754572,-8.959615,ACTIVE,44,PT170,AML,15,Setúbal,1502,Alcochete,,,Alcochete,,10002,Rua Carlos M. Francisco 229 ( Escola Monte Novo ),,,0,https://on.carrismetropolitana.pt/stops/010002,0,UNKNOWN,UNKNOWN,,,,UNKNOWN,UNKNOWN,,,,,,UNKNOWN,,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,,,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,0,,,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2,10005,ALCOCHETE (R CIPRIÃO FIGUEIREDO),Rua Doutor Ciprião Figueiredo 13,R. Dr. Ciprião Figueiredo 13,38.754175,-8.961806,ACTIVE,44,PT170,AML,15,Setúbal,1502,Alcochete,,,Alcochete,,10005,Alcochete ( Rua Ciprião Figueiredo ),,,0,https://on.carrismetropolitana.pt/stops/010005,UNKNOWN,UNKNOWN,UNKNOWN,,,,UNKNOWN,UNKNOWN,,,,,,UNKNOWN,,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,,,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,0,,,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,10007,ALCOCHETE (R LEITE CUNHA) BIBLIOTECA,Rua Professor Leite Cunha (Biblioteca),R. Prof. Leite Cunha (Biblioteca),38.753196,-8.963687,ACTIVE,44,PT170,AML,15,Setúbal,1502,Alcochete,,,Alcochete,,10007,Alcochete ( Rua Leite Cunha ) Biblioteca,,,0,https://on.carrismetropolitana.pt/stops/010007,0,UNKNOWN,UNKNOWN,,,,UNKNOWN,UNKNOWN,,,,,,UNKNOWN,,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,,,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,0,,,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,10008,ALCOCHETE (R LEITE CUNHA) BIBLIOTECA,Rua Professor Leite Cunha 151,R. Prof. Leite Cunha 151,38.753271,-8.963504,ACTIVE,44,PT170,AML,15,Setúbal,1502,Alcochete,,,Alcochete,,10008,Alcochete ( Rua Leite Cunha ) Biblioteca,,,0,https://on.carrismetropolitana.pt/stops/010008,0,UNKNOWN,UNKNOWN,,,,UNKNOWN,UNKNOWN,,,,,,UNKNOWN,,UNKNOWN,UNKNOWN,,UNKNOWN,,,,,,,,,,,,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,UNKNOWN,0,,,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


##Stop Times

These trip_id are "base" schedules (similar to the ones we see at bus stops with the estimated arrival times).

**In the context of the project, these will be used as if they were historical data.**

In [13]:
#These trip_id belong to the service_id '1101_5ZVUM' of route '2125_0'
#2125_0_3|110|1|0930_5ZVUM # 09:30
#2125_0_3|110|1|1100_5ZVUM # 11:00
#2125_0_3|110|1|1430_5ZVUM # 14:30
#2125_0_3|110|1|1620_5ZVUM # 16:20

stop_times[stop_times['trip_id'] == '2125_0_3|110|1|0930_5ZVUM'].sort_values('arrival_time', ascending=True)

Unnamed: 0,arrival_time,departure_time,drop_off_type,pickup_type,shape_dist_traveled,stop_id,stop_sequence,timepoint,trip_id
10095680,09:30:00,09:30:00,0,0,0.0,82009,0,0,2125_0_3|110|1|0930_5ZVUM
10095681,09:31:08,09:31:08,0,0,447.0,80657,1,0,2125_0_3|110|1|0930_5ZVUM
10095682,09:32:37,09:32:37,0,0,1032.0,80655,2,0,2125_0_3|110|1|0930_5ZVUM
10095683,09:34:00,09:34:00,0,0,1573.0,80653,3,0,2125_0_3|110|1|0930_5ZVUM
10095684,09:34:49,09:34:49,0,0,1839.0,80773,4,0,2125_0_3|110|1|0930_5ZVUM
10095685,09:36:07,09:36:07,0,0,2255.0,82304,5,0,2125_0_3|110|1|0930_5ZVUM
10095686,09:36:39,09:36:39,0,0,2428.0,80776,6,0,2125_0_3|110|1|0930_5ZVUM
10095687,09:38:00,09:38:00,0,0,2860.0,80777,7,0,2125_0_3|110|1|0930_5ZVUM
10095688,09:39:55,09:39:55,0,0,3300.0,80775,8,0,2125_0_3|110|1|0930_5ZVUM
10095689,09:41:00,09:41:00,0,0,3548.0,80779,9,0,2125_0_3|110|1|0930_5ZVUM


In [None]:
#These trip_id belong to the service_id '1102_5ZVUM' of route '2125_0'
#2125_0_3|110|2|0930_5ZVUM # 09:30
#2125_0_3|110|2|1100_5ZVUM
#2125_0_3|110|2|1430_5ZVUM
#2125_0_3|110|2|1620_5ZVUM

stop_times[stop_times['trip_id'] == '2125_0_3|110|2|0930_5ZVUM'].sort_values('arrival_time', ascending=True)

Unnamed: 0,arrival_time,departure_time,drop_off_type,pickup_type,shape_dist_traveled,stop_id,stop_sequence,timepoint,trip_id
10095792,09:30:00,09:30:00,0,0,0.0,82009,0,0,2125_0_3|110|2|0930_5ZVUM
10095793,09:31:08,09:31:08,0,0,447.0,80657,1,0,2125_0_3|110|2|0930_5ZVUM
10095794,09:32:37,09:32:37,0,0,1032.0,80655,2,0,2125_0_3|110|2|0930_5ZVUM
10095795,09:34:00,09:34:00,0,0,1573.0,80653,3,0,2125_0_3|110|2|0930_5ZVUM
10095796,09:34:49,09:34:49,0,0,1839.0,80773,4,0,2125_0_3|110|2|0930_5ZVUM
10095797,09:36:07,09:36:07,0,0,2255.0,82304,5,0,2125_0_3|110|2|0930_5ZVUM
10095798,09:36:39,09:36:39,0,0,2428.0,80776,6,0,2125_0_3|110|2|0930_5ZVUM
10095799,09:38:00,09:38:00,0,0,2860.0,80777,7,0,2125_0_3|110|2|0930_5ZVUM
10095800,09:39:55,09:39:55,0,0,3300.0,80775,8,0,2125_0_3|110|2|0930_5ZVUM
10095801,09:41:00,09:41:00,0,0,3548.0,80779,9,0,2125_0_3|110|2|0930_5ZVUM


In [None]:
#This trip_id belongs to the service_id '1102_5ZVUM' of route '2125_1'
#'2125_1_3|120|2|1310_5ZVUM' # 13:10

stop_times[stop_times['trip_id'] == '2125_1_3|120|2|1310_5ZVUM'].sort_values('arrival_time', ascending=True)

Unnamed: 0,arrival_time,departure_time,drop_off_type,pickup_type,shape_dist_traveled,stop_id,stop_sequence,timepoint,trip_id
10097274,13:10:00,13:10:00,0,0,0.0,82009,0,0,2125_1_3|120|2|1310_5ZVUM
10097275,13:11:50,13:11:50,0,0,997.0,80782,1,0,2125_1_3|120|2|1310_5ZVUM
10097276,13:13:14,13:13:14,0,0,1746.0,80784,2,0,2125_1_3|120|2|1310_5ZVUM
10097277,13:14:00,13:14:00,0,0,2158.0,80786,3,0,2125_1_3|120|2|1310_5ZVUM
10097278,13:14:40,13:14:40,0,0,2432.0,80788,4,0,2125_1_3|120|2|1310_5ZVUM
10097279,13:15:26,13:15:26,0,0,2743.0,80789,5,0,2125_1_3|120|2|1310_5ZVUM
10097280,13:16:00,13:16:00,0,0,2971.0,80791,6,0,2125_1_3|120|2|1310_5ZVUM
10097281,13:17:03,13:17:03,0,0,3397.0,80793,7,0,2125_1_3|120|2|1310_5ZVUM
10097282,13:18:00,13:18:00,0,0,3775.0,80016,8,0,2125_1_3|120|2|1310_5ZVUM
10097283,13:19:25,13:19:25,0,0,4538.0,80759,9,0,2125_1_3|120|2|1310_5ZVUM


##Alerts - endpoint

https://gtfs.org/documentation/realtime/feed-entities/service-alerts/

Updates whenever there is disruption on the network. Delays and cancellations of individual trips

In [14]:
# Send the GET request to the API
response_alerts = requests.get('https://api.carrismetropolitana.pt/alerts')

alerts = response_alerts.json()

# Convert the JSON data into a DataFrame
df_alerts = pd.json_normalize(alerts)  # This will flatten nested structures if any

In [15]:
df_alerts.head()

Unnamed: 0,entity,header.gtfsRealtimeVersion,header.incrementality,header.timestamp
0,"[{'id': 'CarrisMetropolitanaGTFSAlerts-26678',...",2.0,FULL_DATASET,1736028410


In [22]:
# Flatten the 'entity' column
#pd.json_normalize(df_alerts['entity'])

#Expand into a Separate DataFrame
expanded_entities = df_alerts['entity'].apply(pd.Series)

# Flatten the dictionaries in each column of 'expanded_entities'
flattened_entities = pd.concat(
    [pd.json_normalize(expanded_entities[col].dropna()) for col in expanded_entities.columns],
    ignore_index=True
)

flattened_entities.head()

Unnamed: 0,id,alert.activePeriod,alert.informedEntity,alert.cause,alert.effect,alert.url.translation,alert.headerText.translation,alert.descriptionText.translation,alert.image.localizedImage
0,CarrisMetropolitanaGTFSAlerts-26678,"[{'start': 1736172000, 'end': 1736182800}]","[{'routeId': '4542_0'}, {'routeId': '4543_0'}]",CONSTRUCTION,MODIFIED_SERVICE,"[{'language': 'pt', 'text': 'https://backoffic...","[{'language': 'pt', 'text': 'Setúbal | 4542 e ...","[{'language': 'pt', 'text': 'No dia 6 de janei...","[{'language': 'pt', 'mediaType': '', 'url': ''}]"
1,CarrisMetropolitanaGTFSAlerts-25721,"[{'start': 1736121600, 'end': 1737331200}]","[{'routeId': '4310_0'}, {'stopId': '130260'}, ...",UNKNOWN_CAUSE,MODIFIED_SERVICE,"[{'language': 'pt', 'text': 'https://backoffic...","[{'language': 'pt', 'text': 'Palmela | 4310: A...","[{'language': 'pt', 'text': 'A partir do dia 6...","[{'language': 'pt', 'mediaType': '', 'url': ''}]"
2,CarrisMetropolitanaGTFSAlerts-26167,"[{'start': 1736121600, 'end': 1737331200}]","[{'routeId': '4525_0'}, {'stopId': '130282'}]",UNKNOWN_CAUSE,ADDITIONAL_SERVICE,"[{'language': 'pt', 'text': 'https://backoffic...","[{'language': 'pt', 'text': 'Palmela | 4525: N...","[{'language': 'pt', 'text': 'A partir do dia 6...","[{'language': 'pt', 'mediaType': '', 'url': ''}]"
3,CarrisMetropolitanaGTFSAlerts-25722,"[{'start': 1736121600, 'end': 1737331200}]","[{'routeId': '4311_0'}, {'stopId': '130269'}]",UNKNOWN_CAUSE,MODIFIED_SERVICE,"[{'language': 'pt', 'text': 'https://backoffic...","[{'language': 'pt', 'text': 'Palmela | 4311: P...","[{'language': 'pt', 'text': 'A partir do dia 6...","[{'language': 'pt', 'mediaType': 'image/png', ..."
4,CarrisMetropolitanaGTFSAlerts-26299,"[{'start': 1736121600, 'end': 1736201210}]","[{'routeId': '1717_0'}, {'stopId': '120520'}]",UNKNOWN_CAUSE,ADDITIONAL_SERVICE,"[{'language': 'pt', 'text': 'https://backoffic...","[{'language': 'pt', 'text': 'Amadora, Lisboa, ...","[{'language': 'pt', 'text': 'A partir do dia ...","[{'language': 'pt', 'mediaType': 'image/png', ..."
