In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
import pandas as pd

In [None]:
from src.io import (
    load_incident_data, 
    save_processed_data
)
from src.utils import (
    standardize_column_names, 
    normalize_text_column
)
from src.data_cleaning.incidents import ( 
    parse_incident_date_column,
    split_age_gender_column,
    classify_incident_type
)
from src.enrichment.ride_metadata import (
    fetch_wikipedia_ride_metadata
)
from src.enrichment.temporal_features import (
    enrich_temporal_features
)
from src.enrichment.visitor_profile import (
    enrich_visitor_profile
)
from src.enrichment.aggregate_features import (
    enrich_aggregate_features
)
from src.enrichment.weather_enrichment import (
    enrich_weather,
    fetch_weather_for_location
)


In [7]:
df = load_incident_data("data/raw/incidents.csv")

In [8]:
df = standardize_column_names(df)

In [9]:
df = normalize_text_column(df, col="company")

In [10]:
df = parse_incident_date_column(df)   


In [11]:
df = normalize_text_column(df, col="theme_park")

In [12]:
df = split_age_gender_column(df)


In [13]:
df = classify_incident_type(df)

In [14]:
save_processed_data(df, "data/processed/incidents_clean.parquet")

In [15]:
df_unique_rides = df[["ride_name", "theme_park"]].drop_duplicates().copy()

metadata = df_unique_rides.apply(
    lambda row: pd.Series(fetch_wikipedia_ride_metadata(row["ride_name"], row["theme_park"])),
    axis=1
)

df_rides = pd.concat([df_unique_rides, metadata], axis=1)
df_rides.to_csv("data/external/rides_metadata_wikipedia.csv", index=False)

df = df.merge(df_rides, on=["ride_name", "theme_park"], how="left")



🔍 Buscando metadata para: Alien Swirling Saucers HOLLYWOOD STUDIOS
🌐 Título encontrado: Alien Swirling Saucers
🔗 URL: https://en.wikipedia.org/wiki/Alien_Swirling_Saucers
⏱️ Duration (min): None

🔍 Buscando metadata para: Astro Orbiter MAGIC KINGDOM
🌐 Título encontrado: Astro Orbiter
🔗 URL: https://en.wikipedia.org/wiki/Astro_Orbiter
⏱️ Duration (min): 1.5

🔍 Buscando metadata para: Avatar Flight of Passage ANIMAL KINGDOM
🌐 Título encontrado: Avatar Flight of Passage
🔗 URL: https://en.wikipedia.org/wiki/Avatar_Flight_of_Passage
⏱️ Duration (min): 4.4

🔍 Buscando metadata para: Back to the Future UNIVERSAL STUDIOS
🌐 Título encontrado: Back to the Future: The Ride
🔗 URL: https://en.wikipedia.org/wiki/Back_to_the_Future:_The_Ride
⏱️ Duration (min): 15.0

🔍 Buscando metadata para: Big Thunder Mountain Railroad MAGIC KINGDOM
🌐 Título encontrado: Big Thunder Mountain Railroad
🔗 URL: https://en.wikipedia.org/wiki/Big_Thunder_Mountain_Railroad
⏱️ Duration (min): 3.0

🔍 Buscando metadata para:

In [16]:
df = enrich_temporal_features(df)

In [17]:
df = enrich_visitor_profile(df)

In [18]:
df = enrich_aggregate_features(df)

In [29]:
df = enrich_weather(df)

df

Unnamed: 0,company,incident_date,ride_name_dirty,ride_name,theme_park,age_gender,description,incident_date_parsed,age,gender,...,first_time_visitor,ride_incident_count,park_incident_count,ride_type_simplified,temperature_max_x,temperature_min_x,precipitation_sum_x,temperature_max_y,temperature_min_y,precipitation_sum_y
0,DISNEY WORLD,06/09/2022,Alien Swirling Saucers,Alien Swirling Saucers,HOLLYWOOD STUDIOS,59 yof,guest with pre-existing condition sustained in...,2022-06-09,59,F,...,True,1,80,other,,,,31.7,22.5,9.1
1,DISNEY WORLD,10/05/2014,Astro Orbiter,Astro Orbiter,MAGIC KINGDOM,68 yom,"guest fell exiting vehicle, developed blood c...",2014-10-05,68,M,...,False,2,177,flat_ride,,,,24.2,16.0,0.0
2,DISNEY WORLD,12/03/2009,Astro Orbitor,Astro Orbiter,MAGIC KINGDOM,40 yof,fractured left ankle exiting ride,2009-12-03,40,F,...,False,2,177,flat_ride,,,,24.6,17.4,4.7
3,DISNEY WORLD,08/07/2022,Avatar Flight of Passage,Avatar Flight of Passage,ANIMAL KINGDOM,83 yom,guest with pre-existing condition was briefly ...,2022-08-07,83,M,...,False,15,97,simulator,,,,33.0,23.5,3.7
4,DISNEY WORLD,4/19/22,Avatar Flight of Passage,Avatar Flight of Passage,ANIMAL KINGDOM,42 yof,guest with pre-existing condition had motion s...,2022-04-19,42,F,...,False,15,97,simulator,,,,25.4,15.9,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,DISNEY WORLD,7/21/19,Under the Sea Journey of the Little Mermaid,Under the Sea - Journey of the Little Mermaid,MAGIC KINGDOM,69 yof,guest with pre-existing medical condition had ...,2019-07-21,69,F,...,False,6,177,unknown,,,,30.6,22.7,5.0
678,DISNEY WORLD,06/02/2017,Under the Sea- Journey of the Little Mermaid,Under the Sea - Journey of the Little Mermaid,MAGIC KINGDOM,34 yom,felt disoriented and fell after riding,2017-06-02,34,M,...,False,6,177,unknown,,,,29.8,22.4,12.7
679,DISNEY WORLD,07/04/2016,Walt Disney World Railroad,Walt Disney World Railroad,MAGIC KINGDOM,41 yom,guest felt ill and began shaking after exiting.,2016-07-04,41,M,...,True,2,177,other,,,,32.9,25.4,5.2
680,DISNEY WORLD,05/05/2015,Walt Disney World Railroad,Walt Disney World Railroad,MAGIC KINGDOM,71 yom,nausea and dizziness,2015-05-05,71,M,...,False,2,177,other,,,,26.9,20.3,0.0


In [31]:
save_processed_data(df, "data/processed/incidents_enriched.parquet")
