In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import ipywidgets as widgets
from IPython.display import display


In [5]:
%pip install ipywidgets

Collecting ipywidgets
  Using cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting widgetsnbextension~=4.0.12 (from ipywidgets)
  Using cached widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets)
  Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)
Using cached ipywidgets-8.1.5-py3-none-any.whl (139 kB)
Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)
Using cached widgetsnbextension-4.0.13-py3-none-any.whl (2.3 MB)
Installing collected packages: widgetsnbextension, jupyterlab-widgets, ipywidgets
Successfully installed ipywidgets-8.1.5 jupyterlab-widgets-3.0.13 widgetsnbextension-4.0.13
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Folder containing the data files
data_folder = "data/waiting times"

# List to store data
waiting_times = []

# Read and merge CSV files
for filename in tqdm(os.listdir(data_folder)):
    file_path = os.path.join(data_folder, filename)
    df = pd.read_csv(file_path)
    
    if df.empty:
        continue  # Skip empty files
    
    attraction_name = filename.replace(".csv", "").replace("_", " ")  # Clean attraction name
    df.insert(0, 'attraction', attraction_name)  # Add attraction name as the first column
    waiting_times.append(df)



  0%|          | 0/52 [00:00<?, ?it/s]

100%|██████████| 52/52 [00:07<00:00,  7.37it/s]


In [None]:
# Combine all data into a single DataFrame
df_wait_raw = pd.concat(waiting_times, ignore_index=True)

# Make a copy and apply cleaning steps
df_wait = df_wait_raw.copy()

In [None]:
# Convert date formats
df_wait['date'] = pd.to_datetime(df_wait['date'], errors='coerce')
df_wait['datetime'] = pd.to_datetime(df_wait['datetime'], errors='coerce')

In [None]:
# Remove outliers in actual wait times (-1000 to 360 is considered valid)
df_wait = df_wait[((df_wait["SACTMIN"] >= -1000) & (df_wait["SACTMIN"] < 360)) | (df_wait["SACTMIN"].isnull())]

# Remove outliers in posted wait times (-999 means attraction is closed)
df_wait = df_wait[(df_wait["SPOSTMIN"] >= -998.99) | (df_wait["SPOSTMIN"].isnull())]

print(f"✅ Temizleme sonrası {len(df_wait_raw) - len(df_wait)} satır kaldırıldı.")


✅ Temizleme sonrası 1318703 satır kaldırıldı.


In [None]:
# Create separate DataFrames for actual and posted wait times
df_wait_act = df_wait[~df_wait["SACTMIN"].isnull()].drop("SPOSTMIN", axis=1)
df_wait_post = df_wait[~df_wait["SPOSTMIN"].isnull()].drop("SACTMIN", axis=1)

# Get unique attractions
attractions = df_wait["attraction"].unique()

# Descriptive statistics for each attraction
df_wait_post.groupby("attraction")["SPOSTMIN"].describe()
df_wait_act.groupby("attraction")["SACTMIN"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
attraction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7 dwarfs train,7634.0,36.166623,22.950125,0.0,21.0,31.0,46.0,217.0
alien saucers,3031.0,22.068624,12.586751,0.0,13.0,21.0,30.0,79.0
astro orbiter,1770.0,20.40226,9.501468,0.0,14.0,20.0,26.0,65.0
barnstormer,3087.0,12.528345,9.900399,0.0,5.0,11.0,18.0,70.0
big thunder mtn,11729.0,18.251513,12.913191,0.0,9.0,16.0,25.0,125.0
buzz lightyear,7998.0,14.12078,12.355926,0.0,5.0,11.0,20.0,109.0
carousel of progress,876.0,4.179224,4.455348,0.0,1.0,2.0,6.0,33.0
country bears,362.0,7.842541,7.213412,0.0,2.0,6.0,11.0,40.0
dinosaur,5059.0,20.197075,15.33237,0.0,8.0,17.0,29.0,96.0
dumbo,2518.0,12.894758,11.080719,0.0,5.0,10.0,18.0,75.0


In [None]:
# Interactive Dropdown Menu for Visualizations
attraction_dropdown = widgets.Dropdown(
    options=attractions,
    description="Attraction:",
    value=attractions[0]  # Default selection
)

In [None]:
# Function to Update Plots Based on Selected Attraction
def update_plot(attraction):
    plt.figure(figsize=(12, 4))  #Create a new figure
    plt.subplot(1, 2, 1)  # First subplot: Actual wait times
    df_wait_attraction = df_wait[df_wait["attraction"] == attraction]
    df_wait_attraction["SACTMIN"].plot.hist(bins=30, title="Actual Waiting Times", color="royalblue", alpha=0.7)
    
    plt.subplot(1, 2, 2)  # Second subplot: Posted wait times
    df_wait_attraction["SPOSTMIN"].plot.hist(bins=np.arange(-2.5, df_wait["SPOSTMIN"].max() + 2.5, 10),
                                             title="Posted Waiting Times",
                                             color="orange",
                                             alpha=0.7)
    
    plt.tight_layout()
    plt.show()

In [None]:
# Display Interactive Plots
interactive_plot = widgets.interactive(update_plot, attraction=attraction_dropdown)
display(interactive_plot)

interactive(children=(Dropdown(description='Attraction:', options=('7 dwarfs train', 'alien saucers', 'astro o…