In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Global plotting defaults. The seaborn theme is applied first because
# sns.set() resets matplotlib's rcParams; the figure size must come after it.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})

In [None]:
# Input file with the pre-processed flight records (absolute, Colab-style path).
DATA_PATH = "/content/processed_flight_records.csv"

df = pd.read_csv(DATA_PATH)

# Identify datetime and status columns by name: any column whose name contains
# "date" or "time" is a datetime candidate; any containing "status" is a
# status candidate. Matching is case-insensitive.
date_cols = [c for c in df.columns if 'date' in c.lower() or 'time' in c.lower()]
status_cols = [c for c in df.columns if 'status' in c.lower()]

# Every later cell needs a datetime column. Fail here with an explanatory
# message instead of an opaque IndexError from date_cols[0].
if not date_cols:
    raise ValueError(
        f"No column containing 'date' or 'time' found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# The first match wins. The status column is optional: when absent, status_col
# is None and the delay/cancellation flags fall back to constants later on.
date_col = date_cols[0]
status_col = status_cols[0] if status_cols else None

df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# Calendar features derived from the parsed flight timestamp column.
flight_dt = df[date_col].dt
df['year'] = flight_dt.year
df['month'] = flight_dt.month
df['day'] = flight_dt.day
df['day_of_week'] = flight_dt.day_name()
df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])

# Philippine seasonal proxies
# Wet season: June through October (months 6-10, inclusive);
# every other month (November through May) counts as dry season.
# NOTE(review): rows with a missing date fail the between() test and are
# therefore labelled 'Dry Season' - confirm this is intended.
wet_months = df['month'].between(6, 10)
df['season'] = np.where(wet_months, 'Wet Season', 'Dry Season')

In [None]:
# Note: Simplified fixed-date holidays for proxy analysis.
# Each entry is (month, day); only holidays that fall on the same calendar
# date every year are listed.
PH_HOLIDAYS = [
    (1, 1),    # New Year
    (4, 9),    # Araw ng Kagitingan
    (5, 1),    # Labor Day
    (6, 12),   # Independence Day
    (8, 21),   # Ninoy Aquino Day
    (11, 30),  # Bonifacio Day
    (12, 25)   # Christmas
]

# Vectorised membership test: encode (month, day) as month * 100 + day, which
# is unique because day < 100, and compare whole columns at once instead of
# running a Python lambda per row. Rows with a missing date produce NaN codes,
# which match nothing, so they are flagged False.
holiday_codes = [month * 100 + day for month, day in PH_HOLIDAYS]
df['is_holiday'] = (df['month'] * 100 + df['day']).isin(holiday_codes)


In [None]:
# Derive boolean delay / cancellation flags from the free-text status column.
if not status_col:
    # No status column was detected: mark every flight as neither delayed
    # nor cancelled so the downstream aggregations still run.
    df['is_delayed'] = False
    df['is_cancelled'] = False
else:
    # Lower-case once so the substring search is case-insensitive. Missing
    # values become the string 'nan' and therefore match neither keyword.
    status_text = df[status_col].astype(str).str.lower()
    df['is_delayed'] = status_text.str.contains('delay')
    df['is_cancelled'] = status_text.str.contains('cancel')

In [None]:
# Flight count plus delay / cancellation rates for weekend vs weekday flights.
# The means of the boolean flags are fractions; scale them to percentages.
weekend_stats = (
    df.groupby('is_weekend')
    .agg(
        flights=('is_delayed', 'count'),
        delay_rate=('is_delayed', 'mean'),
        cancellation_rate=('is_cancelled', 'mean'),
    )
    .assign(
        delay_rate=lambda t: t['delay_rate'] * 100,
        cancellation_rate=lambda t: t['cancellation_rate'] * 100,
    )
)
weekend_stats


In [None]:
# Flight count plus delay / cancellation rates for holiday vs non-holiday dates.
# The means of the boolean flags are fractions; scale them to percentages.
holiday_stats = (
    df.groupby('is_holiday')
    .agg(
        flights=('is_delayed', 'count'),
        delay_rate=('is_delayed', 'mean'),
        cancellation_rate=('is_cancelled', 'mean'),
    )
    .assign(
        delay_rate=lambda t: t['delay_rate'] * 100,
        cancellation_rate=lambda t: t['cancellation_rate'] * 100,
    )
)
holiday_stats

In [None]:
# Flight count plus delay / cancellation rates per season label.
# The means of the boolean flags are fractions; scale them to percentages.
seasonal_stats = (
    df.groupby('season')
    .agg(
        flights=('is_delayed', 'count'),
        delay_rate=('is_delayed', 'mean'),
        cancellation_rate=('is_cancelled', 'mean'),
    )
    .assign(
        delay_rate=lambda t: t['delay_rate'] * 100,
        cancellation_rate=lambda t: t['cancellation_rate'] * 100,
    )
)
seasonal_stats

In [None]:
# Side-by-side bar charts of the delay rate for each calendar grouping.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# One (table, title) pair per panel, drawn left to right.
delay_panels = [
    (weekend_stats, 'Delay Rate: Weekend vs Weekday'),
    (holiday_stats, 'Delay Rate: Holiday vs Non-Holiday'),
    (seasonal_stats, 'Delay Rate: Wet vs Dry Season'),
]

for panel_ax, (panel_stats, panel_title) in zip(axes, delay_panels):
    panel_stats['delay_rate'].plot(kind='bar', ax=panel_ax, title=panel_title)
    # delay_rate was scaled by 100 when the tables were built, so label the
    # axis with its unit; otherwise the bars carry no indication of scale.
    panel_ax.set_ylabel('Delay Rate (%)')

plt.tight_layout()
plt.show()

In [None]:
# Mean of the is_delayed flag for every (year, month) cell, as a percentage.
heatmap_data = 100 * df.pivot_table(
    values='is_delayed',
    index='year',
    columns='month',
    aggfunc='mean',
)

# Draw on a dedicated figure and label through the Axes seaborn returns.
plt.figure(figsize=(14, 6))
heatmap_ax = sns.heatmap(heatmap_data, cmap='Reds')
heatmap_ax.set_title('Monthly Delay Rate Heatmap (%)')
heatmap_ax.set_xlabel('Month')
heatmap_ax.set_ylabel('Year')
plt.show()

In [None]:
# Persist each summary table as CSV; written in the order listed.
stats_outputs = {
    "/content/weekend_vs_weekday_stats.csv": weekend_stats,
    "/content/holiday_vs_nonholiday_stats.csv": holiday_stats,
    "/content/seasonal_effects_stats.csv": seasonal_stats,
}
for csv_path, stats_table in stats_outputs.items():
    stats_table.to_csv(csv_path)

print("Seasonal and calendar effects analysis outputs saved.")