In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Step 1: Read the flight data and parse 'flightDate' into datetimes
df = pd.read_csv('../data/jetbluenew_df.csv')
df['flightDate'] = pd.to_datetime(df['flightDate'])

# Step 2: Fixed-date holidays and NYC-specific events for 2022.
# The original ordering of the entries is kept unchanged.
fixed_holidays = pd.to_datetime([
    '2022-04-15',  # Good Friday
    '2022-04-17',  # Easter Sunday
    '2022-04-30',  # Solar Eclipse
    '2022-05-02',  # Eid al-Fitr
    '2022-05-30',  # Memorial Day
    '2022-06-19',  # Juneteenth
    '2022-06-20',  # Observed Juneteenth
    '2022-07-04',  # Independence Day
    '2022-06-26',  # NYC Pride March
])

# Step 3: Add dynamic holidays (e.g., Mother's Day, Father's Day)
def calculate_dynamic_holidays(year):
    """
    Return the floating holidays of `year` as a list of timestamps.

    The list holds, in this order:
      1. Mother's Day (second Sunday in May)
      2. Father's Day (third Sunday in June)
    """
    def nth_sunday(first_day, last_day, n):
        # Enumerate every day of the month, keep the Sundays
        # (dayofweek == 6) and pick the n-th one (1-based).
        month_days = pd.date_range(start=first_day, end=last_day, freq='D')
        sundays = month_days[month_days.dayofweek == 6]
        return sundays[n - 1]

    return [
        nth_sunday(f'{year}-05-01', f'{year}-05-31', 2),  # Mother's Day
        nth_sunday(f'{year}-06-01', f'{year}-06-30', 3),  # Father's Day
    ]

# Resolve the floating holidays (Mother's Day, Father's Day) for 2022
dynamic_holidays = calculate_dynamic_holidays(2022)

# Merge fixed and floating holidays into one list, fixed entries first
all_holidays = [*fixed_holidays, *dynamic_holidays]

# Step 4: 'isHoliday' is 1 when the flight date is itself a holiday, else 0
df['isHoliday'] = (
    df['flightDate']
    .isin(all_holidays)
    .astype(int)
)

# Step 5: Add 'isNearHoliday' column
def is_near_holiday(date, holidays, window=3):
    """
    Tell whether `date` lies at most `window` days away from some holiday.

    Both edges of the window are inclusive, so a holiday itself counts as
    near. Returns False when `holidays` is empty.
    """
    margin = pd.Timedelta(days=window)
    for day in holidays:
        if date >= day - margin and date <= day + margin:
            return True
    return False

# Flag (1/0) every flight whose date falls within 3 days of any holiday
df['isNearHoliday'] = df['flightDate'].apply(
    lambda flight_date: int(is_near_holiday(flight_date, all_holidays, window=3))
)

# Step 6: Add 'holidayProximity' column
def time_relative_to_holiday(date, holidays, window=3):
    """
    Classify a date as on, shortly before, or shortly after a holiday.

    An exact holiday match always wins, even when the date also falls
    inside another holiday's window. For example, Easter Sunday is two
    days after Good Friday but is still labelled 'holiday', not 'after'.

    Args:
        date: The date to classify; must be comparable with the entries
            of `holidays`.
        holidays: Iterable of holiday dates. For non-holiday dates the
            entries are scanned in order, and the first holiday whose
            window contains `date` decides between 'before' and 'after'.
        window: Number of days on either side of a holiday that count
            as 'before' / 'after' (inclusive).

    Returns:
        One of 'holiday', 'before', 'after' or 'none'.
    """
    # Materialize once: the holidays are scanned twice below, which would
    # exhaust a one-shot iterator.
    holidays = list(holidays)

    # Pass 1: an exact match takes priority over every proximity window.
    if any(date == holiday for holiday in holidays):
        return 'holiday'

    # Pass 2: the date is not a holiday; look for the first window it is in.
    span = pd.Timedelta(days=window)
    for holiday in holidays:
        if holiday - span <= date < holiday:
            return 'before'
        if holiday < date <= holiday + span:
            return 'after'
    return 'none'

# Label each flight date as 'holiday', 'before', 'after' or 'none'
df['holidayProximity'] = df['flightDate'].apply(
    time_relative_to_holiday, args=(all_holidays,), window=3
)

# Step 7: Write the enriched DataFrame to disk (row index omitted)
df.to_csv('../data/jetbluenew_master_with_holidays.csv', index=False)

# Preview the first 15 rows of the date and holiday-feature columns
print(
    df[['flightDate', 'isHoliday', 'isNearHoliday', 'holidayProximity']]
    .head(15)
)


   flightDate  isHoliday  isNearHoliday holidayProximity
0  2022-04-17          1              1            after
1  2022-04-17          1              1            after
2  2022-04-17          1              1            after
3  2022-04-17          1              1            after
4  2022-04-17          1              1            after
5  2022-04-17          1              1            after
6  2022-04-17          1              1            after
7  2022-04-17          1              1            after
8  2022-04-17          1              1            after
9  2022-04-17          1              1            after
10 2022-04-17          1              1            after
11 2022-04-17          1              1            after
12 2022-04-17          1              1            after
13 2022-04-17          1              1            after
14 2022-04-17          1              1            after
