In [None]:
import pandas as pd

# Load the flight table from the data directory next to this notebook.
df = pd.read_csv('../data/jetbluenew_df.csv')

# Parse flightDate into datetimes so it can be compared with the holiday
# timestamps built below.
df['flightDate'] = pd.to_datetime(df['flightDate'])

# Hand-maintained 2022 dates treated as holidays/events, converted straight
# into a DatetimeIndex. The entries are kept in their original order
# (note that 2022-06-26 is listed after 2022-07-04).
fixed_holidays = pd.to_datetime([
    '2022-04-15',  # Good Friday
    '2022-04-17',  # Easter Sunday
    '2022-04-23',  # NYC Spring Break
    '2022-04-30',  # Solar Eclipse
    '2022-05-02',  # Eid al-Fitr
    '2022-05-30',  # Memorial Day
    '2022-06-19',  # Juneteenth
    '2022-06-20',  # Observed Juneteenth
    '2022-07-04',  # Independence Day
    '2022-06-26',  # NYC Pride March
    '2022-07-22',  # Comic-Con NYC start
])

def calculate_dynamic_holidays(year):
    """
    Return the floating-date holidays for ``year`` as a list of Timestamps.

    The list holds, in order, Mother's Day (second Sunday of May) and
    Father's Day (third Sunday of June).
    """
    def nth_sunday(month_start, n):
        # Step from the first of the month to its first Sunday
        # (dayofweek == 6), then add whole weeks to reach the n-th one.
        first_day = pd.Timestamp(month_start)
        days_to_first_sunday = (6 - first_day.dayofweek) % 7
        return first_day + pd.Timedelta(days=days_to_first_sunday + 7 * (n - 1))

    return [
        nth_sunday(f'{year}-05-01', 2),  # Mother's Day
        nth_sunday(f'{year}-06-01', 3),  # Father's Day
    ]

# Floating-date holidays for 2022, as returned by calculate_dynamic_holidays.
dynamic_holidays = calculate_dynamic_holidays(2022)

# Single flat list holding the fixed dates followed by the computed ones.
all_holidays = [*fixed_holidays, *dynamic_holidays]

# isHoliday is 1 when flightDate equals one of the holiday timestamps, else 0.
holiday_mask = df['flightDate'].isin(all_holidays)
df['isHoliday'] = holiday_mask.astype(int)

def is_near_holiday(date, holidays, window=3):
    """
    Tell whether ``date`` lies within ``window`` days of any holiday.

    Both ends of the range are inclusive, so a date exactly ``window`` days
    before or after a holiday (or on the holiday itself) counts as near.
    Returns False when ``holidays`` is empty.
    """
    for holiday in holidays:
        reach = pd.Timedelta(days=window)
        earliest = holiday - reach
        latest = holiday + reach
        if date >= earliest and date <= latest:
            return True
    return False

# isNearHoliday is 1 when the flight date falls within three days (either
# side, inclusive) of any entry in all_holidays, else 0.
df['isNearHoliday'] = df['flightDate'].apply(
    lambda flight_date: int(is_near_holiday(flight_date, all_holidays, window=3))
)

# Remove the holidayProximity column when present; errors='ignore' turns a
# missing column into a no-op instead of an error.
df.drop(columns=['holidayProximity'], errors='ignore', inplace=True)

# Save the table with the holiday flags, without writing the index.
df.to_csv('../data/jetbluenew_master_with_holidays.csv', index=False)

# Quick visual check of the first 15 rows of the date and flag columns.
preview_columns = ['flightDate', 'isHoliday', 'isNearHoliday']
print(df[preview_columns].head(15))



   flightDate  isHoliday  isNearHoliday
0  2022-04-17          1              1
1  2022-04-17          1              1
2  2022-04-17          1              1
3  2022-04-17          1              1
4  2022-04-17          1              1
5  2022-04-17          1              1
6  2022-04-17          1              1
7  2022-04-17          1              1
8  2022-04-17          1              1
9  2022-04-17          1              1
10 2022-04-17          1              1
11 2022-04-17          1              1
12 2022-04-17          1              1
13 2022-04-17          1              1
14 2022-04-17          1              1
