# Initial EDA

In [1]:
# Importing packages
import pandas as pd
import datetime

## Creating new columns
### Adding day-of-the-week column

In [2]:
# Read the raw CSV, then parse the Date column and derive the weekday name.
# Each .assign step returns a new frame, so the whole load reads top-down.
df = (
    pd.read_csv('initial_data.csv')
    # Parse the Date column into datetime values
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    # Weekday name ('Monday', 'Tuesday', ...) taken from the parsed Date
    .assign(day_of_week=lambda frame: frame['Date'].dt.day_name())
)

### Adding weekend Boolean column

In [3]:
# Helper that tells whether a weekday name is a weekend day
def weekend(dow):
    """Return True when ``dow`` is exactly 'Saturday' or 'Sunday', else False.

    The comparison is case-sensitive; any other value yields False.
    """
    return dow in ('Saturday', 'Sunday')
    
# Flag each row as weekend (True) or weekday (False) from its day_of_week value
df['weekend'] = df['day_of_week'].map(weekend)

### Adding season column

In [4]:
# Function that maps a date to the season it falls in
def get_season(date):
    """Return 'winter', 'spring', 'summer' or 'fall' for ``date``.

    ``date`` only needs ``month`` and ``day`` attributes. The season is first
    chosen by month, then corrected for the early part of each transition
    month: spring starts Mar 20, summer Jun 21, fall Sep 22, winter Dec 21.
    """
    season_by_month = {
        3: 'spring', 4: 'spring', 5: 'spring',
        6: 'summer', 7: 'summer', 8: 'summer',
        9: 'fall', 10: 'fall', 11: 'fall',
    }
    # month -> (first day of the new season, season still in effect before it)
    cutoffs = {
        3: (20, 'winter'),
        6: (21, 'spring'),
        9: (22, 'summer'),
        12: (21, 'fall'),
    }

    # Any month not listed above (Dec, Jan, Feb, ...) defaults to winter
    result = season_by_month.get(date.month, 'winter')

    if date.month in cutoffs:
        first_day, previous_season = cutoffs[date.month]
        if date.day < first_day:
            result = previous_season

    return result

# Derive the season label for every row from its Date value
df['season'] = df['Date'].map(get_season)

### Adding holiday season column

In [5]:
# Define helpers and the function that flags dates near some major US holidays
def _nth_weekday(year, month, weekday, n):
    """Return the n-th (1-based) ``weekday`` (Monday=0 ... Sunday=6) of a month."""
    first = datetime.date(year, month, 1)
    # Days from the 1st to the first occurrence of the requested weekday
    offset = (weekday - first.weekday()) % 7
    return first + datetime.timedelta(days=offset + 7 * (n - 1))


def _last_weekday_on_or_before(year, month, day, weekday):
    """Return the latest ``weekday`` (Monday=0) falling on or before year-month-day."""
    end = datetime.date(year, month, day)
    return end - datetime.timedelta(days=(end.weekday() - weekday) % 7)


def _major_holidays(year):
    """Return the list of holiday dates considered for ``year``."""
    return [
        datetime.date(year, 1, 1),                  # New Year's Day
        datetime.date(year, 7, 4),                  # Independence Day
        datetime.date(year, 12, 25),                # Christmas Day
        _nth_weekday(year, 11, 3, 4),               # Thanksgiving Day: 4th Thursday of November
        _last_weekday_on_or_before(year, 5, 31, 0),  # Memorial Day: last Monday of May
        _nth_weekday(year, 9, 0, 1),                # Labor Day: first Monday of September
        datetime.date(year, 11, 11),                # Veteran's Day
        _nth_weekday(year, 1, 0, 3),                # MLK Day: third Monday of January
    ]


def is_holiday_season(date):
    """Return True when ``date`` is within three days of a listed holiday.

    Parameters
    ----------
    date : datetime-like
        Must expose ``.year`` and ``.date()`` (e.g. ``pandas.Timestamp`` or
        ``datetime.datetime``).

    Returns
    -------
    bool
        True if any holiday lies in the closed interval
        [date - 3 days, date + 3 days], otherwise False.
    """
    day = date.date()
    # Three day buffer on each side of a holiday defines the "holiday season"
    buffer = datetime.timedelta(days=3)

    candidates = _major_holidays(date.year)
    # Dec 29-31 are within the buffer of the *next* year's New Year's Day,
    # which the current-year list does not contain.
    candidates.append(datetime.date(date.year + 1, 1, 1))

    return any(abs(day - holiday) <= buffer for holiday in candidates)

# Flag every row whose Date lies inside a holiday buffer window
df['holiday_season'] = df['Date'].map(is_holiday_season)

### Adding vaccine announcement column

In [6]:
# Announcement dates as MM/DD/YYYY strings, newest first (the 2020 entries sit at the end)
announcement_dates = [
    "04/28/2023", "04/27/2023", "04/18/2023", "03/14/2023", "03/10/2023",
    "12/16/2022", "12/08/2022", "10/21/2022", "10/12/2022", "09/13/2022",
    "08/31/2022", "08/26/2022", "08/19/2022", "07/13/2022", "07/08/2022",
    "06/30/2022", "06/17/2022", "05/17/2022", "05/05/2022", "04/29/2022",
    "04/15/2022", "04/01/2022", "03/29/2022", "03/21/2022", "03/11/2022",
    "02/11/2022", "02/01/2022", "01/31/2022", "01/11/2022", "01/07/2022",
    "01/03/2022", "12/17/2021", "12/14/2021", "12/09/2021", "11/30/2021",
    "11/19/2021", "10/29/2021", "10/26/2021", "10/22/2021", "10/20/2021",
    "10/14/2021", "10/01/2021", "09/24/2021", "09/22/2021", "09/20/2021",
    "09/17/2021", "09/10/2021", "09/02/2021", "09/01/2021", "08/24/2021",
    "08/23/2021", "08/18/2021", "08/13/2021", "08/12/2021", "08/06/2021",
    "08/03/2021", "07/30/2021", "07/16/2021", "07/13/2021", "07/08/2021",
    "07/06/2021", "07/02/2021", "06/29/2021", "06/25/2021", "06/15/2021",
    "06/11/2021", "06/10/2021", "05/25/2021", "05/19/2021", "05/11/2021",
    "05/10/2021", "04/27/2021", "04/23/2021", "04/21/2021", "04/16/2021",
    "04/13/2021", "04/12/2021", "04/01/2021", "03/26/2021", "03/24/2021",
    "03/17/2021", "03/05/2021", "03/02/2021", "02/27/2021", "02/26/2021",
    "02/25/2021", "02/23/2021", "02/22/2021", "02/12/2021", "02/05/2021",
    "02/04/2021", "01/29/2021", "01/14/2021", "01/04/2021", "12/28/2020",
    "12/22/2020", "12/21/2020", "12/18/2020", "12/17/2020", "12/14/2020", "12/12/2020"
]

# Parse each string into a Timestamp; building a set first drops repeated dates.
# The resulting list is therefore in no particular order.
announcement_dates = list({pd.to_datetime(raw_date) for raw_date in announcement_dates})


# Check if a date falls on, or up to a week after, any announcement date
def near_announcement(date, announcements=None, window_days=7):
    """Return True when ``date`` is on or shortly after an announcement.

    The window is one-sided: a date matches an announcement ``a`` when
    ``a <= date <= a + window_days``. Dates before an announcement do not match.

    Parameters
    ----------
    date : pandas.Timestamp
        The date to test.
    announcements : iterable of pandas.Timestamp, optional
        Announcement dates to compare against. Defaults to the notebook-level
        ``announcement_dates`` list, looked up at call time.
    window_days : int, optional
        Length of the window after each announcement, in days (default 7).

    Returns
    -------
    bool
        True if ``date`` lies in the window of at least one announcement;
        False otherwise, including when ``announcements`` is empty.
    """
    if announcements is None:
        announcements = announcement_dates

    # Build the window length once instead of once per announcement
    window = pd.Timedelta(days=window_days)
    return any(start <= date <= start + window for start in announcements)

# Flag every row whose Date falls in the window following an announcement
df['vax_announcement'] = df['Date'].map(near_announcement)
