In [1]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

In [2]:
### load the AISD holiday table for the 2023-24 school year from CSV
### the 'Holiday' column is discarded right after loading
aisdHolidays_school23_24 = (
    pd.read_csv('aisdHolidayDates_schoolYear_23_24.csv')
    .drop(columns=['Holiday'])
)
aisdHolidays_school23_24

Unnamed: 0,Date,StudentHoliday,FederalHoliday
0,2023-09-04,True,True
1,2023-10-09,True,True
2,2023-11-13,True,False
3,2023-11-20,True,False
4,2023-11-21,True,False
5,2023-11-22,True,False
6,2023-11-23,True,True
7,2023-11-24,True,False
8,2023-12-21,True,False
9,2023-12-22,True,False


In [None]:
### boundaries of the modeling window (1 Aug 2023 through 31 May 2024)
first_date = datetime(2023, 8, 1)
last_date = datetime(2024, 5, 31)
### first and last dates of the 2023-24 school year
first_school_date = datetime(2023, 8, 14)
last_school_date = datetime(2024, 5, 23)
### timedelta spanning the modeling window (last_date minus first_date)
date_range = last_date - first_date

In [4]:
### create list of all dates in the date range (both endpoints included) using a list comprehension
### the list is ordered from last_date backwards to first_date
date_list_school = [
    last_date - timedelta(days=days_back)
    for days_back in range(date_range.days + 1)
]

In [5]:
### map every date to a weekday flag: True for Monday-Friday, False for Saturday/Sunday
### (datetime.weekday() returns 0 for Monday through 6 for Sunday)
weekday = {day: day.weekday() < 5 for day in date_list_school}

In [6]:
### build the weekday dataframe directly from the weekday dictionary:
### one row per date, in the dictionary's insertion order, with columns 'Date' and 'is_weekday'
### (replaces DataFrame.from_dict called on a list, which is documented for dict input,
###  plus the one-row transpose / reset_index round trip)
weekday_df = pd.DataFrame({
    'Date': list(weekday.keys()),
    'is_weekday': list(weekday.values()),
})

In [7]:
### the two dataframes are merged on 'Date', so cast that column to datetime64[ns] in both of them
for frame in (weekday_df, aisdHolidays_school23_24):
    frame['Date'] = frame['Date'].astype('datetime64[ns]')

In [8]:
### combine the weekday and holiday dataframes by matching the dates
### how='left' keeps every calendar date; dates missing from the holiday dataframe get NaN in the holiday columns
schoolWork_schedule23_24 = pd.merge(weekday_df, aisdHolidays_school23_24, on='Date', how='left')

### replace those NaN entries with False, touching only the two holiday flag columns
### going through the nullable 'boolean' dtype avoids fillna's deprecated silent downcast of object columns
### and leaves both flags as plain bool columns
holiday_flag_columns = ['StudentHoliday', 'FederalHoliday']
schoolWork_schedule23_24[holiday_flag_columns] = (
    schoolWork_schedule23_24[holiday_flag_columns]
    .astype('boolean')
    .fillna(False)
    .astype(bool)
)
schoolWork_schedule23_24

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday
0,2024-05-31,True,False,False
1,2024-05-30,True,False,False
2,2024-05-29,True,False,False
3,2024-05-28,True,False,False
4,2024-05-27,True,False,True
...,...,...,...,...
300,2023-08-05,False,False,False
301,2023-08-04,True,False,False
302,2023-08-03,True,False,False
303,2023-08-02,True,False,False


In [21]:
### create the is_school_day column (1 = school day, 0 = not a school day) with vectorised boolean masks
### a date is a school day only when it is a weekday, is not a student holiday,
### and lies between first_school_date and last_school_date (both inclusive)
schoolWork_schedule23_24['is_school_day'] = (
    schoolWork_schedule23_24['is_weekday'].ne(False)
    & schoolWork_schedule23_24['StudentHoliday'].ne(True)
    & schoolWork_schedule23_24['Date'].between(first_school_date, last_school_date)
).astype('int64')

In [10]:
### create the is_work_day column (1 = work day, 0 = not a work day) with vectorised boolean masks
### a date is a work day only when it is a weekday and is not a federal holiday
schoolWork_schedule23_24['is_work_day'] = (
    schoolWork_schedule23_24['is_weekday'].ne(False)
    & schoolWork_schedule23_24['FederalHoliday'].ne(True)
).astype('int64')

In [22]:
### sanity check: show the 20 earliest dates to confirm that is_school_day and is_work_day
### follow the logic based on the other columns
schoolWork_schedule23_24.sort_values(by='Date', ascending=True).iloc[:20]

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday,is_school_day,is_work_day
304,2023-08-01,True,False,False,0,1
303,2023-08-02,True,False,False,0,1
302,2023-08-03,True,False,False,0,1
301,2023-08-04,True,False,False,0,1
300,2023-08-05,False,False,False,0,0
299,2023-08-06,False,False,False,0,0
298,2023-08-07,True,False,False,0,1
297,2023-08-08,True,False,False,0,1
296,2023-08-09,True,False,False,0,1
295,2023-08-10,True,False,False,0,1


In [23]:
### keep only the date, is_school_day and is_work_day columns for the calendar dataframe
### the 'Date' column is renamed to lowercase 'date'
school_work_calender23_24 = (
    schoolWork_schedule23_24
    .rename(columns={'Date': 'date'})
    .drop(columns=['is_weekday', 'StudentHoliday', 'FederalHoliday'])
)

In [24]:
### put the calendar in chronological order
### reset_index renumbers the rows in date order; the previous index is kept as an 'index' column
school_work_calender23_24_Seq = (
    school_work_calender23_24
    .sort_values(by='date', ascending=True)
    .reset_index(drop=False)
)

In [25]:
### convert dates to strings of YYYY-MM-DD format that is necessary for the CLT model input
### pd.to_datetime makes the cell safe to re-run: if the column already holds formatted strings,
### they are parsed back to datetimes before formatting instead of failing on the .dt accessor
school_work_calender23_24_Seq['date'] = (
    pd.to_datetime(school_work_calender23_24_Seq['date']).dt.strftime('%Y-%m-%d')
)

In [26]:
### remove the unnecessary 'index' column left behind by reset_index
### reassigning (instead of inplace=True) with errors='ignore' keeps the cell re-runnable:
### a second run no longer raises KeyError once the column is already gone
school_work_calender23_24_Seq = school_work_calender23_24_Seq.drop(columns='index', errors='ignore')
school_work_calender23_24_Seq

Unnamed: 0,date,is_school_day,is_work_day
0,2023-08-01,0,1
1,2023-08-02,0,1
2,2023-08-03,0,1
3,2023-08-04,0,1
4,2023-08-05,0,0
...,...,...,...
300,2024-05-27,0,0
301,2024-05-28,0,1
302,2024-05-29,0,1
303,2024-05-30,0,1


In [27]:
### write the finished calendar to CSV
### index=True is the to_csv default: the row index is written as an unnamed first column
### NOTE(review): confirm the downstream reader expects that column; otherwise pass index=False
school_work_calender23_24_Seq.to_csv(
    'school_work_calender_aisd_2023_24_schoolYear.csv',
    index=True,
)