In [69]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

## All 2023-2024 Dates

In [70]:
### load the AISD holiday dates, which also include the federal holidays throughout the summer needed for the work schedule
### the 'Holiday' column is dropped right after loading; the remaining columns are shown in the output below
aisdHolidays_23_24 = pd.read_csv('aisdHolidayDates_23_24.csv')
aisdHolidays_23_24 = aisdHolidays_23_24.drop(columns='Holiday')
aisdHolidays_23_24

Unnamed: 0,Date,StudentHoliday,FederalHoliday
0,2023-01-02,True,False
1,2023-01-03,True,False
2,2023-01-04,True,False
3,2023-01-05,True,False
4,2023-01-06,True,False
...,...,...,...
65,2024-12-25,True,True
66,2024-12-26,True,False
67,2024-12-27,True,False
68,2024-12-30,True,False


In [71]:
### bounds of the calendar: 1 January 2023 through 31 December 2024
first_date = datetime(2023, 1, 1)
last_date = datetime(2024, 12, 31)
### timedelta between the two bounds; its .days value is one less than the number of calendar days covered
date_range = last_date - first_date

In [72]:
### create list of all dates in the date range using a list comprehension
### the list runs backwards in time: it starts at last_date and steps back one day at a time until first_date
date_list = [
    last_date - timedelta(days=days_back)
    for days_back in range(date_range.days + 1)
]

In [73]:
### map every date to True when it is a weekday and to False when it falls on a weekend
### datetime.weekday() returns 0 for Monday through 6 for Sunday, so values below 5 are Monday-Friday
weekday = {day: day.weekday() < 5 for day in date_list}

In [74]:
### turn the weekday dictionary into a two-column dataframe: one row per date with its weekday flag
### rows keep the order of the dictionary entries
weekday_df = pd.DataFrame(list(weekday.items()), columns=['Date', 'is_weekday'])

In [75]:
### cast the Date column of both the weekday and the holiday dataframe to datetime64[ns]
### the two dataframes are matched on Date, so both columns need the same dtype
weekday_df = weekday_df.astype({'Date': 'datetime64[ns]'})
aisdHolidays_23_24 = aisdHolidays_23_24.astype({'Date': 'datetime64[ns]'})

In [76]:
### left-join the holiday flags onto the full list of dates, matching rows on Date
### dates that are not in the holiday dataframe get NaN in the holiday columns, so those are replaced with False
schoolWork_schedule23_24 = weekday_df.merge(aisdHolidays_23_24, how='left', on='Date')
schoolWork_schedule23_24 = schoolWork_schedule23_24.fillna(False)
schoolWork_schedule23_24

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday
0,2024-12-31,True,True,False
1,2024-12-30,True,True,False
2,2024-12-29,False,False,False
3,2024-12-28,False,False,False
4,2024-12-27,True,True,False
...,...,...,...,...
726,2023-01-05,True,True,False
727,2023-01-04,True,True,False
728,2023-01-03,True,True,False
729,2023-01-02,True,True,False


In [77]:
### first and last class days for 2023 and 2024, grouped by calendar year
### within each year the last class day (spring) comes before the first class day (fall)
lastClass_day23 = datetime(2023, 5, 25)
firstClass_day23 = datetime(2023, 8, 14)
lastClass_day24 = datetime(2024, 5, 23)
firstClass_day24 = datetime(2024, 8, 20)

In [78]:
### create summer day column based on first and last class days
### entry will be True if the date falls strictly after the last class day and strictly before the first class day
### of the same calendar year (2023 or 2024), False otherwise; the class days themselves are not summer days
schoolWork_schedule23_24['SummerDay'] = [
    (lastClass_day23 < day < firstClass_day23) or (lastClass_day24 < day < firstClass_day24)
    for day in schoolWork_schedule23_24['Date']
]

In [79]:
### spot check of the summer day logic: order the rows by date and look at the five earliest dates
schoolWork_schedule23_24.sort_values(by='Date').head(5)

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday,SummerDay
730,2023-01-01,False,False,False,False
729,2023-01-02,True,True,False,False
728,2023-01-03,True,True,False,False
727,2023-01-04,True,True,False,False
726,2023-01-05,True,True,False,False


In [80]:
### create school day column based on weekday, student holiday and summer day columns
### is_school_day entry is 1 only when the date is a weekday that is neither a student holiday nor a summer day, 0 otherwise
schoolWork_schedule23_24['is_school_day'] = [
    1 if (on_weekday and not student_holiday and not summer_day) else 0
    for on_weekday, student_holiday, summer_day in zip(
        schoolWork_schedule23_24['is_weekday'],
        schoolWork_schedule23_24['StudentHoliday'],
        schoolWork_schedule23_24['SummerDay'],
    )
]

In [81]:
### create work day column based on weekday and federal holiday columns
### is_work_day entry is 1 only when the date is a weekday that is not a federal holiday, 0 otherwise
schoolWork_schedule23_24['is_work_day'] = [
    1 if (on_weekday and not federal_holiday) else 0
    for on_weekday, federal_holiday in zip(
        schoolWork_schedule23_24['is_weekday'],
        schoolWork_schedule23_24['FederalHoliday'],
    )
]

In [82]:
### spot check: the 20 earliest dates, to compare is_school_day and is_work_day against the flag columns they are derived from
schoolWork_schedule23_24.sort_values(by='Date').head(n=20)

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday,SummerDay,is_school_day,is_work_day
730,2023-01-01,False,False,False,False,0,0
729,2023-01-02,True,True,False,False,0,1
728,2023-01-03,True,True,False,False,0,1
727,2023-01-04,True,True,False,False,0,1
726,2023-01-05,True,True,False,False,0,1
725,2023-01-06,True,True,False,False,0,1
724,2023-01-07,False,False,False,False,0,0
723,2023-01-08,False,False,False,False,0,0
722,2023-01-09,True,True,False,False,0,1
721,2023-01-10,True,False,False,False,1,1


In [83]:
### build the calendar dataframe: rename Date to lowercase 'date', then drop the intermediate flag columns
### what remains is the date together with the is_school_day and is_work_day columns
school_work_calender23_24 = schoolWork_schedule23_24.rename(columns={'Date': 'date'})
school_work_calender23_24 = school_work_calender23_24.drop(
    columns=['is_weekday', 'StudentHoliday', 'FederalHoliday', 'SummerDay']
)

In [84]:
### summary of the calendar dataframe: number of rows, column names, non-null counts and dtypes
school_work_calender23_24.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           731 non-null    datetime64[ns]
 1   is_school_day  731 non-null    int64         
 2   is_work_day    731 non-null    int64         
dtypes: datetime64[ns](1), int64(2)
memory usage: 17.3 KB


In [85]:
### put the calendar in chronological order (earliest date first)
### reset_index() then renumbers the rows from 0 in that order; the previous row labels are kept in a new 'index' column
school_work_calender23_24_seq = school_work_calender23_24.sort_values(by='date')
school_work_calender23_24_seq = school_work_calender23_24_seq.reset_index()

In [86]:
### convert the dates to strings in YYYY-MM-DD format, which is necessary for the CLT model input
school_work_calender23_24_seq = school_work_calender23_24_seq.assign(
    date=school_work_calender23_24_seq['date'].dt.strftime('%Y-%m-%d')
)

In [87]:
### remove the unnecessary 'index' column (the old row labels kept by reset_index)
school_work_calender23_24_seq = school_work_calender23_24_seq.drop(columns='index')

In [88]:
### write the chronological calendar to a CSV file in the working directory
### to_csv is called with its defaults, so the row index is written as the first, unnamed column of the file
school_work_calender23_24_seq.to_csv('school_work_calender_aisd_2023_24.csv')

## 2023-2024 School Year

In [89]:
### load the holiday dates for the 2023-2024 school year and drop the 'Holiday' column
aisdHolidays_school23_24 = pd.read_csv('aisdHolidayDates_schoolYear_23_24.csv')
aisdHolidays_school23_24 = aisdHolidays_school23_24.drop(columns='Holiday')
aisdHolidays_school23_24

Unnamed: 0,Date,StudentHoliday,FederalHoliday
0,2023-09-04,True,True
1,2023-10-09,True,True
2,2023-11-13,True,False
3,2023-11-20,True,False
4,2023-11-21,True,False
5,2023-11-22,True,False
6,2023-11-23,True,True
7,2023-11-24,True,False
8,2023-12-21,True,False
9,2023-12-22,True,False


In [1]:
### define the first and last dates of the 2023-2024 school year (both inclusive)
### the range starts on the first class day, 14 August 2023, and ends on the last class day, 23 May 2024
### it must not start earlier: this section derives is_school_day from the weekday and student holiday flags only
### (there is no summer day check), so any weekday before the first class day would be marked as a school day
first_school_date = datetime(year=2023, month=8, day=14)
last_school_date = datetime(year=2024, month=5, day=23)
### timedelta between the two bounds; its .days value is one less than the number of calendar days covered
date_range_school = (last_school_date - first_school_date)

NameError: name 'datetime' is not defined

In [91]:
### list of every date in the school year, running backwards from last_school_date to first_school_date
date_list_school = [
    last_school_date - timedelta(days=days_back)
    for days_back in range(date_range_school.days + 1)
]

In [92]:
### map every school-year date to True for a weekday (Monday-Friday) and False for a weekend day
### datetime.weekday() returns 0 for Monday through 6 for Sunday
weekday_school = {day: day.weekday() < 5 for day in date_list_school}

In [93]:
### turn the weekday dictionary into a two-column dataframe: one row per date with its weekday flag
weekday_school_df = pd.DataFrame(list(weekday_school.items()), columns=['Date', 'is_weekday'])

In [94]:
### cast the Date column of both dataframes to datetime64[ns] so the two can be matched on Date
weekday_school_df = weekday_school_df.astype({'Date': 'datetime64[ns]'})
aisdHolidays_school23_24 = aisdHolidays_school23_24.astype({'Date': 'datetime64[ns]'})

In [95]:
### left-join the school-year holiday flags onto the full list of school-year dates, matching rows on Date
### dates that are not in the holiday dataframe get NaN in the holiday columns, so those are replaced with False
schoolWork_schedule23_24_sy = weekday_school_df.merge(aisdHolidays_school23_24, how='left', on='Date')
schoolWork_schedule23_24_sy = schoolWork_schedule23_24_sy.fillna(False)
schoolWork_schedule23_24_sy

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday
0,2024-05-23,True,False,False
1,2024-05-22,True,False,False
2,2024-05-21,True,False,False
3,2024-05-20,True,False,False
4,2024-05-19,False,False,False
...,...,...,...,...
279,2023-08-18,True,False,False
280,2023-08-17,True,False,False
281,2023-08-16,True,False,False
282,2023-08-15,True,False,False


In [96]:
### is_school_day entry is 1 only when the date is a weekday that is not a student holiday, 0 otherwise
### no summer day check is applied in this section
schoolWork_schedule23_24_sy['is_school_day'] = [
    1 if (on_weekday and not student_holiday) else 0
    for on_weekday, student_holiday in zip(
        schoolWork_schedule23_24_sy['is_weekday'],
        schoolWork_schedule23_24_sy['StudentHoliday'],
    )
]

In [97]:
### is_work_day entry is 1 only when the date is a weekday that is not a federal holiday, 0 otherwise
schoolWork_schedule23_24_sy['is_work_day'] = [
    1 if (on_weekday and not federal_holiday) else 0
    for on_weekday, federal_holiday in zip(
        schoolWork_schedule23_24_sy['is_weekday'],
        schoolWork_schedule23_24_sy['FederalHoliday'],
    )
]

In [103]:
### spot check: the 20 latest dates, to compare is_school_day and is_work_day against the flag columns they are derived from
schoolWork_schedule23_24_sy.sort_values(by='Date').tail(n=20)

Unnamed: 0,Date,is_weekday,StudentHoliday,FederalHoliday,is_school_day,is_work_day
19,2024-05-04,False,False,False,0,0
18,2024-05-05,False,False,False,0,0
17,2024-05-06,True,False,False,1,1
16,2024-05-07,True,False,False,1,1
15,2024-05-08,True,False,False,1,1
14,2024-05-09,True,False,False,1,1
13,2024-05-10,True,False,False,1,1
12,2024-05-11,False,False,False,0,0
11,2024-05-12,False,False,False,0,0
10,2024-05-13,True,False,False,1,1


In [99]:
### build the school-year calendar dataframe: rename Date to lowercase 'date', then drop the intermediate flag columns
school_work_calender23_24_sy = schoolWork_schedule23_24_sy.rename(columns={'Date': 'date'})
school_work_calender23_24_sy = school_work_calender23_24_sy.drop(
    columns=['is_weekday', 'StudentHoliday', 'FederalHoliday']
)

In [100]:
### put the school-year calendar in chronological order (earliest date first)
### reset_index() then renumbers the rows from 0 in that order; the previous row labels are kept in a new 'index' column
school_work_calender23_24_sySeq = school_work_calender23_24_sy.sort_values(by='date')
school_work_calender23_24_sySeq = school_work_calender23_24_sySeq.reset_index()

In [101]:
### convert the dates to strings in YYYY-MM-DD format
school_work_calender23_24_sySeq = school_work_calender23_24_sySeq.assign(
    date=school_work_calender23_24_sySeq['date'].dt.strftime('%Y-%m-%d')
)

In [102]:
### remove the 'index' column (the old row labels kept by reset_index), then display the finished calendar
school_work_calender23_24_sySeq = school_work_calender23_24_sySeq.drop(columns='index')
school_work_calender23_24_sySeq

Unnamed: 0,date,is_school_day,is_work_day
0,2023-08-14,1,1
1,2023-08-15,1,1
2,2023-08-16,1,1
3,2023-08-17,1,1
4,2023-08-18,1,1
...,...,...,...
279,2024-05-19,0,0
280,2024-05-20,1,1
281,2024-05-21,1,1
282,2024-05-22,1,1
