# In [1]:
# final_df_creation_python_file.py

import pandas as pd
import numpy as np
from datetime import datetime

# Input: read the staffing history and parse its 'date' column into datetimes
staff_df = pd.read_csv("staffing_train.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Calendar of every day from 2021-01-01 through 2025-12-31 (both inclusive)
all_dates = pd.date_range(start="2021-01-01", end="2025-12-31")

# Keep only Saturdays and Sundays (day-of-week 5 and 6, Monday being 0) and
# store each one next to its day name
weekend_dates = all_dates[all_dates.dayofweek >= 5]
weekend_df = pd.DataFrame({
    'date': weekend_dates,
    'weekday': weekend_dates.day_name(),
})

# Sri Lanka holidays for 2021-2025 (from notebook web_holiday_data).
# Each entry is a (date string in YYYY-MM-DD form, holiday name) pair, and the
# entries are grouped by year. A date can appear more than once when two
# holidays fall on the same day (see 2022-10-09).
web_holiday_data = [
    # 2021 Holidays
    ('2021-01-14', 'Tamil Thai Pongal Day'),
    ('2021-01-28', 'Duruthu Full Moon Poya'),
    ('2021-02-04', 'National Day'),
    ('2021-02-26', 'Navam Full Moon Poya'),
    ('2021-03-11', 'Mahasivarathri Day'),
    ('2021-03-28', 'Madin Full Moon Poya'),
    ('2021-04-02', 'Good Friday'),
    ('2021-04-12', 'Sinhala and Tamil New Year Holiday'),
    ('2021-04-13', 'Sinhala and Tamil New Year Eve'),
    ('2021-04-14', 'Sinhala and Tamil New Year'),
    ('2021-04-26', 'Bak Full Moon Poya'),
    ('2021-05-01', 'Labour Day'),
    ('2021-05-14', 'Id-Ul-Fitr (Ramazan Festival Day)'),
    ('2021-05-24', 'Special Public Holiday'),
    ('2021-05-25', 'Special Public Holiday'),
    ('2021-05-26', 'Vesak Full Moon Poya'),
    ('2021-05-27', 'Vesak Full Moon Poya Holiday'),
    ('2021-06-24', 'Poson Full Moon Poya'),
    ('2021-07-21', 'Idul Adha'),
    ('2021-07-23', 'Esala Full Moon Poya'),
    ('2021-08-22', 'Nikini Full Moon Poya'),
    ('2021-09-20', 'Binara Full Moon Poya'),
    ('2021-10-19', 'Milad-Un-Nabi'),
    ('2021-10-20', 'Vap Full Moon Poya'),
    ('2021-11-04', 'Deepavali Festival Day'),
    ('2021-11-18', 'Ill Full Moon Poya'),
    ('2021-12-18', 'Unduvap Full Moon Poya'),
    ('2021-12-25', 'Christmas Day'),
    # 2022 Holidays
    ('2022-01-14', 'Tamil Thai Pongal Day'),
    ('2022-01-17', 'Duruthu Full Moon Poya'),
    ('2022-02-04', 'National Day'),
    ('2022-02-16', 'Navam Full Moon Poya'),
    ('2022-03-01', 'Mahasivarathri Day'),
    ('2022-03-17', 'Madin Full Moon Poya'),
    ('2022-04-11', 'Special Public Holiday'),
    ('2022-04-12', 'Special Public Holiday'),
    ('2022-04-13', 'Sinhala and Tamil New Year Eve'),
    ('2022-04-14', 'Sinhala and Tamil New Year'),
    ('2022-04-15', 'Good Friday'),
    ('2022-04-16', 'Bak Full Moon Poya'),
    ('2022-05-01', 'Labour Day'),
    ('2022-05-02', 'Labour Day (in lieu)'),
    ('2022-05-03', 'Id-Ul-Fitr'),
    ('2022-05-15', 'Vesak Full Moon Poya'),
    ('2022-05-16', 'Vesak Full Moon Poya Holiday'),
    ('2022-06-13', 'Special Public Holiday'),
    ('2022-06-14', 'Poson Full Moon Poya'),
    ('2022-06-17', 'Sri Lanka Friday Holiday'),
    ('2022-06-24', 'Sri Lanka Friday Holiday'),
    ('2022-07-01', 'Sri Lanka Friday Holiday'),
    ('2022-07-08', 'Sri Lanka Friday Holiday'),
    ('2022-07-10', 'Idul Adha'),
    ('2022-07-13', 'Esala Full Moon Poya'),
    ('2022-07-15', 'Sri Lanka Friday Holiday'),
    ('2022-07-22', 'Sri Lanka Friday Holiday'),
    ('2022-07-29', 'Sri Lanka Friday Holiday'),
    ('2022-08-11', 'Nikini Full Moon Poya'),
    ('2022-09-10', 'Binara Full Moon Poya'),
    ('2022-09-19', 'Special Public Holiday'),
    ('2022-10-09', 'Vap Full Moon Poya'),
    ('2022-10-09', 'Milad-Un-Nabi'),
    ('2022-10-10', 'Milad-Un-Nabi (in lieu)'),
    ('2022-10-24', 'Deepavali Festival Day'),
    ('2022-11-07', 'Ill Full Moon Poya'),
    ('2022-12-07', 'Unduvap Full Moon Poya'),
    ('2022-12-25', 'Christmas Day'),
    ('2022-12-26', 'Special Public Holiday (in lieu)'),
    # 2023 Holidays
    ('2023-01-06', 'Duruthu Full Moon Poya'),
    ('2023-01-15', 'Tamil Thai Pongal Day'),
    ('2023-02-04', 'National Day'),
    ('2023-02-05', 'Navam Full Moon Poya'),
    ('2023-02-18', 'Mahasivarathri Day'),
    ('2023-03-06', 'Madin Full Moon Poya'),
    ('2023-04-05', 'Bak Full Moon Poya'),
    ('2023-04-07', 'Good Friday'),
    ('2023-04-13', 'Sinhala and Tamil New Year Eve'),
    ('2023-04-14', 'Sinhala and Tamil New Year'),
    ('2023-04-22', 'Id-Ul-Fitr'),
    ('2023-05-01', 'Labour Day'),
    ('2023-05-05', 'Vesak Full Moon Poya'),
    ('2023-05-06', 'Vesak Full Moon Poya Holiday'),
    ('2023-06-03', 'Poson Full Moon Poya'),
    ('2023-06-29', 'Idul Adha'),
    ('2023-07-03', 'Esala Full Moon Poya'),
    ('2023-08-01', 'Nikini Full Moon Poya'),
    ('2023-08-30', 'Adhi Nikini Full Moon Poya'),
    ('2023-09-28', 'Milad-Un-Nabi'),
    ('2023-09-29', 'Binara Full Moon Poya'),
    ('2023-10-28', 'Vap Full Moon Poya'),
    ('2023-11-12', 'Deepavali Festival Day'),
    ('2023-11-26', 'Ill Full Moon Poya'),
    ('2023-12-25', 'Christmas Day'),
    ('2023-12-26', 'Unduvap Full Moon Poya'),
    # 2024 Holidays
    ('2024-01-15', 'Tamil Thai Pongal Day'),
    ('2024-01-25', 'Duruthu Full Moon Poya'),
    ('2024-02-04', 'National Day'),
    ('2024-02-23', 'Navam Full Moon Poya'),
    ('2024-03-08', 'Mahasivarathri Day'),
    ('2024-03-24', 'Madin Full Moon Poya'),
    ('2024-03-29', 'Good Friday'),
    ('2024-04-11', 'Id-Ul-Fitr'),
    ('2024-04-12', 'Sinhala and Tamil New Year Eve'),
    ('2024-04-13', 'Sinhala and Tamil New Year'),
    ('2024-04-15', 'Sinhala and Tamil New Year (in lieu)'),
    ('2024-04-23', 'Bak Full Moon Poya'),
    ('2024-05-01', 'Labour Day'),
    ('2024-05-23', 'Vesak Full Moon Poya'),
    ('2024-05-24', 'Vesak Full Moon Poya Holiday'),
    ('2024-06-17', 'Idul Adha'),
    ('2024-06-21', 'Poson Full Moon Poya'),
    ('2024-07-20', 'Esala Full Moon Poya'),
    ('2024-08-19', 'Nikini Full Moon Poya'),
    ('2024-09-16', 'Milad-Un-Nabi'),
    ('2024-09-17', 'Binara Full Moon Poya'),
    ('2024-09-23', 'Public Holiday'),
    ('2024-10-17', 'Vap Full Moon Poya'),
    ('2024-10-31', 'Deepavali Festival Day'),
    ('2024-11-15', 'Ill Full Moon Poya'),
    ('2024-12-14', 'Unduvap Full Moon Poya'),
    ('2024-12-25', 'Christmas Day'),
    # 2025 Holidays
    ('2025-01-13', 'Duruthu Full Moon Poya'),
    ('2025-01-14', 'Tamil Thai Pongal Day'),
    ('2025-02-04', 'National Day'),
    ('2025-02-12', 'Navam Full Moon Poya'),
    ('2025-02-26', 'Mahasivarathri Day'),
    ('2025-03-13', 'Madin Full Moon Poya'),
    ('2025-03-31', 'Id-Ul-Fitr'),
    ('2025-04-12', 'Bak Full Moon Poya'),
    ('2025-04-13', 'Sinhala and Tamil New Year Eve'),
    ('2025-04-14', 'Sinhala and Tamil New Year'),
    ('2025-04-15', 'Special Bank Holiday'),
    ('2025-04-18', 'Good Friday'),
    ('2025-05-01', 'Labour Day'),
    ('2025-05-12', 'Vesak Full Moon Poya'),
    ('2025-05-13', 'Vesak Full Moon Poya Holiday'),
    ('2025-06-07', 'Idul Adha'),
    ('2025-06-10', 'Poson Full Moon Poya'),
    ('2025-07-10', 'Esala Full Moon Poya'),
    ('2025-08-08', 'Nikini Full Moon Poya'),
    ('2025-09-05', 'Milad-Un-Nabi'),
    ('2025-09-07', 'Binara Full Moon Poya'),
    ('2025-10-06', 'Vap Full Moon Poya'),
    ('2025-10-20', 'Deepavali Festival Day'),
    ('2025-11-05', 'Ill Full Moon Poya'),
    ('2025-12-04', 'Unduvap Full Moon Poya'),
    ('2025-12-25', 'Christmas Day'),
]
# Tabulate the (date, name) pairs and parse the date strings into datetimes
web_holiday_df = pd.DataFrame.from_records(
    web_holiday_data, columns=['date', 'holiday_name']
).assign(date=lambda frame: pd.to_datetime(frame['date']))

# Dates to remove from the holiday table (from notebook exclude_dates)
exclude_dates = [
    '2023-09-29', '2022-05-02', '2023-06-29',
    '2022-06-24', '2022-07-08', '2024-09-23',
    '2022-06-13', '2022-09-19', '2023-08-30',
    '2022-07-15', '2022-07-01', '2023-03-06',
    '2021-05-25', '2022-07-22', '2022-07-29',
    '2021-05-24', '2022-06-17',
]
exclude_dates_dt = pd.to_datetime(exclude_dates)

# Keep every holiday row whose date is not excluded, renumbering rows from 0
keep_mask = ~web_holiday_df['date'].isin(exclude_dates_dt)
filtered_holiday = web_holiday_df.loc[keep_mask].reset_index(drop=True)

# Create final DataFrame by combining staff, weekend, and filtered_holiday.
#
# One row is produced for every (calendar date, section) pair, with dates
# varying slowest. A value with no match falls back to the integer 0:
#   - employees_on_duty / total_task_time_minutes: taken from the first
#     staffing row for that (date, section) pair, else 0
#   - weekday: the day name from weekend_df when the date is a weekend, else 0
#   - holiday_name: the first holiday listed for the date, else 0
#
# Each source table is indexed into a dict once, so a pair costs a dict access
# rather than a boolean-mask scan over the whole frame on every iteration.
sections = staff_df['section_id'].unique()

# setdefault keeps the first value seen for a key, so a duplicated key
# (e.g. two holidays on one date) resolves to its first row.
staff_lookup = {}
for staff_date, staff_section, on_duty, task_minutes in zip(
    staff_df['date'],
    staff_df['section_id'],
    staff_df['employees_on_duty'].values,
    staff_df['total_task_time_minutes'].values,
):
    staff_lookup.setdefault((staff_date, staff_section), (on_duty, task_minutes))

weekday_lookup = {}
for weekend_date, day_name in zip(weekend_df['date'], weekend_df['weekday'].values):
    weekday_lookup.setdefault(weekend_date, day_name)

holiday_lookup = {}
for holiday_date, name in zip(
    filtered_holiday['date'], filtered_holiday['holiday_name'].values
):
    holiday_lookup.setdefault(holiday_date, name)

final_rows = []
for date in all_dates:
    # Weekend and holiday status depend only on the date, so resolve them
    # once per date instead of once per section.
    weekday = weekday_lookup.get(date, 0)
    holiday_name = holiday_lookup.get(date, 0)
    for section in sections:
        employees_on_duty, total_task_time_minutes = staff_lookup.get(
            (date, section), (0, 0)
        )
        final_rows.append({
            'date': date,
            'section_id': section,
            'employees_on_duty': employees_on_duty,
            'total_task_time_minutes': total_task_time_minutes,
            'weekday': weekday,
            'holiday_name': holiday_name
        })

# Naming the columns explicitly keeps the frame well-formed (and the 'date'
# lookup below valid) even when there are no sections and final_rows is empty.
final_df = pd.DataFrame(
    final_rows,
    columns=[
        'date',
        'section_id',
        'employees_on_duty',
        'total_task_time_minutes',
        'weekday',
        'holiday_name',
    ],
)
final_df['date'] = pd.to_datetime(final_df['date'])

# Turn 'holiday_name' and 'weekday' into 0/1 flags: any value other than the
# 0 placeholder marks the row as a holiday / weekend
final_df['is_holiday'] = final_df['holiday_name'].ne(0).astype('int64')
final_df['is_weekend'] = final_df['weekday'].ne(0).astype('int64')

# The exported frame carries the flags in place of the descriptive columns
final_binary_submission_df = final_df.drop(columns=['holiday_name', 'weekday'])

# Output
final_binary_submission_df.to_csv('final_binary_submission_df.csv', index=False)