# Daily Staffing Analysis
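This notebook reads the per-person Excel files in `./data/raw`, combines their rows into a single raw-data table (`./results/combined_raw_data.csv`), and then builds a daily staffing summary workbook (`./results/daily_summary_output.xlsx`) containing one formatted block per date.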

In [57]:
# %pip install -q pandas xlrd openpyxl

In [58]:
import glob
import os
import pandas as pd
import xlrd
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Font
from datetime import datetime

In [59]:
# Configuration
# Input: folder that is scanned for the per-person Excel files.
DATA_FOLDER = "./data/raw"
# Output: CSV file that receives the combined raw rows from every input file.
COMBINED_CSV = "./results/combined_raw_data.csv"

In [60]:
# Extract raw data
# Reads the first sheet of every workbook in DATA_FOLDER and collects one record per
# data row (date, individual, service provider, duration), then writes them to COMBINED_CSV.

# Column positions are 0-based indexes, as xlrd expects.
date_cell_idx = 0
provider_cell_idx = 6
duration_cell_idx = 3

records = []
# sorted() keeps the processing order (and therefore the CSV row order) reproducible;
# glob.glob() returns matches in arbitrary, filesystem-dependent order.
# NOTE: "*.xls*" also matches .xlsx/.xlsm files, which xlrd >= 2.0 refuses to open.
for filepath in sorted(glob.glob(os.path.join(DATA_FOLDER, "*.xls*"))):
    wb = xlrd.open_workbook(filepath)
    sh = wb.sheet_by_index(0)
    individual = (
        sh.cell_value(2, 3).split(",")[0].strip()
    )  # Individual name is in D3 cell, split by comma and take first

    print(f"Processing file: {filepath} | Individual: {individual}")
    # The first 39 rows are metadata and headers, so we start from row 40
    # NOTE(review): xlrd row indexes are 0-based, so index 40 is the 41st sheet row.
    # If the data really begins on sheet row 40 this should be 39 - confirm against a sample file.
    for rx in range(40, sh.nrows):
        date_cell = sh.cell_value(rx, date_cell_idx)
        if date_cell in (None, ""):
            continue

        if isinstance(date_cell, str):
            # Text cell: let pandas parse it; non-date text (e.g. section titles) is skipped.
            try:
                dt = pd.to_datetime(date_cell).date()
            except ValueError:
                print(f"Invalid date format in row {rx}: {date_cell}")
                continue
        elif sh.cell_type(rx, date_cell_idx) == xlrd.XL_CELL_DATE:
            # Native Excel date: xlrd hands back a serial number that must be converted
            # with the workbook's date mode. Previously such rows silently reused the
            # date of the preceding row (or raised NameError on the first row).
            try:
                dt = xlrd.xldate_as_datetime(date_cell, wb.datemode).date()
            except (ValueError, OverflowError):
                print(f"Invalid date format in row {rx}: {date_cell}")
                continue
        else:
            # Plain numbers, booleans and error cells are not dates.
            print(f"Invalid date format in row {rx}: {date_cell}")
            continue

        # str() guards against a provider cell that Excel stored as a number.
        provider = str(sh.cell_value(rx, provider_cell_idx)).split(",")[0].strip()
        duration = sh.cell_value(rx, duration_cell_idx)

        records.append(
            {
                "Date": dt,
                "Individual": individual,
                "Service Provider": provider,
                "Duration": duration,
            }
        )

# Fixing the columns keeps the CSV header present even when no rows were extracted,
# so the later read_csv(..., parse_dates=["Date"]) still finds its column.
df_raw = pd.DataFrame(
    records, columns=["Date", "Individual", "Service Provider", "Duration"]
)
# Create the output folder on a fresh checkout instead of failing inside to_csv.
os.makedirs(os.path.dirname(COMBINED_CSV) or ".", exist_ok=True)
df_raw.to_csv(COMBINED_CSV, index=False)
print(f"Combined raw data saved to {COMBINED_CSV}")

# Disabled Excel export; COMBINED_XLSX is not defined in the configuration cell.
# df_raw.to_excel(COMBINED_XLSX, index=False)
# print(f"Combined raw data saved to {COMBINED_XLSX}")

Processing file: ./data/raw/GP.xls | Individual: George Poulston
Invalid date format in row 100: Comments/Recommendations
Processing file: ./data/raw/HR.xls | Individual: Harold Russell
Invalid date format in row 129: Comments/Recommendations
Combined raw data saved to ./results/combined_raw_data.csv


In [61]:
# --- Configuration ---
SUMMARY_SHEET = "DailyMatrix"
OUTPUT_XLSX = "./results/daily_summary_output.xlsx"

# --- Load and prepare raw data ---
# "Date" is parsed as datetime on read and then reduced to plain datetime.date
# objects so rows can be grouped by calendar day.
df = pd.read_csv(COMBINED_CSV, parse_dates=["Date"]).assign(
    Date=lambda frame: frame["Date"].dt.date
)

# One summary column per individual, in alphabetical order.
individuals = sorted(df["Individual"].unique())

# --- Start a new workbook for summary ---
bold = Font(bold=True)  # style applied to header and label cells
row = 1  # next worksheet row to write (1-based)
wb = Workbook()
ws = wb.active
ws.title = SUMMARY_SHEET

In [None]:
# --- Convert Duration to numeric (hours as float) ---
# The dtype guard makes this cell safe to re-run: after the first run the column already
# holds float hours, and the string handling below would fail on it.
# NOTE(review): a column that is numeric straight from the CSV is also left untouched -
# confirm that raw durations are always "h:mm" / "h:mm:ss" text.
if not pd.api.types.is_numeric_dtype(df["Duration"]):
    # Ensure Duration is in hh:mm:ss format by appending ":00" if necessary.
    # Non-string values (missing durations are NaN after the CSV round trip) are passed
    # through unchanged; pd.to_timedelta turns them into NaT, i.e. NaN hours, which the
    # later .sum() calls skip.
    duration_text = df["Duration"].map(
        lambda x: f"{x}:00" if isinstance(x, str) and x.count(":") == 1 else x
    )
    df["Duration"] = pd.to_timedelta(duration_text).dt.total_seconds() / 3600

# --- Generate per-date blocks ---
# For every date, writes one block to the worksheet: a date header, a column-header row,
# one row per service provider with hours per individual, a per-individual total row and
# a row with the hours still missing to reach 24h. Blocks are separated by a blank row.

# Start from an empty sheet so that re-running this cell rewrites the summary instead of
# appending a second copy below the blocks written by the previous run.
ws.delete_rows(1, ws.max_row)
row = 1

total_col = 2 + len(individuals)  # column directly after the last individual

for current_date in sorted(df["Date"].unique()):
    day_df = df[df["Date"] == current_date]

    # Date header
    ws.cell(row=row, column=1, value=current_date.strftime("%m/%d/%Y")).font = bold
    row += 1

    # Column headers
    ws.cell(row=row, column=1, value="Service Provider").font = bold
    for idx, indiv in enumerate(individuals, start=2):
        ws.cell(row=row, column=idx, value=indiv).font = bold
    ws.cell(row=row, column=total_col, value="Provider Total").font = bold
    row += 1

    # One row per service provider (in order of first appearance on that day)
    for provider in day_df["Service Provider"].unique():
        # Filter by provider once instead of once per individual.
        provider_df = day_df[day_df["Service Provider"] == provider]
        ws.cell(row=row, column=1, value=provider)
        total_h = 0
        for idx, indiv in enumerate(individuals, start=2):
            hours = provider_df.loc[provider_df["Individual"] == indiv, "Duration"].sum()
            ws.cell(row=row, column=idx, value=hours)
            total_h += hours
        ws.cell(row=row, column=total_col, value=total_h)
        row += 1

    # Per-individual totals for the day, computed once and reused by both rows below.
    day_totals = [
        day_df.loc[day_df["Individual"] == indiv, "Duration"].sum()
        for indiv in individuals
    ]

    # Totals per individual
    ws.cell(row=row, column=1, value="Total hours for individual").font = bold
    for idx, tot in enumerate(day_totals, start=2):
        ws.cell(row=row, column=idx, value=tot)
    row += 1

    # Remaining hours to reach 24h cap (never negative)
    ws.cell(row=row, column=1, value="Total hrs pending in a 24hr period").font = bold
    for idx, tot in enumerate(day_totals, start=2):
        ws.cell(row=row, column=idx, value=max(24 - tot, 0))
    row += 2  # blank line


# Create the output folder if needed instead of failing inside wb.save.
os.makedirs(os.path.dirname(OUTPUT_XLSX) or ".", exist_ok=True)
wb.save(OUTPUT_XLSX)
print(f"Daily summary written to {OUTPUT_XLSX}")


Daily summary written to ./results/daily_summary_output.xlsx
