### LOAD AND CLEAN DATA

In [None]:
import pandas as pd
import holidays
import numpy as np
import datetime as dt
from dateutil.relativedelta import relativedelta
from ortools.sat.python import cp_model

### Preprocess

### METRO-ELECTRIC District & Interval 01


In [None]:
# District whose jobs are selected from the planning file (matched against the DISTRICT column).
DISTRICT = "METRO-ELECTRIC"
# Order description used to select the jobs (matched against SEMPRAORDERDESCRIPTION);
# its trailing number is parsed further down as the planning month.
INTERVAL = "PAD MOUNT 5Y-4 INTERVAL 01"

def get_planning(file_link, district, interval):
    """Load the planning CSV and return the jobs of one district and interval.

    Args:
        file_link: Path, URL or file-like object accepted by ``pd.read_csv``.
        district: Value matched exactly against the ``DISTRICT`` column.
        interval: Value matched exactly against ``SEMPRAORDERDESCRIPTION``.

    Returns:
        A new, independent DataFrame (index reset to 0..n-1) containing only
        the matching rows, with ``EARLYSTART``/``DUEDATE`` parsed as
        datetimes, ``LATITUDE``/``LONGITUDE`` divided by 1e6 and an integer
        ``duration_min`` column (``DURATION`` / 60, truncated).
    """
    df = pd.read_csv(file_link)

    for date_col in ("EARLYSTART", "DUEDATE"):
        df[date_col] = pd.to_datetime(df[date_col])

    for coord_col in ("LATITUDE", "LONGITUDE"):
        df[coord_col] = df[coord_col] / 1e6

    selected = (
        (df["DISTRICT"] == district)
        & (df["SEMPRAORDERDESCRIPTION"] == interval)
    )
    # .copy() detaches the result from ``df`` so that callers can add columns
    # to it without pandas raising SettingWithCopyWarning.
    jobs = df.loc[selected].copy().reset_index(drop=True)

    # duration in minutes; computed after filtering so that a missing DURATION
    # in a row of another district/interval cannot break the integer cast.
    jobs["duration_min"] = (jobs["DURATION"] / 60).astype(int)
    return jobs

# Jobs of the configured district/interval, loaded from the planning CSV.
jobs = get_planning('Assignment2_Planning.csv', DISTRICT, INTERVAL)

In [None]:
# ---- Planning parameters ----------------------------------------------------
# Planning month = trailing number of the interval label (e.g. "... INTERVAL 01" -> 1).
MONTH = int(INTERVAL.rsplit(" ", 1)[-1])
YEAR = 2023
BASE_CREWS = 3                      # crews available every day
FOURTH_CREW_MAX_SHARE = 0.25        # max share of business days that may use a 4th crew
SHIFT_MINUTES_PER_CREW = 60 * 8     # minutes per crew shift

# ---- Objective weights (larger = more important) ----------------------------
W_LATE_JOB = 1000           # Minimize number of late jobs (primary)
W_OUTSIDE_MONTH = 100       # Penalize jobs scheduled outside first month of interval
W_EXTRA_CREW_DAY = 10       # Penalize using 4th crew on a day
# W_LATER_DAY = 0           # Mild preference for earlier days overall
# W_IDLE_DAY = 0            # Penalize idle days to spread work out

# First and last calendar day of the planning month.
# relativedelta(day=31) is clipped by dateutil to the month's last day.
planning_month_start = dt.date(year=YEAR, month=MONTH, day=1)
planning_month_end = planning_month_start + relativedelta(day=31)

In [None]:
# US holiday calendar covering the planning year up to the year after the latest due date.
us_holidays = holidays.UnitedStates(
    years=list(range(planning_month_start.year, int(jobs["DUEDATE"].max().year) + 2))
)

# Scheduling horizon: from the first day of the planning month to one month
# past the latest due date.
horizon_start = planning_month_start
max_due_date = jobs["DUEDATE"].dt.date.max()
horizon_end = max_due_date + relativedelta(months=1)

# Keep only weekdays (Mon-Fri) that are not US holidays.
all_days = pd.date_range(horizon_start, horizon_end, freq="D")
business_days = [
    stamp.date() for stamp in all_days
    if not (stamp.weekday() >= 5 or stamp.date() in us_holidays)
]

if len(business_days) == 0:
    raise ValueError("No business days in the chosen horizon. Check dates and holidays.")

# Two-way mapping between calendar dates and consecutive business-day indices.
idx_to_day = dict(enumerate(business_days))
day_to_idx = {day: idx for idx, day in idx_to_day.items()}
D = len(idx_to_day)

print(f"Business-day horizon: {business_days[0]} to {business_days[-1]} ({D} days).")

In [None]:
def next_business_day_on_or_after(given_date):
    """Return the first business day on or after ``given_date``.

    A business day is a Monday-Friday that is not contained in the
    module-level ``us_holidays`` calendar.

    Args:
        given_date: ``pd.Timestamp``, ``datetime.datetime`` or
            ``datetime.date``.

    Returns:
        The business day as a ``datetime.date``.

    Raises:
        ValueError: If ``given_date`` is missing (``None``/``NaT``). Without
            this guard a ``NaT`` never satisfies the weekday test and the
            search would loop forever.
    """
    if pd.isna(given_date):
        raise ValueError("Cannot determine a business day for a missing date (None/NaT).")

    # Timestamps/datetimes are reduced to their calendar date; plain dates are used as-is.
    candidate = given_date.date() if isinstance(given_date, dt.datetime) else given_date
    one_day = dt.timedelta(days=1)
    # move to next day until business day
    while candidate.weekday() >= 5 or candidate in us_holidays:
        candidate += one_day
    return candidate

def prev_business_day_on_or_before(given_date):
    """Return the last business day on or before ``given_date``.

    A business day is a Monday-Friday that is not contained in the
    module-level ``us_holidays`` calendar.

    Args:
        given_date: ``pd.Timestamp``, ``datetime.datetime`` or
            ``datetime.date``.

    Returns:
        The business day as a ``datetime.date``.

    Raises:
        ValueError: If ``given_date`` is missing (``None``/``NaT``). Without
            this guard a ``NaT`` never satisfies the weekday test and the
            search would loop forever.
    """
    if pd.isna(given_date):
        raise ValueError("Cannot determine a business day for a missing date (None/NaT).")

    # Timestamps/datetimes are reduced to their calendar date; plain dates are used as-is.
    candidate = given_date.date() if isinstance(given_date, dt.datetime) else given_date
    one_day = dt.timedelta(days=1)
    # move to previous day until business day
    while candidate.weekday() >= 5 or candidate in us_holidays:
        candidate -= one_day
    return candidate


In [None]:
# cannot start before earliest start
# Earliest business day each job may be scheduled on (never before EARLYSTART).
jobs["earliest_bd"] = jobs["EARLYSTART"].apply(next_business_day_on_or_after)

# Last on-time business day. Jobs scheduled AFTER this date are considered past due.
jobs["due_bd"] = jobs["DUEDATE"].apply(prev_business_day_on_or_before)

first_bd = business_days[0]
last_bd = business_days[-1]

# Dates that fall before the horizon are clamped to its first business day.
# This is needed for BOTH columns: an EARLYSTART before the planning month
# would otherwise be missing from day_to_idx and raise a bare KeyError below.
jobs.loc[jobs["due_bd"] < first_bd, "due_bd"] = first_bd
jobs.loc[jobs["earliest_bd"] < first_bd, "earliest_bd"] = first_bd

# A job that cannot start before the horizon ends cannot be scheduled at all;
# fail with an explicit message instead of a KeyError.
starts_after_horizon = jobs["earliest_bd"] > last_bd
if starts_after_horizon.any():
    raise ValueError(
        f"{int(starts_after_horizon.sum())} job(s) have an earliest start after the "
        f"end of the scheduling horizon ({last_bd})."
    )

# Earliest index for scheduling
jobs["earliest_idx"] = [day_to_idx[day] for day in jobs["earliest_bd"]]

# Index of last on-time day
jobs["due_idx"] = [day_to_idx[day] for day in jobs["due_bd"]]

# Job weights: duration (minutes) times required crew size
jobs["job_weight"] = (jobs["duration_min"] * jobs["REQUIREDCREWSIZE"]).astype(int)
job_weight = jobs["job_weight"].tolist()

J = len(jobs)
print(f"Total jobs to schedule: {J}")

In [None]:
# which business days are in the first available month.
# Business-day indices that fall inside the planning month (MONTH/YEAR), and
# the last of them; -1 signals that no business day lies in that month.
first_month_indices = [
    idx for idx, day in idx_to_day.items()
    if (day.year, day.month) == (YEAR, MONTH)
]
first_month_end_idx = max(first_month_indices, default=-1)
print(first_month_end_idx)

In [None]:
model = cp_model.CpModel()

# Per-job attributes pulled out of the DataFrame once. The loops below run
# O(J * D) times; a pandas ``.loc`` scalar lookup per iteration is far slower
# than indexing a plain list. (``jobs`` has a 0..J-1 index, so position == label.)
job_earliest_idx = [int(v) for v in jobs["earliest_idx"]]
job_due_idx = [int(v) for v in jobs["due_idx"]]
job_duration = [int(v) for v in jobs["duration_min"]]
job_crew_size = [int(v) for v in jobs["REQUIREDCREWSIZE"]]

# x[j, d] = 1 if job j is scheduled on business-day index d (d >= earliest_idx[j])
x = {}
for j in range(J):
    for d in range(job_earliest_idx[j], D):
        x[(j, d)] = model.NewBoolVar(f"x_j{j}_d{d}")

# ---- Daily workload (to ensure jobs spread out over days) -------------------
# Integer job_weight in crew-minutes
jobs["job_weight"] = (jobs["duration_min"] * jobs["REQUIREDCREWSIZE"]).astype(int)
job_weight = jobs["job_weight"].tolist()

# Upper bound on a day's load: all crews (base + the optional extra one) for a full shift.
max_daily_load = (BASE_CREWS + 1) * SHIFT_MINUTES_PER_CREW

# Daily workload for each day
load = []
for d in range(D):
    load_d = model.NewIntVar(0, max_daily_load, f"load_d{d}")
    load.append(load_d)

    terms = [
        job_weight[j] * x[(j, d)]
        for j in range(J)
        if d >= job_earliest_idx[j]
    ]

    if terms:
        model.Add(load_d == sum(terms))
    else:
        model.Add(load_d == 0)

# Max and min daily load.
# NOTE: these bound variables are not part of the objective in this notebook.
max_load = model.NewIntVar(0, max_daily_load, "max_load")
min_load = model.NewIntVar(0, max_daily_load, "min_load")

for d in range(D):
    model.Add(load[d] <= max_load)
    model.Add(load[d] >= min_load)


# ---- Intra-day intervals -----------------------------------------------------
# Add intervals to ensure:
#   - no overlapping jobs exceed crew capacity
#   - no job crosses the mandatory break (11:00-11:30)
intervals_per_day = [[] for _ in range(D)]
demands_per_day = [[] for _ in range(D)]
start_vars = {}

# Real-time constants (minutes after 07:00)
MORNING_START   = 0       # 07:00
MORNING_END     = 240     # 11:00
BREAK_START     = 240     # 11:00
BREAK_END       = 270     # 11:30
AFTERNOON_START = 270     # 11:30
SHIFT_END       = 510     # 15:30

MORNING_LEN = MORNING_END - MORNING_START
AFTERNOON_LEN = SHIFT_END - AFTERNOON_START

for j in range(J):
    dur = job_duration[j]
    fits_morning = dur <= MORNING_LEN
    fits_afternoon = dur <= AFTERNOON_LEN

    # If a job is longer than either block, it's infeasible
    if not (fits_morning or fits_afternoon):
        raise ValueError(f"Job {j} duration {dur} min cannot fit in either block.")

    for d in range(job_earliest_idx[j], D):
        # Morning: [0, MORNING_END - dur]
        start_morning   = model.NewIntVar(MORNING_START,
                                         max(MORNING_START, MORNING_END - dur),
                                         f"startM_j{j}_d{d}")
        # Afternoon: [AFTERNOON_START, SHIFT_END - dur]
        start_afternoon = model.NewIntVar(AFTERNOON_START,
                                          max(AFTERNOON_START, SHIFT_END - dur),
                                          f"startA_j{j}_d{d}")

        # Binary selectors
        is_morning   = model.NewBoolVar(f"is_morning_j{j}_d{d}")
        is_afternoon = model.NewBoolVar(f"is_afternoon_j{j}_d{d}")

        # Exactly one of morning/afternoon must be chosen when the job runs on day d
        model.Add(is_morning + is_afternoon == x[(j, d)])

        # A job that fits only one block must not be placed in the other one:
        # the clamped start domain above would otherwise let it run across the
        # break or past the end of the shift. With equally long blocks (as
        # configured here) neither constraint is ever added.
        if not fits_morning:
            model.Add(is_morning == 0)
        if not fits_afternoon:
            model.Add(is_afternoon == 0)

        start_jd = model.NewIntVar(MORNING_START,
                                   SHIFT_END - dur,
                                   f"start_j{j}_d{d}")
        end_jd = model.NewIntVar(MORNING_START,
                                 SHIFT_END,
                                 f"end_j{j}_d{d}")

        # Link start_jd to the start variable of the selected block
        model.Add(start_jd == start_morning).OnlyEnforceIf(is_morning)
        model.Add(start_jd == start_afternoon).OnlyEnforceIf(is_afternoon)

        model.Add(end_jd == start_jd + dur)

        # interval: present iff x[(j,d)] == 1
        interval_jd = model.NewOptionalIntervalVar(
            start_jd, dur, end_jd, x[(j, d)], f"interval_j{j}_d{d}"
        )

        intervals_per_day[d].append(interval_jd)
        demands_per_day[d].append(job_crew_size[j])

        start_vars[(j, d)] = start_jd


# ---- day_used variables (to penalize idle days) ------------------------------
# NOTE: the linking constraints below are disabled, so day_used is currently
# unconstrained and unused by the objective.
day_used = [model.NewBoolVar(f"day_used_{d}") for d in range(D)]

# for d in range(D):
#     jobs_on_d = []
#     for j in range(J):
#         e = int(jobs.loc[j, "earliest_idx"])
#         if d >= e:
#             jobs_on_d.append(x[(j, d)])
#     if jobs_on_d:
#         # If any job is scheduled on day d, day_used[d] must be 1
#         model.Add(sum(jobs_on_d) >= day_used[d])
#     else:
#         # No feasible jobs this day → force day_used[d] = 0
#         model.Add(day_used[d] == 0)

# ---- Each job must be scheduled exactly once ---------------------------------
for j in range(J):
    model.Add(sum(x[(j, d)] for d in range(job_earliest_idx[j], D)) == 1)

# ---- Lateness tracking -------------------------------------------------------
# late[j] = 1 if job j is scheduled AFTER its due date (past due)
late = [model.NewBoolVar(f"late_j{j}") for j in range(J)]

for j in range(J):
    due_idx = job_due_idx[j]
    # If earliest_idx > due_idx, then job is inevitably late. But still model it:
    for d in range(job_earliest_idx[j], D):
        if d > due_idx:
            model.Add(late[j] >= x[(j, d)])

# ---- First month preference --------------------------------------------------
# outside_month[j] = 1 if job j is scheduled after first_month_end_idx
outside_month = [model.NewBoolVar(f"outside_month_j{j}") for j in range(J)]

for j in range(J):
    if first_month_end_idx < 0:
        model.Add(outside_month[j] == 1)
    else:
        for d in range(job_earliest_idx[j], D):
            if d > first_month_end_idx:
                model.Add(outside_month[j] >= x[(j, d)])

# ---- Crew capacity -----------------------------------------------------------
# extra_crew[d] = 1 if we activate the 4th crew on day d
extra_crew = [model.NewBoolVar(f"extra_crew_d{d}") for d in range(D)]

# capacity_d in crew units (3 or 4)
capacity_vars = []
for d in range(D):
    cap_d = model.NewIntVar(BASE_CREWS, BASE_CREWS + 1, f"cap_d{d}")
    # cap_d = 3 + extra_crew[d]
    model.Add(cap_d == BASE_CREWS + extra_crew[d])
    capacity_vars.append(cap_d)

    if intervals_per_day[d]:
        model.AddCumulative(intervals_per_day[d],
                            demands_per_day[d],
                            cap_d)

# Limit use of 4th crew to at most 25% of business days
max_extra_days = int(np.floor(FOURTH_CREW_MAX_SHARE * D))
model.Add(sum(extra_crew[d] for d in range(D)) <= max_extra_days)

In [None]:
# Weighted objective, in decreasing order of priority:
#   1. number of past-due jobs
#   2. number of jobs scheduled outside the planning month
#   3. number of days on which the 4th crew is used
objective_terms = [
    W_LATE_JOB * sum(late),
    W_OUTSIDE_MONTH * sum(outside_month),
    W_EXTRA_CREW_DAY * sum(extra_crew),
]

# prefer earlier days overall (optional)
# for j in range(J):
#     e = int(jobs.loc[j, "earliest_idx"])
#     for d in range(e, D):
#         objective_terms.append(W_LATER_DAY * d * x[(j, d)])

# penalize idle days so solver tries to spread work out
# for d in range(D):
#     objective_terms.append(W_IDLE_DAY * (1 - day_used[d]))

model.Minimize(sum(objective_terms))

# Solve...

solver = cp_model.CpSolver()
solver.parameters.max_time_in_seconds = 120.0  # give it some time
solver.parameters.num_search_workers = 8

status = solver.Solve(model)

if status not in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    # Report the actual status: running out of time without a solution is a
    # different situation from a model that is proven infeasible.
    raise RuntimeError(
        "No feasible solution found under given constraints "
        f"(solver status: {solver.StatusName(status)})."
    )

print("Solver status:", solver.StatusName(status))

In [None]:
# Map to real minutes from 07:00
def compressed_to_real(m):
    """Convert a break-free ("compressed") minute offset into a real offset.

    Offsets below 240 are returned unchanged; offsets of 240 or more are
    shifted by the 30-minute break.
    """
    break_shift = 0 if m < 240 else 30  # 30-min break
    return m + break_shift

# Read the solution back: for every job, the chosen business day and its
# start/end time on that day.
assignments = []
for j in range(J):
    first_allowed = int(jobs.loc[j, "earliest_idx"])
    chosen_d = next(
        (d for d in range(first_allowed, D) if solver.BooleanValue(x[(j, d)])),
        None,
    )
    if chosen_d is None:
        # Explicit error instead of ``assert`` (asserts are stripped under -O).
        raise RuntimeError(f"Job {j} has no scheduled day in the solver solution.")

    sched_date = idx_to_day[chosen_d]
    # Start variables are minutes after 07:00 on the real timeline (0-510,
    # break included), so no compressed-to-real conversion is applied here.
    start_min = solver.Value(start_vars[(j, chosen_d)])
    dur = int(jobs.loc[j, "duration_min"])
    end_min = start_min + dur

    base_dt = dt.datetime.combine(sched_date, dt.time(7, 0))
    start_dt = base_dt + dt.timedelta(minutes=start_min)
    end_dt = base_dt + dt.timedelta(minutes=end_min)

    assignments.append({
        "CALLID": jobs.loc[j, "CALLID"],
        "EARLIEST STARTDATE": jobs.loc[j, "EARLYSTART"],
        "DUEDATE": jobs.loc[j, "DUEDATE"],
        "SCHEDULEDDATE": sched_date,
        "SCHEDULED_START": start_dt,
        "SCHEDULED_END": end_dt
    })

# Explicit columns keep the frame well-formed even when there are no jobs.
result = pd.DataFrame(
    assignments,
    columns=["CALLID", "EARLIEST STARTDATE", "DUEDATE",
             "SCHEDULEDDATE", "SCHEDULED_START", "SCHEDULED_END"],
)

# A job is late when it is scheduled on a later calendar day than its due date.
# SCHEDULEDDATE holds datetime.date objects while DUEDATE holds Timestamps;
# pandas >= 2.0 refuses to order-compare the two, so both sides are converted
# to midnight timestamps first.
result["late"] = (
    pd.to_datetime(result["SCHEDULEDDATE"])
    > pd.to_datetime(result["DUEDATE"]).dt.normalize()
)
late_count = result["late"].sum()
total_jobs = len(result)
late_share = late_count / total_jobs if total_jobs else 0.0
print(f"Late (past-due) jobs: {late_count} / {total_jobs} "
      f"({late_share:.1%})")

In [None]:
# Notebook cell output: display the schedule table.
result

In [None]:
# Export the per-job schedule (identifier, time window and chosen date) to CSV.
result.loc[
    :, ["CALLID", "EARLIEST STARTDATE", "DUEDATE", "SCHEDULEDDATE"]
].to_csv('Assignment2_Result.csv', index=False)
print("Saved optimized schedule to Assignment2_Result.csv")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def analyze_schedule(result, jobs, extra_crew, idx_to_day, solver):
    """Print summary statistics of a schedule and plot its daily distribution.

    The "planning month" is taken to be the calendar month (year and month
    together) that holds the most scheduled jobs.

    Note:
        ``result`` is modified in place: ``SCHEDULEDDATE`` is converted with
        ``pd.to_datetime`` and a ``scheduled_month`` (monthly period) column
        is added.

    Args:
        result: DataFrame with a boolean ``late`` column and a
            ``SCHEDULEDDATE`` column.
        jobs: Unused; kept for interface compatibility.
        extra_crew: Sequence of solver Boolean variables, one per day, that
            is read through ``solver.BooleanValue``.
        idx_to_day: Unused; kept for interface compatibility.
        solver: Solved solver object exposing ``BooleanValue``.

    Returns:
        None. Output is printed and shown as matplotlib figures.
    """
    if len(result) == 0:
        # Nothing to summarise; the mode-based month detection below needs data.
        print("No scheduled jobs to analyze.")
        return

    # Number of jobs scheduled past due
    num_late_jobs = result["late"].sum()
    print(f"Number of jobs scheduled past due: {num_late_jobs}")

    result['SCHEDULEDDATE'] = pd.to_datetime(result['SCHEDULEDDATE'])
    result['scheduled_month'] = result['SCHEDULEDDATE'].dt.to_period('M')

    # Number of jobs scheduled within the planning month.
    # Year and month are combined into ONE key before taking the mode; taking
    # the two modes independently can name a year/month pair that is not the
    # most common one.
    scheduled = result["SCHEDULEDDATE"]
    year_month_key = scheduled.dt.year * 100 + scheduled.dt.month
    planning_key = year_month_key.mode()[0]
    in_month = year_month_key == planning_key
    num_in_month = in_month.sum()
    print(f"Number of jobs scheduled within planning month: {num_in_month}")

    # Number of days using the 4th crew
    num_extra_crew_days = sum(solver.BooleanValue(var) for var in extra_crew)
    print(f"Number of days with 4th crew used: {num_extra_crew_days}")

    # Distribution of scheduled tasks per day
    day_counts = result["SCHEDULEDDATE"].value_counts().sort_index()
    plt.figure(figsize=(12, 4))
    day_counts.plot(kind='bar')
    plt.title("Number of Scheduled Tasks per Day")
    plt.xlabel("Date")
    plt.ylabel("Number of Tasks")
    plt.tight_layout()
    plt.show()

    # Distribution of scheduled tasks by month and day: one chart per month
    months = sorted(result['scheduled_month'].unique())

    for month in months:
        month_str = str(month)
        month_mask = result['scheduled_month'] == month
        month_counts = result.loc[month_mask, 'SCHEDULEDDATE'].dt.day.value_counts().sort_index()
        plt.figure(figsize=(8, 4))
        month_counts.plot(kind='bar')
        plt.title(f"Distribution of Scheduled Tasks by Day of Month: {month_str}")
        plt.xlabel("Day of Month")
        plt.ylabel("Number of Tasks")
        plt.tight_layout()
        plt.show()
        print(f"Plotted distribution for {month_str} ({month_counts.sum()} tasks)")

    print("\nSummary:")
    print(f"Total jobs: {len(result)}")
    print(f"Late jobs: {num_late_jobs}")
    print(f"Jobs in planning month: {num_in_month}")
    print(f"Days with 4th crew: {num_extra_crew_days}")


# Summarise and plot the solved schedule. The call modifies `result`:
# SCHEDULEDDATE is converted with pd.to_datetime and a scheduled_month column is added.
analyze_schedule(result, jobs, extra_crew, idx_to_day, solver)

In [None]:
import matplotlib.pyplot as plt

def plot_day_schedule(result, date):
    """
    Visualize all tasks scheduled on a given date as a Gantt chart.

    The date filter compares calendar days, so it works whether
    ``SCHEDULEDDATE`` holds ``datetime.date`` objects or timestamps, and
    whether ``date`` is a string, a ``datetime.date`` or a timestamp.

    Args:
        result: DataFrame with columns 'SCHEDULEDDATE', 'SCHEDULED_START', 'SCHEDULED_END', 'CALLID'
        date: datetime.date or string in 'YYYY-MM-DD' format

    Returns:
        None. Prints a message when no task is scheduled on ``date``;
        otherwise shows a matplotlib figure.
    """
    # Bring both sides to midnight timestamps before comparing: a plain `==`
    # between date objects, strings and datetime64 values silently matches
    # nothing for most type combinations.
    target_day = pd.Timestamp(date).normalize()
    scheduled_days = pd.to_datetime(result['SCHEDULEDDATE']).dt.normalize()

    # Filter tasks scheduled
    day_tasks = result[scheduled_days == target_day]
    if day_tasks.empty:
        print(f"No tasks scheduled on {date}")
        return

    day_tasks = day_tasks.sort_values('SCHEDULED_START')
    fig, ax = plt.subplots(figsize=(10, max(2, len(day_tasks) * 0.5)))
    for i, (_, row) in enumerate(day_tasks.iterrows()):
        start = row['SCHEDULED_START'].time()
        end = row['SCHEDULED_END'].time()
        # Bars are positioned in minutes since midnight (seconds are ignored).
        start_min = start.hour * 60 + start.minute
        end_min = end.hour * 60 + end.minute
        ax.barh(i, end_min - start_min, left=start_min, height=0.4, color='skyblue')
        ax.text(start_min, i, f"{row['CALLID']}", va='center', ha='right', fontsize=8)
    ax.set_yticks(range(len(day_tasks)))
    ax.set_yticklabels(day_tasks['CALLID'])
    ax.set_xlabel("Minutes since midnight")
    ax.set_title(f"Scheduled Tasks on {date}")
    # Visible window 07:00-16:00 with hourly ticks
    ax.set_xlim(7*60, 16*60)
    ax.set_xticks([h*60 for h in range(7, 17)])
    ax.set_xticklabels([f"{h}:00" for h in range(7, 17)])
    plt.tight_layout()
    plt.show()

# Example: Gantt chart of the tasks scheduled on one hard-coded day.
plot_day_schedule(result, '2023-01-09')

In [None]:
# file_link = 'Assignment2_Actuals.csv'
# district = DISTRICT
# interval = INTERVAL

# actual_df = pd.read_csv(file_link)
# actual_df.dropna(subset=['SCHEDULEDSTART'], inplace=True)
# actual_df['SCHEDULEDSTART'] = pd.to_datetime(actual_df['SCHEDULEDSTART'])
# actual_df['SCHEDULEDFINISH'] = pd.to_datetime(actual_df['SCHEDULEDFINISH'])

# actual_df