In [None]:
# Instalar PuLP si no está
!pip install pulp --quiet

# Importar librerías
import pandas as pd
import numpy as np
from pulp import LpProblem, LpMinimize, LpVariable, LpBinary, lpSum, LpStatus

# Load the three raw CSV exports (cancellations, shifts, bookings), in that order.
cancel_logs, shifts, booking_logs = map(
    pd.read_csv,
    (
        "cleveland_cancel_logs.csv",
        "cleveland_shifts.csv",
        "cleveland_booking_logs.csv",
    ),
)

# Parse the shift boundaries as datetimes. format="mixed" infers the format
# value by value; errors="coerce" turns unparseable entries into NaT.
for date_col in ("Start", "End"):
    shifts[date_col] = pd.to_datetime(shifts[date_col], format="mixed", errors="coerce")


In [None]:
# Select one real week with activity. The window is half-open,
# [first_date, week_end), so it spans seven full calendar days.
WEEK_DAYS = 7
MAX_SHIFT_HOURS = 16  # shifts longer than this are discarded below

first_date = pd.Timestamp("2021-10-01")
last_date = first_date + pd.Timedelta(days=WEEK_DAYS - 1)  # last calendar day (used for display)
week_end = first_date + pd.Timedelta(days=WEEK_DAYS)  # exclusive upper bound

# Keep the shifts that start inside the week. `last_date` is midnight at the
# *start* of the final day, so filtering with `Start <= last_date` would drop
# every shift that begins later on that day; the exclusive `week_end` bound
# keeps the whole day.
starts_in_week = (shifts["Start"] >= first_date) & (shifts["Start"] < week_end)
shifts_week = shifts[starts_in_week].copy()

# Length of each shift in hours.
shifts_week["Duration"] = (shifts_week["End"] - shifts_week["Start"]).dt.total_seconds() / 3600

# Drop shifts with a missing End (NaN duration fails both comparisons),
# a non-positive duration, or a duration above MAX_SHIFT_HOURS.
has_valid_duration = (shifts_week["Duration"] > 0) & (shifts_week["Duration"] <= MAX_SHIFT_HOURS)
shifts_week = shifts_week[has_valid_duration]

# Summary
print(f"Turnos entre {first_date.date()} y {last_date.date()}")
print(f"Total turnos válidos: {len(shifts_week)}")

# Show a sample
shifts_week[["ID", "Start", "End", "Agent Req", "Duration"]].head()


Turnos entre 2021-10-01 y 2021-10-07
Total turnos válidos: 1447


Unnamed: 0,ID,Start,End,Agent Req,Duration
23,6151ed5fa98f2e0184999d5d,2021-10-04 18:30:00,2021-10-05 03:00:00,CNA,8.5
24,6151ed8219f1270184a913ce,2021-10-05 18:30:00,2021-10-06 03:00:00,CNA,8.5
25,6151edb848fc310185021752,2021-10-06 18:30:00,2021-10-07 03:00:00,CNA,8.5
26,61520de8a98f2e01849a58b6,2021-10-01 15:00:00,2021-10-01 19:00:00,NURSE,4.0
27,61520e4da98f2e01849a5a82,2021-10-02 03:00:00,2021-10-02 11:00:00,CNA,8.0


In [None]:
# Parse the booking creation timestamp; unparseable values become NaT.
booking_logs["Created At"] = pd.to_datetime(booking_logs["Created At"], errors="coerce")

# Keep only the bookings whose shift belongs to the selected week
# (inner join of the booking log against the week's shift IDs).
week_shift_ids = shifts_week[["ID"]]
bookings_week = booking_logs.merge(
    week_shift_ids,
    how="inner",
    left_on="Shift ID",
    right_on="ID",
)

# Distinct workers appearing in those bookings (missing worker IDs are ignored).
workers = bookings_week["Worker ID"].dropna().unique()

print(f"Trabajadores activos esa semana: {len(workers)}")

# Candidate (shift, worker) pairs.
# Simplification: any shift may be assigned to any worker, so this is the
# full cross product, ordered shift by shift.
assignments = [
    (shift_id, worker)
    for shift_id in shifts_week["ID"]
    for worker in workers
]


Trabajadores activos esa semana: 208


In [None]:
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, LpBinary

from collections import defaultdict
from itertools import combinations

# Create the model
model = LpProblem("Scheduling_Optimization", LpMinimize)

# Decision variables: x[(shift_id, worker_id)] in {0, 1}
x = {
    (shift_id, worker_id): LpVariable(f"x_{shift_id}_{worker_id}", cat=LpBinary)
    for (shift_id, worker_id) in assignments
}

# Variables available for each shift, built once and reused by both the
# objective and the "at most one worker" constraint.
shift_ids = list(shifts_week["ID"])
shift_vars = {
    shift_id: [x[shift_id, w] for w in workers if (shift_id, w) in x]
    for shift_id in shift_ids
}

# Objective: minimise the number of uncovered shifts
# (equivalent to maximising the total of x).
model += lpSum(1 - lpSum(shift_vars[shift_id]) for shift_id in shift_ids), "Uncovered_Shifts"

# Constraint 1: each shift goes to at most one worker
for shift_id in shift_ids:
    model += lpSum(shift_vars[shift_id]) <= 1

# Constraint 2: a worker cannot take two overlapping shifts

# Start/End lookup by shift ID
shifts_dict = shifts_week.set_index("ID")[["Start", "End"]].to_dict("index")

# Whether two shifts overlap does not depend on the worker, so the overlapping
# pairs are computed once here instead of once per worker. Pairs are generated
# in the same (i < j) order as the shift list, which keeps the constraints in
# the same order as a per-worker nested loop would produce.
overlapping_pairs = []
for s1, s2 in combinations(shift_ids, 2):
    start1, end1 = shifts_dict[s1]["Start"], shifts_dict[s1]["End"]
    start2, end2 = shifts_dict[s2]["Start"], shifts_dict[s2]["End"]
    # Shifts that merely touch (one ends exactly when the other starts) do not overlap.
    if not (end1 <= start2 or end2 <= start1):
        overlapping_pairs.append((s1, s2))

for worker in workers:
    for s1, s2 in overlapping_pairs:
        # Only constrain pairs for which this worker is a candidate on both shifts.
        if (s1, worker) in x and (s2, worker) in x:
            model += x[s1, worker] + x[s2, worker] <= 1

print("Modelo construido con", len(x), "variables binarias.")


In [None]:
from pulp import PULP_CBC_CMD, LpStatus

# Solve with the CBC backend; msg=True streams the solver log into the cell output.
solver = PULP_CBC_CMD(msg=True)
result_status = model.solve(solver)

# Translate the numeric status code into its readable label and report it.
status_label = LpStatus[result_status]
print("Estado de la optimización:", status_label)


In [None]:
# Extract the selected assignments.
# The solver reports variable values as floats, so a binary variable that is
# "on" may come back as 0.9999999 rather than exactly 1; testing against 0.5
# avoids silently dropping such assignments. varValue is None for a variable
# that has no value yet (e.g. the model was never solved), which is skipped.
assigned = [
    (shift_id, worker_id)
    for (shift_id, worker_id), var in x.items()
    if var.varValue is not None and var.varValue > 0.5
]

# Summary
print(f"Total turnos asignados: {len(assigned)}")
print("Ejemplo de asignaciones:")
for shift_id, worker_id in assigned[:10]:
    # Look up the shift's row to print its time window.
    row = shifts_week[shifts_week["ID"] == shift_id].iloc[0]
    print(f"- Turno {shift_id} ({row['Start']} a {row['End']}) asignado a trabajador {worker_id}")
