In [3]:
import pandas as pd
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, LpBinary, PULP_CBC_CMD

# 读取数据
df = pd.read_csv("for_read.csv")
df["date"] = pd.to_datetime(df["date"]).dt.date


In [4]:

# 班次及时间段
shifts = [(0, 8), (5, 13), (8, 16), (12, 20), (14, 22), (16, 24)]
shift_labels = ["Shift1", "Shift2", "Shift3", "Shift4", "Shift5", "Shift6"]
dates = sorted(df["date"].unique())

# 计算每个班次的需求
demand_per_shift = {date: {} for date in dates}
for date in dates:
    daily_data = df[df["date"] == date]
    for label, (start, end) in zip(shift_labels, shifts):
        demand_per_shift[date][label] = daily_data[
            (daily_data["hour"] >= start) & (daily_data["hour"] < end)
        ]["value"].sum()

# 建立优化模型
model = LpProblem("Personnel_Scheduling", LpMinimize)


In [5]:

# 定义变量
full_time = LpVariable.dicts(
    "FullTime", (dates, shift_labels, range(200)), 0, 1, LpBinary
)
temp_workers = LpVariable.dicts(
    "TempWorkers", (dates, shift_labels), 0, None, cat="Integer"
)

# 目标函数：最小化总人天数
model += lpSum(
    full_time[date][shift][i]
    for date in dates
    for shift in shift_labels
    for i in range(200)
) + lpSum(temp_workers[date][shift] for date in dates for shift in shift_labels)

# 每个班次的需求必须被满足的约束
for date in dates:
    for shift in shift_labels:
        model += (
            25 * lpSum(full_time[date][shift][i] for i in range(200))
            + 20 * temp_workers[date][shift]
            >= demand_per_shift[date][shift]
        )


In [6]:

# 正式工的出勤率不超过85%
for i in range(200):
    model += (
        lpSum(full_time[date][shift][i] for date in dates for shift in shift_labels)
        <= 30 * 0.85
    )

# 正式工连续出勤天数不超过7天
for i in range(200):
    for d in range(len(dates) - 6):
        model += (
            lpSum(
                full_time[dates[d + k]][shift][i]
                for k in range(7)
                for shift in shift_labels
            )
            <= 7
        )
# 求解问题
# model.solve(PULP_CBC_CMD(msg=1))
model.solve(PULP_CBC_CMD(msg=1, threads=8,gapRel=0.001))


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/yinmo19/Mathor_cup_2024/venv/lib/python3.11/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/abaf458cf94641c99222a0b884e53c06-pulp.mps -ratio 0.001 -threads 8 -timeMode elapsed -branch -printingOptions all -solution /tmp/abaf458cf94641c99222a0b884e53c06-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 5391 COLUMNS
At line 402136 RHS
At line 407523 BOUNDS
At line 444910 ENDATA
Problem MODEL has 5386 rows, 37386 columns and 284586 elements
Coin0008I MODEL read with 0 errors
ratioGap was changed from 0 to 0.001
threads was changed from 0 to 8
Option for timeMode changed from cpu to elapsed
Continuous objective value is 204196 - 0.99 seconds
Cgl0003I 0 fixed, 186 tightened bounds, 0 strengthened rows, 0 substitutions
Cgl0004I processed model has 5386 rows, 37386 columns (37386 integer (37200 of which binary)) and 284586 elements
Cutoff increment increased from

1

In [7]:

# 收集结果并输出为CSV
results = []
for date in dates:
    for shift in shift_labels:
        for i in range(200):
            if full_time[date][shift][i].varValue > 0:
                results.append(
                    {
                        "Sorting_Center": "SC60",
                        "Date": date,
                        "Shift": shift,
                        "Employee": f"FullTime({i})",
                    }
                )
        temp_workers_count = int(temp_workers[date][shift].varValue)
        for j in range(temp_workers_count):
            results.append(
                {
                    "Sorting_Center": "SC60",
                    "Date": date,
                    "Shift": shift,
                    "Employee": f"Temp({j})",
                }
            )

# 创建DataFrame并保存到CSV
results_df = pd.DataFrame(results)
results_df.to_csv("scheduling_results.csv", index=False)
print("CSV file has been saved with the scheduling results.")


CSV file has been saved with the scheduling results.
