In [6]:
import pandas as pd
from ortools.linear_solver import pywraplp
from itertools import combinations

# Sample datasets
df1 = pd.DataFrame({
    'date': ['6/4/2023', '6/14/2023', '6/29/2023', '6/29/2018', '7/7/2023', '7/28/2023', '7/28/2023', '7/28/2023'],
    'amount': [1000, 20, 10, 20, 500, 1000, 2000, 50]
})

df2 = pd.DataFrame({
    'date': ['8/31/2023', '8/31/2023', '9/30/2023', '9/30/2023', '10/31/2023', '10/31/2023', '11/21/2023', '11/21/2023'],
    'amount': [2000, 1000, 1000, 10, 30, 10, 50, 500]
})

# Create the solver
solver = pywraplp.Solver.CreateSolver("SCIP")
if solver is None:
    print("SCIP solver unavailable.")
    exit()

# Data preparation
data = {}
data['amounts_1'] = df1['amount'].tolist()
data['num_items_1'] = len(data['amounts_1'])

data['amounts_2'] = df2['amount'].tolist()
data['num_items_2'] = len(data['amounts_2'])

# Generate all possible combinations of at least 2 items from each dataset
min_group_size = 2

# Create a list of all combinations of at least two elements from Dataset 1
combinations_1 = []
for r in range(min_group_size, data['num_items_1'] + 1):
    combinations_1.extend(combinations(range(data['num_items_1']), r))

# Create a list of all combinations of at least two elements from Dataset 2
combinations_2 = []
for r in range(min_group_size, data['num_items_2'] + 1):
    combinations_2.extend(combinations(range(data['num_items_2']), r))

# Variables: x[g1][g2] = 1 if group g1 from Dataset 1 is matched with group g2 from Dataset 2
x = {}
for g1 in range(len(combinations_1)):
    for g2 in range(len(combinations_2)):
        x[g1, g2] = solver.BoolVar(f"x_{g1}_{g2}")

# Constraints
# Each group from Dataset 1 can be matched with at most one group from Dataset 2
for g1 in range(len(combinations_1)):
    solver.Add(sum(x[g1, g2] for g2 in range(len(combinations_2))) <= 1)

# Each group from Dataset 2 can be matched with at most one group from Dataset 1
for g2 in range(len(combinations_2)):
    solver.Add(sum(x[g1, g2] for g1 in range(len(combinations_1))) <= 1)

# Ensure that the sum of the amounts in the groups from Dataset 1 matches the sum in Dataset 2
for g1 in range(len(combinations_1)):
    sum_1 = sum(data['amounts_1'][i] for i in combinations_1[g1])
    for g2 in range(len(combinations_2)):
        sum_2 = sum(data['amounts_2'][i] for i in combinations_2[g2])
        solver.Add(x[g1, g2] * sum_1 == x[g1, g2] * sum_2)

# Objective: Minimize the number of unused amounts
objective = solver.Objective()
for g1 in range(len(combinations_1)):
    for g2 in range(len(combinations_2)):
        objective.SetCoefficient(x[g1, g2], 1)  # We want to maximize the number of valid matches

objective.SetMinimization()

# Solve the problem
status = solver.Solve()

# Output the results
if status == pywraplp.Solver.OPTIMAL:
    print("Optimal solution found!")
    for g1 in range(len(combinations_1)):
        for g2 in range(len(combinations_2)):
            if x[g1, g2].solution_value() > 0:
                print(f"Match: Dataset1 group {combinations_1[g1]} (Sum: {sum(data['amounts_1'][i] for i in combinations_1[g1])})")
                print(f"   with Dataset2 group {combinations_2[g2]} (Sum: {sum(data['amounts_2'][i] for i in combinations_2[g2])})")
else:
    print("No optimal solution found.")


Optimal solution found!


In [5]:
pip uninstall protobuf

In [1]:
pip install ortools




In [3]:
import pandas as pd
from itertools import product
import numpy as np

In [None]:
a = [30,34,36]
df = pd.DataFrame(product([0, 1], repeat=len(a)), columns=a)
df['sum'] = df[a] @ a
print(df)

In [None]:
def knapsack(cap):
    from ortools.linear_solver import pywraplp
    data = {}
    # ------------------ weights and values from a candidate pool -----------------------------------
    # ----------------- values = (1/(days difference)+1)---------------------------------------------

    data["weights"] = [15, 25, 60]
    data["values"] = [0.1,0.4,0.5]
    #data["match_score"]=[0.1]
    #assert len(data["weights"]) == len(data["values"])
    data["num_items"] = len(data["weights"])
    data["all_items"] = range(data["num_items"])
 
    # ---------------------for LHS a particular day ----------------------- 13th may
    data["bin_capacities"] = [cap]
 
    #--------------------------------------------------------------------
    data["num_bins"] = len(data["bin_capacities"])
    data["all_bins"] = range(data["num_bins"])
 
    #data["num_matches"] = len(data["match_score"])
    #data["all_matches"] = range(data["num_matches"])
 
    # Create the mip solver with the SCIP backend.
    solver = pywraplp.Solver.CreateSolver("SCIP")
    if solver is None:
        print("SCIP solver unavailable.")
 
    # Variables.
    # x[i, b] = 1 if item i is packed in bin b.
    x = {}
    for i in data["all_items"]:
        for b in data["all_bins"]:
            x[i, b] = solver.BoolVar(f"x_{i}_{b}")
 
 
    # Constraints.
    # Each item is assigned to at most one bin.
    for i in data["all_items"]:
        solver.Add(sum(x[i, b] for b in data["all_bins"]) <= 1)
 
    # for b in data['all_bins']:
    #     for i in data['all_items']:
    #         solver.Add(len(data['weights'])<=1)
 
    # for i in data["all_items"]:
    #     solver.Add(data["match_score"][i]>=0.2)
 
    # The amount packed in each bin cannot exceed its capacity.
    for b in data["all_bins"]:
        solver.Add(
            sum(x[i, b] * data["weights"][i] for i in data["all_items"])
            <= data["bin_capacities"][b]+1)
 
    for b in data["all_bins"]:
        solver.Add(
            sum(x[i, b] * data["weights"][i] for i in data["all_items"])
            >= abs(data["bin_capacities"][b]-1))
 
 
    # Objective.
    # Maximize total value of packed items.
    # for j in data["all_matches"]:
    #     for b in data["all_bins"]:
    #         objective.SetCoefficient(y[j, b], data["match_score"][i])
    objective = solver.Objective()
    for i in data["all_items"]:
        for b in data["all_bins"]:
            objective.SetCoefficient(x[i, b], data["values"][i])
 
    objective.SetMinimization()
 
    print(f"Solving with {solver.SolverVersion()}")
    status = solver.Solve()
 
 
    #if status == pywraplp.Solver.OPTIMAL :
    print(f"Total packed value: {objective.Value()}")
    total_weight = 0
    for b in data["all_bins"]:
        print(f"Bin {b}")
        bin_weight = 0
        bin_value = 0
        for i in data["all_items"]:
            if x[i, b].solution_value()>0:
                print(
                    f"Item {i} weight: {data['weights'][i]} value:"
                    f" {data['values'][i]}")
                bin_weight += data["weights"][i]
                bin_value += data["values"][i]
        print(f"Packed bin weight: {bin_weight}")
        print(f"Packed bin value: {bin_value}\n")
        total_weight += bin_weight
    print(f"Total packed weight: {total_weight}")
    #else:
    #    print("The problem does not have an optimal solution.")


In [7]:
def knapsack(cap):
    """Solve a multiple knapsack problem using a MIP solver."""
    from ortools.linear_solver import pywraplp

    data = {}
    data["weights"] = [15, 25, 60]
    data["values"] = [0.1,0.4,0.5]
    # assert len(data["weights"]) == len(data["values"])
    data["num_items"] = len(data["weights"])
    data["all_items"] = range(data["num_items"])

    data["bin_capacities"] = [cap]
    data["num_bins"] = len(data["bin_capacities"])
    data["all_bins"] = range(data["num_bins"])

    # Create the mip solver with the SCIP backend.
    solver = pywraplp.Solver.CreateSolver("SCIP")
    if solver is None:
        print("SCIP solver unavailable.")
        return

    # Variables.
    # x[i, b] = 1 if item i is packed in bin b.
    x = {}
    for i in data["all_items"]:
        for b in data["all_bins"]:
            x[i, b] = solver.BoolVar(f"x_{i}_{b}")

    # Constraints.
    # Each item is assigned to at most one bin.
    for i in data["all_items"]:
        solver.Add(sum(x[i, b] for b in data["all_bins"]) <= 1)

    # The amount packed in each bin cannot exceed its capacity.
    for b in data["all_bins"]:
        solver.Add(
            sum(x[i, b] * data["weights"][i] for i in data["all_items"])
            <= data["bin_capacities"][b]
        )

    # Objective.
    # Maximize total value of packed items.
    objective = solver.Objective()
    for i in data["all_items"]:
        for b in data["all_bins"]:
            objective.SetCoefficient(x[i, b], data["values"][i])
    objective.SetMaximization()

    print(f"Solving with {solver.SolverVersion()}")
    status = solver.Solve()

    if status == pywraplp.Solver.OPTIMAL:
        print(f"Total packed value: {objective.Value()}")
        total_weight = 0
        for b in data["all_bins"]:
            print(f"Bin {b}")
            bin_weight = 0
            bin_value = 0
            for i in data["all_items"]:
                if x[i, b].solution_value() > 0:
                    print(
                        f"Item {i} weight: {data['weights'][i]} value:"
                        f" {data['values'][i]}"
                    )
                    bin_weight += data["weights"][i]
                    bin_value += data["values"][i]
            print(f"Packed bin weight: {bin_weight}")
            print(f"Packed bin value: {bin_value}\n")
            total_weight += bin_weight
        print(f"Total packed weight: {total_weight}")
    # else:
    #     print("The problem does not have an optimal solution.")

In [None]:
for i in df['sum']:
  print(i)
  knapsack(i)