# Fase Beta de la entrega del reto
### ***Secuencia***
1. Generar frecuencias de ventas y simular una venta de 105 clientes.
2. Tomar los volúmenes de las compras de los clientes para determinar la cantidad de camiones necesarios.
3. Sabiendo los camiones, utilizar k-medoids para hacer los mini-tcps.
4. Resolver los mtcps.

### ***Librerías***

In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import math

from scipy.stats import poisson
from scipy.stats import poisson
from scipy.stats import chisquare


from ortools.linear_solver import pywraplp

from scipy.spatial.distance import cdist

import time

## Paso 1. Simular un pedido de 100 clientes

In [2]:
compras = pd.read_excel('informacion_compra.xlsx')

In [3]:
compras.head(3)

Unnamed: 0,Producto,Unidades,Factura
0,48443,1,799186
1,42877,1,717106
2,48296,1,468125


In [4]:
facturas = []
productos = []
for _,i in compras.iterrows():
    facturas += [i[2]]*i[1]
for _,i in compras.iterrows():
    productos += [i[0]]*i[1]

compras = pd.DataFrame({
    "Factura": facturas,
    "Producto": productos
})

In [5]:
df = compras.groupby(['Factura']).size().reset_index(name='Frequency').Frequency.value_counts()
df = df.reset_index()
df.columns = ['Pedidos', 'Frecuencia']
df['Frecuencia ln'] = np.log(df['Frecuencia'])
valor_medio = np.sum(df['Pedidos'] * df['Frecuencia ln']) / np.sum(df['Frecuencia ln'])
df['Poisson'] = poisson.pmf(df['Pedidos'], valor_medio)
df['valores_esperados'] = df['Poisson'] * np.sum(df['Frecuencia ln'])
df.head(3)

Unnamed: 0,Pedidos,Frecuencia,Frecuencia ln,Poisson,valores_esperados
0,1,19235,9.864487,0.011672,0.783311
1,2,3546,8.173575,0.036706,2.463277
2,3,1024,6.931472,0.076953,5.164178


In [6]:
df2 = compras.groupby(['Producto']).size().reset_index(name='Frequency').Frequency.value_counts()
df2 = df2.reset_index()
df2.columns = ['Producto', 'Frecuencia']
df2['Frecuencia ln'] = np.log(df2['Frecuencia'])
valor_medio = np.sum(df2['Producto'] * df2['Frecuencia ln']) / np.sum(df2['Frecuencia ln'])
df2['Poisson'] = poisson.pmf(df2['Producto'], valor_medio)
df2['valores_esperados'] = df2['Poisson'] * np.sum(df2['Frecuencia ln'])
df2.head(3)

Unnamed: 0,Producto,Frecuencia,Frecuencia ln,Poisson,valores_esperados
0,1,1793,7.491645,1.565792e-11,2.886239e-09
1,2,779,6.658011,2.209339e-10,4.072494e-08
2,3,414,6.025866,2.078257e-09,3.83087e-07


In [7]:
def montecarlo(df, n):  
    resultados = []
    for _ in range(n):
        num = np.random.rand()
        for j in range(len(df)):
            if df[j][3] <= num < df[j][4]:
                resultados.append(j)
            
    return resultados

In [8]:
pedidos = df[['Pedidos','Poisson']]
pedidos['Acumulado'] = pedidos.Poisson.cumsum()
pedidos['Inferior'] = [0] + pedidos['Acumulado'].tolist()[:-1]
pedidos['Superior'] = pedidos['Acumulado']
pedidos = pedidos.to_numpy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pedidos['Acumulado'] = pedidos.Poisson.cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pedidos['Inferior'] = [0] + pedidos['Acumulado'].tolist()[:-1]


In [9]:
#num_pedidos = montecarlo(pedidos,df.Frecuencia.sum())
num_pedidos = montecarlo(pedidos,compras.size)


In [10]:
producto = df2[['Producto','Poisson']]
producto['Acumulado'] = producto.Poisson.cumsum()
producto['Inferior'] = [0] + producto['Acumulado'].tolist()[:-1]
producto['Superior'] = producto['Acumulado']
producto = producto.to_numpy()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  producto['Acumulado'] = producto.Poisson.cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  producto['Inferior'] = [0] + producto['Acumulado'].tolist()[:-1]


In [11]:
envios = [np.array(montecarlo(producto, num)) for num in montecarlo(pedidos,110) if num!=0]
envios_np = envios[:106]

In [12]:
enviosArreglo = np.array([[-1,-1,-1]])
for i in range(len(envios)):
    a = np.unique(np.array(envios[i]), return_counts=True)
    enviosArreglo = np.concatenate((enviosArreglo, np.array([np.array([i]*len(a[0])),a[0],a[1]]).T))
enviosArreglo

array([[ -1,  -1,  -1],
       [  0,  22,   1],
       [  0,  27,   1],
       ...,
       [107,  35,   2],
       [107,  40,   1],
       [107,  45,   2]], dtype=int64)

In [13]:
dp = pd.read_csv('info_productos.csv')
dp.columns = ['Producto', 'Volumen']
nuevo_registro = {"Producto": 0, "Volumen": 0}
dp = pd.concat([pd.DataFrame([nuevo_registro]), dp], ignore_index=True)
volumenes = dp.to_numpy()[:,1]

In [14]:
dimensiones = []
for pedido in envios_np:
    acumulado = 0
    for i in pedido:
        acumulado += volumenes[i]
    dimensiones.append(round(acumulado,4))

In [15]:
from ortools.linear_solver import pywraplp


def create_data_model():
    """Create the data for the example."""
    data = {}
    weights = dimensiones
    data["weights"] = weights
    data["items"] = list(range(len(weights)))
    data["bins"] = data["items"]
    data["bin_capacity"] = 27 #isuzu aprox
    return data



def main():
    data = create_data_model()

    # Create the mip solver with the SCIP backend.
    solver = pywraplp.Solver.CreateSolver("SCIP")

    if not solver:
        return

    # Variables
    # x[i, j] = 1 if item i is packed in bin j.
    x = {}
    for i in data["items"]:
        for j in data["bins"]:
            x[(i, j)] = solver.IntVar(0, 1, "x_%i_%i" % (i, j))

    # y[j] = 1 if bin j is used.
    y = {}
    for j in data["bins"]:
        y[j] = solver.IntVar(0, 1, "y[%i]" % j)

    # Constraints
    # Each item must be in exactly one bin.
    for i in data["items"]:
        solver.Add(sum(x[i, j] for j in data["bins"]) == 1)

    # The amount packed in each bin cannot exceed its capacity.
    for j in data["bins"]:
        solver.Add(
            sum(x[(i, j)] * data["weights"][i] for i in data["items"])
            <= y[j] * data["bin_capacity"]
        )

    # Objective: minimize the number of bins used.
    solver.Minimize(solver.Sum([y[j] for j in data["bins"]]))

    status = solver.Solve()

    if status == pywraplp.Solver.OPTIMAL:
        num_bins = 0
        for j in data["bins"]:
            if y[j].solution_value() == 1:
                bin_items = []
                bin_weight = 0
                for i in data["items"]:
                    if x[i, j].solution_value() > 0:
                        bin_items.append(i)
                        bin_weight += data["weights"][i]
                if bin_items:
                    num_bins += 1
                    print("Camión # ", j+1)
                    print("Cantidad de clientes # ", len(bin_items))
                    print("  Clientes:", bin_items)
                    print(f"  Volumen total (m^3): {round(bin_weight, 2)}")
                    print()
        print()
        print("Cantidad de camiones:", num_bins)
        print("Capacidad de los camiones:", data["bin_capacity"], "m^3")
        print("Tiempo = ", solver.WallTime(), " milisegundos")
    else:
        print("No existe solución óptima.")


if __name__ == "__main__":
    main()
 

Camión #  1
Cantidad de clientes #  27
  Clientes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 21, 22, 25, 34, 88, 93, 100]
  Volumen total (m^3): 26.99

Camión #  2
Cantidad de clientes #  23
  Clientes: [19, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 46, 47]
  Volumen total (m^3): 27.0

Camión #  3
Cantidad de clientes #  29
  Clientes: [42, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 76, 92]
  Volumen total (m^3): 26.98

Camión #  4
Cantidad de clientes #  25
  Clientes: [73, 75, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 89, 90, 91, 94, 95, 96, 97, 98, 99, 101, 102, 103]
  Volumen total (m^3): 26.69

Camión #  5
Cantidad de clientes #  2
  Clientes: [104, 105]
  Volumen total (m^3): 3.18


Cantidad de camiones: 5
Capacidad de los camiones: 27 m^3
Tiempo =  1340  milisegundos


## Paso 3. Sabiendo los camiones necesarios, hacer clusters

In [16]:
path = "https://raw.githubusercontent.com/gerardoramc/AlgoritmoDatos/Gerardo/distancias_tiempos2.csv"
df = pd.read_csv(path)
df.head(3)

Unnamed: 0,Distance,Duration
0,0.0,0:00
1,15.26,0:22
2,14.93,0:17


In [17]:
df['Duration'] = pd.to_datetime(df['Duration'])
df['Duration'] = df['Duration'].dt.strftime('%H:%M')
df['Duration']=df['Duration'].astype('string')

In [18]:

def convert_to_seconds(time_str):
    hours, minutes = map(int, time_str.split(':'))
    total_seconds = hours * 3600 + minutes * 60
    return total_seconds

# Apply the conversion function to the DataFrame column
df['Segundos'] = df['Duration'].apply(convert_to_seconds)

In [19]:
rangos = np.arange(0,11236,106)

In [20]:
cuadrada = []
for i in range(len(rangos)-1):
    cuadrada.append(list((df['Segundos'][rangos[i]:rangos[i+1]])))
result_array = np.append(np.array(cuadrada), [(df['Segundos'][rangos[-1]:])], axis = 0)
result_array.shape

(106, 106)

In [21]:
def remove_first_row_and_column(input_array):
    if input_array.shape[0] <= 1 or input_array.shape[1] <= 1:
        raise ValueError("Input array must have at least 2 rows and 2 columns.")

    new_array = input_array[1:, 1:]
    return new_array


In [36]:
def k_medoids(X, k, max_iters=100):
    num_samples, num_features = X.shape
    medoids_indices = np.random.choice(num_samples, k, replace=False)
    medoids = X[medoids_indices]

    for _ in range(max_iters):
        distances = cdist(X, medoids, metric='euclidean')
        cluster_assignments = np.argmin(distances, axis=1)

        for i in range(k):
            cluster_points = X[cluster_assignments == i]
            cluster_distances = distances[cluster_assignments == i][:, i]
            new_medoid_index = np.argmin(cluster_distances)
            medoids[i] = cluster_points[new_medoid_index]

    return medoids, cluster_assignments
# Ejemplo de uso

result_array_no_warehouse = remove_first_row_and_column(result_array)
X = result_array_no_warehouse

k = 5
medoids, cluster_assignments = k_medoids(X, k)

#print("Medoides finales:")
#print(medoids)

clusters = {}
for i in range(k):
    cluster_points = np.where(cluster_assignments == i)[0]
    clusters[i] = cluster_points

print("Nodos en cada cluster:")
for cluster_id, node_indices in clusters.items():
    print(f"\n**Cluster {cluster_id+1}** \nCantidad: {len(node_indices)}\nNodos: {node_indices+1}\n")


Nodos en cada cluster:

**Cluster 1** 
Cantidad: 36
Nodos: [  1   3   6   7   9  12  15  19  20  22  32  33  41  44  45  46  47  48
  49  51  52  58  59  62  63  65  67  72  74  78  79  80  85 102 103 104]


**Cluster 2** 
Cantidad: 30
Nodos: [  2   4  10  13  14  16  23  29  31  36  39  40  42  54  56  57  61  64
  68  73  81  82  84  87  88  96  97  99 100 101]


**Cluster 3** 
Cantidad: 10
Nodos: [  8  21  24  37  76  77  86  89  95 105]


**Cluster 4** 
Cantidad: 24
Nodos: [17 18 25 26 27 28 30 34 38 43 53 55 60 66 69 70 71 83 90 91 92 93 94 98]


**Cluster 5** 
Cantidad: 5
Nodos: [ 5 11 35 50 75]



In [37]:
vol_por_clus = {}

for clave, indices in clusters.items():
    pesos = [dimensiones[indice] for indice in indices]
    vol_por_clus[clave] = pesos


In [48]:
multiplier = 1
trucks_used = 0
start_time=time.time()
chamions = []
for clave, volumenes in vol_por_clus.items():

    print(f"**********CLUSTER # {clave + 1 }**********\n")
    incomplete = True
    while incomplete:
        data = {}
        data["weights"] = volumenes
        #data["values"] = len(data["weights"]) * [1]
        data["values"] =  volumenes
        sum_of_all_weights = sum(data["weights"])

        assert len(data["weights"]) == len(data["values"])
        data["num_items"] = len(data["weights"])
        data["all_items"] = range(data["num_items"])

        data["bin_capacities"] = [27] * multiplier
        data["num_bins"] = len(data["bin_capacities"])
        data["all_bins"] = range(data["num_bins"])

        solver = pywraplp.Solver.CreateSolver("SCIP")

        # x[i, b] = 1 if item i is packed in bin b.
        x = {}
        for i in data["all_items"]:
            for b in data["all_bins"]:
                x[i, b] = solver.BoolVar(f"x_{i}_{b}")

        # Each item is assigned to at most one bin.
        for i in data["all_items"]:
            solver.Add(sum(x[i, b] for b in data["all_bins"]) <= 1)

        # The amount packed in each bin cannot exceed its capacity.
        for b in data["all_bins"]:
            solver.Add(
                sum(x[i, b] * data["weights"][i] for i in data["all_items"])
                <= data["bin_capacities"][b])
        
        # Each bin must contain at least 2 items
        for b in data["all_bins"]:
            solver.Add(sum(x[i, b] for i in data["all_items"]) <= 20)
            
         # Each bin must contain at most 20 items
        '''for b in data["all_bins"]:
            solver.Add(sum(x[i, b] for i in data["all_items"]) <= 20)'''
        
        # Maximize total value of packed items.
        objective = solver.Objective()
        for i in data["all_items"]:
            for b in data["all_bins"]:
                objective.SetCoefficient(x[i, b], data["values"][i])
        objective.SetMaximization()





        start_time=time.time()
        status = solver.Solve()

        if status == pywraplp.Solver.OPTIMAL:

            unused_items = [i for i in data["all_items"] if all(x[i, b].solution_value() == 0 for b in data["all_bins"])]


            used_bins = [b for b in data["all_bins"] if any(x[i, b].solution_value() > 0 for i in data["all_items"])]
            if len(unused_items) != 0:
                multiplier += 1

            if len(unused_items) == 0:


                print(f"Número de camiones requeridos: {len(used_bins)}\n")

                print(f"Valor total empaquetado: {round(objective.Value(), 2)}\n")

                max_value_bin = None
                max_value = 0
                total_weight = 0
                total_value = 0
                for b in used_bins:
                    print(f"*** CAMIÓN # {b+1} ***")
                    print(f'Capacidad: {data["bin_capacities"][b]}')
                    bin_weight = 0
                    bin_value = 0
                    packed_items = []
                    for i in data["all_items"]:
                        if x[i, b].solution_value() > 0:
                            packed_items.append(str(clusters[clave][i]+1))
                            bin_weight += data["weights"][i]
                            bin_value += data["values"][i]
                    print(f"Clientes empacados: {', '.join(packed_items)}")
                    print(f"Volumen empaquetado en el camión: {round(bin_weight, 4)}\n")
                    #print(f"Valor empaquetado del camión: {bin_value}\n")
                    total_value += bin_value
                    total_weight += bin_weight
                    if bin_value > max_value:
                        max_value = bin_value
                        max_value_bin = b
                    chamions.append([int(x) for x in packed_items])

                trucks_used += len(used_bins)
                incomplete = False

            # Print solution for the new bin (if exists)



        else:
            print("No existe solución óptima.")
        
my_dict = dict(zip(range(trucks_used), chamions))
mini_tcps = []
for i in my_dict:
    i = list(my_dict[i]) + [0]
    
    mini_tcps.append(result_array[i,:][:,i])
print(mini_tcps[3])

end_time=time.time()-start_time

print(f"Camiones totales usados: {trucks_used}")
print(f"{end_time} segundos")

**********CLUSTER # 1**********

Número de camiones requeridos: 2

Valor total empaquetado: 38.14

*** CAMIÓN # 1 ***
Capacidad: 27
Clientes empacados: 1, 3, 6, 7, 9, 12, 15, 19, 20, 22, 32, 33, 41, 44, 45, 46, 47, 48, 49, 51
Volumen empaquetado en el camión: 22.1164

*** CAMIÓN # 2 ***
Capacidad: 27
Clientes empacados: 52, 58, 59, 62, 63, 65, 67, 72, 74, 78, 79, 80, 85, 102, 103, 104
Volumen empaquetado en el camión: 16.0235

**********CLUSTER # 2**********

Número de camiones requeridos: 2

Valor total empaquetado: 29.08

*** CAMIÓN # 1 ***
Capacidad: 27
Clientes empacados: 2, 4, 10, 13, 14, 16, 23, 29, 31, 36, 39, 40, 42, 54, 56, 57, 61, 64, 68, 73
Volumen empaquetado en el camión: 18.2341

*** CAMIÓN # 2 ***
Capacidad: 27
Clientes empacados: 81, 82, 84, 87, 88, 96, 97, 99, 100, 101
Volumen empaquetado en el camión: 10.8412

**********CLUSTER # 3**********

Número de camiones requeridos: 1

Valor total empaquetado: 9.64

*** CAMIÓN # 1 ***
Capacidad: 27
Clientes empacados: 8, 21, 24

In [44]:
my_dict = dict(zip(range(trucks_used), chamions))

In [45]:
mini_tcps = []
for i in my_dict:
    i = list(my_dict[i]) + [0]

    mini_tcps.append(result_array[i,:][:,i])
mini_tcps[3]

array([[   0, 1440,  960,  420,  960,  600,  780,  420, 1680,  420,  900],
       [1560,    0, 1800, 1680, 1980, 1860, 1920, 1680,  480, 1680, 2160],
       [1020, 1500,    0, 1140, 1440, 1320, 1380, 1140, 1740, 1140, 1560],
       [ 360, 1560, 1080,    0,  840,  600,  720,  360, 1800,    0,  780],
       [ 900, 1920, 1500,  780,    0,  600,  600,  720, 2160,  780,  780],
       [ 600, 1620, 1140,  480,  660,    0,  540,  360, 1860,  480,  660],
       [ 840, 1800, 1320,  720,  600,  600,    0,  420, 2040,  720,  780],
       [ 540, 1560, 1080,  420,  720,  480,  420,    0, 1800,  420,  780],
       [1680,  720, 1920, 1860, 2100, 2040, 2100, 1800,    0, 1860, 2280],
       [ 360, 1560, 1080,    0,  840,  600,  720,  360, 1800,    0,  780],
       [ 720, 1860, 1440,  600,  840,  480,  780,  660, 2100,  600,    0]],
      dtype=int64)