In [163]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

In [164]:
# Adjust these relative paths to wherever the JSON-lines files live.
# Each file is parsed with one JSON record per line (lines=True).
eventos, planes = (
    pd.read_json(ruta, lines=True)
    for ruta in ("../../simulation.jsonlines", "../../plans.jsonlines")
)

In [165]:
# Distinct event types present in the simulation log
eventos["eventType"].unique()

array(['Truck departed', 'Truck initialized', 'Truck received packets',
       'Truck arrived', 'Truck started delivering',
       'Truck ended delivering', 'Truck departed to depot',
       'Truck ended route'], dtype=object)

In [166]:
# Column labels of the events frame
eventos.columns

Index(['eventDescription', 'eventTime', 'eventType', 'simulationId',
       'truckId'],
      dtype='object')

In [167]:
# Flatten the plans into one frame with a row per route entry, tagged with the
# simulation and truck it belongs to. Only the first plan row of each
# simulationId is used.
planes_por_sim = planes.drop_duplicates(subset="simulationId", keep="first")
camiones = pd.concat(
    [
        pd.DataFrame(camion["route"]).assign(
            simulationId=sim_id, truckId=camion["truck_id"]
        )
        for sim_id, lista_camiones in zip(
            planes_por_sim["simulationId"], planes_por_sim["trucks"]
        )
        for camion in lista_camiones
    ]
)

In [168]:
# Planned durations per (simulation, truck), collected as a list in route order.
# The x1000 scaling presumably converts seconds to milliseconds so the values
# are comparable with the event timestamps -- TODO confirm units.
tiempos_plan = (
    camiones
    .sort_values(["simulationId", "truckId"])
    .assign(duration=lambda rutas: rutas["duration"] * 1000)
    .groupby(["simulationId", "truckId"])["duration"]
    .agg(list)
    .reset_index()
    .rename(columns={"duration": "tiempo_plan"})
)
tiempos_plan

Unnamed: 0,simulationId,truckId,tiempo_plan
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,"[614000, 397000, 259000, 349000, 384000, 26800..."
1,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3953RLD,"[4164000, 4064000]"
2,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5030LXK,"[189000, 1522000, 1655000, 1125000, 2093000, 6..."
3,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5534TPH,"[630000, 496000, 725000, 680000, 491000, 53400..."
4,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,6270NFM,"[621000, 130000, 294000, 74000, 403000]"
...,...,...,...
635,fea13535-ade6-4215-96ca-dab5b4ef309b,0013DYS,"[3714000, 3663000]"
636,fea13535-ade6-4215-96ca-dab5b4ef309b,3953RLD,"[208000, 670000, 260000, 492000, 178000, 79900..."
637,fea13535-ade6-4215-96ca-dab5b4ef309b,5534TPH,"[589000, 909000, 725000, 455000, 297000, 82900..."
638,fea13535-ade6-4215-96ca-dab5b4ef309b,6270NFM,"[445000, 223000, 219000, 408000, 1538000, 1591..."


In [169]:
# Order events chronologically within each (simulation, truck).
# NOTE(review): ties on eventTime are broken alphabetically by eventType, which
# may not match the real order of simultaneous events -- confirm.
eventos = eventos.sort_values(["simulationId", "truckId", "eventTime", "eventType"])

# For every event, look up the previous event of the same truck in the same
# simulation; `delta` is the time elapsed since that previous event (NaN for
# the first event of each group).
por_camion = eventos.groupby(["truckId", "simulationId"])
evento_previo = por_camion["eventType"].shift(1)
tiempo_previo = por_camion["eventTime"].shift(1)
eventos = eventos.assign(
    prev_event=evento_previo,
    prev_time=tiempo_previo,
    delta=eventos["eventTime"] - tiempo_previo,
)

# Deltas of the "arrival" style events, collected as a list per
# (simulation, truck) in chronological order.
es_llegada = eventos["eventType"].isin(["Truck arrived", "Truck ended route"])
tiempos_sim = (
    eventos[es_llegada]
    .sort_values(["simulationId", "truckId", "eventTime"])
    .groupby(["simulationId", "truckId"])["delta"]
    .agg(list)
    .reset_index()
    .rename(columns={"delta": "tiempo_sim"})
)
tiempos_sim

Unnamed: 0,simulationId,truckId,tiempo_sim
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,"[750960.0, 482520.0, 311850.0, 418680.0, 45882..."
1,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3953RLD,"[4636050.0, 4524090.0]"
2,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5030LXK,"[231120.0, 1811130.0, 1950900.0, 1300470.0, 24..."
3,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5534TPH,"[791640.0, 618480.0, 894330.0, 827790.0, 59145..."
4,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,6270NFM,"[776190.0, 161310.0, 357180.0, 88740.0, 474390.0]"
...,...,...,...
635,fea13535-ade6-4215-96ca-dab5b4ef309b,0013DYS,"[3901230.0, 3846930.0]"
636,fea13535-ade6-4215-96ca-dab5b4ef309b,3953RLD,"[252840.0, 801060.0, 310980.0, 583290.0, 21072..."
637,fea13535-ade6-4215-96ca-dab5b4ef309b,5534TPH,"[723990.0, 1106700.0, 871290.0, 545490.0, 3529..."
638,fea13535-ade6-4215-96ca-dab5b4ef309b,6270NFM,"[576960.0, 287250.0, 278220.0, 516150.0, 19308..."


In [170]:
# Put simulated and planned duration lists side by side for every
# (simulation, truck) present in both frames (inner join).
df_plans = tiempos_sim.merge(tiempos_plan, on=["simulationId", "truckId"])
df_plans

Unnamed: 0,simulationId,truckId,tiempo_sim,tiempo_plan
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,"[750960.0, 482520.0, 311850.0, 418680.0, 45882...","[614000, 397000, 259000, 349000, 384000, 26800..."
1,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3953RLD,"[4636050.0, 4524090.0]","[4164000, 4064000]"
2,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5030LXK,"[231120.0, 1811130.0, 1950900.0, 1300470.0, 24...","[189000, 1522000, 1655000, 1125000, 2093000, 6..."
3,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5534TPH,"[791640.0, 618480.0, 894330.0, 827790.0, 59145...","[630000, 496000, 725000, 680000, 491000, 53400..."
4,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,6270NFM,"[776190.0, 161310.0, 357180.0, 88740.0, 474390.0]","[621000, 130000, 294000, 74000, 403000]"
...,...,...,...,...
635,fea13535-ade6-4215-96ca-dab5b4ef309b,0013DYS,"[3901230.0, 3846930.0]","[3714000, 3663000]"
636,fea13535-ade6-4215-96ca-dab5b4ef309b,3953RLD,"[252840.0, 801060.0, 310980.0, 583290.0, 21072...","[208000, 670000, 260000, 492000, 178000, 79900..."
637,fea13535-ade6-4215-96ca-dab5b4ef309b,5534TPH,"[723990.0, 1106700.0, 871290.0, 545490.0, 3529...","[589000, 909000, 725000, 455000, 297000, 82900..."
638,fea13535-ade6-4215-96ca-dab5b4ef309b,6270NFM,"[576960.0, 287250.0, 278220.0, 516150.0, 19308...","[445000, 223000, 219000, 408000, 1538000, 1591..."


In [171]:
# Same inner join as above, followed by dropping rows with missing cells and
# renumbering the index. dropna only inspects the cell values themselves, so
# NaN entries *inside* the lists are not removed by it.
emparejados = pd.merge(tiempos_sim, tiempos_plan, on=["simulationId", "truckId"])
retrasos = emparejados.dropna().reset_index(drop=True)
retrasos

Unnamed: 0,simulationId,truckId,tiempo_sim,tiempo_plan
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,"[750960.0, 482520.0, 311850.0, 418680.0, 45882...","[614000, 397000, 259000, 349000, 384000, 26800..."
1,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3953RLD,"[4636050.0, 4524090.0]","[4164000, 4064000]"
2,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5030LXK,"[231120.0, 1811130.0, 1950900.0, 1300470.0, 24...","[189000, 1522000, 1655000, 1125000, 2093000, 6..."
3,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,5534TPH,"[791640.0, 618480.0, 894330.0, 827790.0, 59145...","[630000, 496000, 725000, 680000, 491000, 53400..."
4,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,6270NFM,"[776190.0, 161310.0, 357180.0, 88740.0, 474390.0]","[621000, 130000, 294000, 74000, 403000]"
...,...,...,...,...
635,fea13535-ade6-4215-96ca-dab5b4ef309b,0013DYS,"[3901230.0, 3846930.0]","[3714000, 3663000]"
636,fea13535-ade6-4215-96ca-dab5b4ef309b,3953RLD,"[252840.0, 801060.0, 310980.0, 583290.0, 21072...","[208000, 670000, 260000, 492000, 178000, 79900..."
637,fea13535-ade6-4215-96ca-dab5b4ef309b,5534TPH,"[723990.0, 1106700.0, 871290.0, 545490.0, 3529...","[589000, 909000, 725000, 455000, 297000, 82900..."
638,fea13535-ade6-4215-96ca-dab5b4ef309b,6270NFM,"[576960.0, 287250.0, 278220.0, 516150.0, 19308...","[445000, 223000, 219000, 408000, 1538000, 1591..."


In [172]:
# Per-leg and per-route differences between simulated and planned durations.
#
# The frame is rebuilt from `tiempos_sim` / `tiempos_plan` in a single chain
# instead of overwriting `df_plans` step by step. The previous version replaced
# `df_plans` with its exploded form, so running the cell a second time applied
# `sum` to scalars and raised a TypeError; this version can be re-run safely.
df_plans = (
    pd.merge(tiempos_sim, tiempos_plan, on=["simulationId", "truckId"])
    # Route totals are computed while the columns still hold whole lists.
    .assign(
        suma_sim=lambda d: d["tiempo_sim"].apply(sum),
        suma_plan=lambda d: d["tiempo_plan"].apply(sum),
    )
    # One row per leg; both list columns are exploded together, which requires
    # them to have the same length in every row.
    .explode(["tiempo_sim", "tiempo_plan"])
    .assign(
        diferencia_por_elemento=lambda d: d["tiempo_sim"] - d["tiempo_plan"],
        diferencia_total=lambda d: d["suma_sim"] - d["suma_plan"],
    )
)
df_plans


Unnamed: 0,simulationId,truckId,tiempo_sim,tiempo_plan,suma_sim,suma_plan,diferencia_por_elemento,diferencia_total
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,750960.0,614000,9477390.0,8192000,136960.0,1285390.0
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,482520.0,397000,9477390.0,8192000,85520.0,1285390.0
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,311850.0,259000,9477390.0,8192000,52850.0,1285390.0
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,418680.0,349000,9477390.0,8192000,69680.0,1285390.0
0,007f3d08-14a7-4a93-af9e-f0d9cfbcea94,3321FBL,458820.0,384000,9477390.0,8192000,74820.0,1285390.0
...,...,...,...,...,...,...,...,...
639,fea13535-ade6-4215-96ca-dab5b4ef309b,6965XLY,198720.0,157000,8528700.0,6916000,41720.0,1612700.0
639,fea13535-ade6-4215-96ca-dab5b4ef309b,6965XLY,372420.0,300000,8528700.0,6916000,72420.0,1612700.0
639,fea13535-ade6-4215-96ca-dab5b4ef309b,6965XLY,3208830.0,2620000,8528700.0,6916000,588830.0,1612700.0
639,fea13535-ade6-4215-96ca-dab5b4ef309b,6965XLY,556620.0,472000,8528700.0,6916000,84620.0,1612700.0


In [173]:
# Pair every planned leg duration with its simulated counterpart, truck by
# truck. zip stops at the shorter list, so unmatched trailing legs are ignored.
pares = [
    (plan, sim)
    for lista_plan, lista_sim in zip(retrasos["tiempo_plan"], retrasos["tiempo_sim"])
    for plan, sim in zip(lista_plan, lista_sim)
]
arr = np.array(pares)

# Column 0 holds planned values, column 1 simulated ones; both are kept as
# (n, 1) column vectors. The /1000 presumably converts ms to seconds -- TODO confirm.
x = arr[:, [0]] / 1000
y = arr[:, [1]] / 1000


In [174]:
# Fit a linear model mapping planned duration (x) to simulated duration (y)
travelModel = LinearRegression().fit(x, y)

# Persist the fitted model next to the notebook
with open('travelModel.pkl', 'wb') as salida:
    pickle.dump(travelModel, salida)


In [175]:
# Delivery times: the `delta` of every "Truck ended delivering" event, i.e. the
# time elapsed since the previous event of the same truck and simulation.
# .loc + .copy() yields an independent frame, so the column assignment below
# does not write to a slice of `eventos` (which triggered SettingWithCopyWarning).
es_fin_entrega = eventos["eventType"] == "Truck ended delivering"
tiemposEntrega = eventos.loc[es_fin_entrega, ["truckId", "delta"]].copy()

# Label encoding: replace each truck id with an integer code.
# NOTE(review): the codes are arbitrary ordinals, so a linear model on them
# assumes delivery time varies linearly with the code -- confirm this is the
# intended feature before relying on the predictions.
le = LabelEncoder()
tiemposEntrega["truckId"] = le.fit_transform(tiemposEntrega["truckId"])

# Train the model on (truck code -> delta), both as (n, 1) column vectors.
# NOTE(review): unlike the travel model, `delta` is not divided by 1000 here,
# so the target stays in the raw eventTime units -- confirm consumers expect that.
deliveryModel = LinearRegression()
deliveryModel.fit(
    tiemposEntrega["truckId"].values.reshape(-1, 1),
    tiemposEntrega["delta"].values.reshape(-1, 1),
)

# Save the model and the label encoder (both are needed to predict later)
with open('deliveryModel.pkl', 'wb') as f:
    pickle.dump(deliveryModel, f)

with open('le.pkl', 'wb') as f:
    pickle.dump(le, f)
