In [1]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from datetime import datetime, timedelta
from collections import OrderedDict 
import sys
import re

In [2]:
# read file 'ktotal_contratos'
df_contratos = pd.read_csv('ktotal_contratos.csv')
# clean data
### format col names 
df_contratos.columns = df_contratos.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('\(', '_').str.replace('\)', '_')
### select and rename relevant cols
df_contratos = df_contratos[['no._carro', 'fecha_contratacion','fecha_de_entrega_auto']]
df_contratos = df_contratos.rename(columns={'no._carro': 'stockid', 'fecha_contratacion':'date_sale_ktotal','fecha_de_entrega_auto':'date_delivery'})
### format date cols
df_contratos['date_sale_ktotal'] = pd.to_datetime(df_contratos['date_sale_ktotal'])
df_contratos['date_delivery'] = pd.to_datetime(df_contratos['date_delivery'])
# examine data
print(df_contratos.shape)
df_contratos.head()

(1275, 3)


Unnamed: 0,stockid,date_sale_ktotal,date_delivery
0,16680,2019-12-05 00:00:00,2019-11-25
1,13139,2019-12-05 00:00:00,2019-11-23
2,13571,2019-12-05 00:00:00,2019-11-24
3,16151,2019-12-06 17:06:25,2019-12-06
4,17324,2020-01-23 00:00:00,2020-01-20


In [3]:
# read file 'entregas'
df_entregas = pd.read_csv("entregas.csv")
# clean data
### format col names
df_entregas.columns = df_entregas.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('\(', '_').str.replace('\)', '_')
### select and rename relevant cols
df_entregas = df_entregas[['estimate_asociado','auto', 'tipo_de_lugar_destino', 'fecha_de_entrega_agendada', 'hora']]
df_entregas = df_entregas.rename(columns={'estimate_asociado': 'estimate', 'auto':'stockid', 'tipo_de_lugar_destino':'location','fecha_de_entrega_agendada':'date_delivery', 'hora':'hour'})
### format and filter by date col
df_entregas['date_delivery'] = pd.to_datetime(df_entregas['date_delivery'])
min_date = df_contratos['date_delivery'].min()
df_entregas = df_entregas[df_entregas['date_delivery'] >= min_date]
### format hour col
def extr_hour(hr):
    return re.findall('[\d]+(?=:)', str(hr))
df_entregas['hour'] = df_entregas['hour'].apply(extr_hour)
### format estimate col
df_entregas['estimate'] = df_entregas['estimate'].str[10:]
### format stockid col
def extr_stockid(auto):
    return re.findall('[\d]+', auto)[0]
df_entregas['stockid'] = df_entregas['stockid'].apply(extr_stockid)
# examine data
print(df_entregas.shape)
df_entregas.head()

(9635, 5)


Unnamed: 0,estimate,stockid,location,date_delivery,hour
1069,337693,3201,CASA CLIENTE,2019-12-01,[9]
1070,329772,3269,CASA CLIENTE,2019-12-01,[10]
1071,336821,3131,WH - LERMA,2019-12-01,[11]
1072,295121,2931,FLORENCIA,2019-12-01,[11]
1073,337623,3004,FLORENCIA,2019-12-01,[11]


In [23]:
# Transform 'entregas' to include ktotal col
ktotal = np.zeros(len(df_entregas))
for i in range(len(df_contratos)):
    print(i)
    indcs = [ind for ind, x in enumerate(df_entregas['stockid']) 
               if int(x) == int(df_contratos['stockid'].iloc[i])]
    print(indcs)
    for j in indcs:
        print(df_contratos['date_delivery'].iloc[i].date())
        print(df_entregas['date_delivery'].iloc[j].date())
        print(df_entregas['date_delivery'].iloc[j].date() == df_contratos['date_delivery'].iloc[i].date())
        if df_contratos['date_delivery'].iloc[i].date() == df_entregas['date_delivery'].iloc[j].date():
            ktotal[j] = 1
df_entregas['ktotal'] = ktotal

0
[570]
2019-11-25
2019-11-25
True
1
[538]
2019-11-23
2019-11-23
True
2
[498]
2019-11-24
2019-11-22
False
3
[]
4
[1847]
2020-01-20
2020-01-20
True
5
[1185]
2019-12-28
2019-12-29
False
6
[1968]
2020-01-27
2020-01-24
False
7
[2097]
2020-01-28
2020-01-28
True
8
[2088]
2020-01-28
2020-01-28
True
9
[2085]
2020-01-28
2020-01-28
True
10
[2068]
2020-01-28
2020-01-28
True
11
[2074]
2020-01-28
2020-01-28
True
12
[2078]
2020-01-28
2020-01-28
True
13
[]
14
[]
15
[2118]
2020-01-29
2020-01-29
True
16
[2115]
2020-01-29
2020-01-29
True
17
[2146]
2020-01-30
2020-01-30
True
18
[2153]
2020-01-30
2020-01-30
True
19
[2081]
2020-01-30
2020-01-28
False
20
[]
21
[271]
2019-11-13
2019-11-13
True
22
[]
23
[2331]
2020-02-06
2020-06-02
False
24
[285]
2019-11-15
2019-11-14
False
25
[2289]
2020-02-05
2020-05-02
False
26
[2355]
2020-02-07
2020-07-02
False
27
[549]
2019-11-24
2019-11-24
True
28
[636]
2019-11-28
2019-11-28
True
29
[274]
2019-11-13
2019-11-13
True
30
[]
31


KeyboardInterrupt: 