In [126]:
import pandas as pd
from datetime import datetime
from datetime import date
import json
import math
import xlrd
import numpy as np
from scipy.stats import norm as scipynorm

pd.set_option('display.float_format','{:.4f}'.format)

hubs_dict = {
    'ARICA': 'IQUIQUE',
    'IQUIQUE': 'IQUIQUE',
    'ANTOFAGASTA': 'ANTOFAGASTA',
    'COPIAPO': 'COPIAPO',
    'COQUIMBO': 'COQUIMBO',
    'OVALLE': 'COQUIMBO',
    'ILLAPEL': 'CURAUMA',
    'LLAY LLAY': 'CURAUMA',
    'CURAUMA': 'CURAUMA',
    'SANTIAGO SUR': 'SANTIAGO SUR',
    'RANCAGUA': 'SANTIAGO SUR',
    'TALCA': 'TALCA',
    'CHILLAN': 'TALCAHUANO',
    'TALCAHUANO': 'TALCAHUANO',
    'LOS ANGELES': 'TALCAHUANO',
    'TEMUCO': 'TEMUCO',
    'VALDIVIA': 'VALDIVIA',
    'OSORNO': 'VALDIVIA',
    'PUERTO MONTT': 'PUERTO MONTT',
    'CASTRO': 'PUERTO MONTT',
    'COYHAIQUE': 'COYHAIQUE',
    'CALAMA': 'ANTOFAGASTA'
}

def stats(datos, confianza, lead_time):
    
    t = scipynorm.ppf(confianza)

    mean_no_group = datos.groupby(by=['ID_SKU_VENTA', 'DESCR_CENDIST']).mean().reset_index().rename(columns={'Venta en pallets': 'MEDIA'})
    stdev_no_group = datos.groupby(by=['ID_SKU_VENTA', 'DESCR_CENDIST']).std().reset_index().rename(columns={'Venta en pallets': 'STD'})

    mean_no_group['MEDIA'] = mean_no_group.apply(lambda x: x.MEDIA * lead_time, axis=1)
    stdev_no_group['STD'] = stdev_no_group.apply(lambda x: x.STD * lead_time, axis=1)

    data_completa = mean_no_group.merge(stdev_no_group, on=['ID_SKU_VENTA', 'DESCR_CENDIST'])

    data_completa['COEF_VAR'] = data_completa.apply(lambda x: x['STD'] / x['MEDIA'] if x['MEDIA'] != 0 else 0, axis=1)

    data_completa['SS'] = data_completa.apply(lambda x: confianza * x['STD'], axis=1)

    data_completa['STOCK_TEORICO'] = data_completa.apply(lambda x: x['MEDIA'] + x['SS'], axis=1)

    data_completa['PORCENTAJE_SS_DEL_TOTAL'] = data_completa.apply(lambda x: x['SS'] / x['STOCK_TEORICO'] if x['MEDIA'] != 0 else 0, axis=1)

    porcentaje_ss_del_total = data_completa['SS'].sum() / data_completa['STOCK_TEORICO'].sum()
    
    return data_completa, porcentaje_ss_del_total

restricted = False
porcentaje = 100
lead_time = 7

In [127]:
datos_limpios = pd.read_csv('../data/datos_limpios_filtrados.csv')
datos_limpios = datos_limpios[(datos_limpios['DESCR_CENDIST'] != 'CERVECERA') & (datos_limpios['DESCR_CENDIST'] != 'MODELO')]
datos_limpios = datos_limpios.drop(columns='Unnamed: 0')

In [193]:
confianza_inicial = 0.95
stats_no_agrupados, porcentaje_ss_no_agrupado = stats(datos_limpios, confianza_inicial, lead_time)

In [194]:
if restricted:    
    FILE_AGRUPACIONES = f'../data/skuUnification/agrupacion_sku_{porcentaje}%.xlsx'
    sheets_dict = pd.read_excel(FILE_AGRUPACIONES, sheet_name=None)

    master = pd.DataFrame()
    for name in sheets_dict:

        df = sheets_dict[name]
        master = pd.concat([master, df])

    dict_sku_cd = {}
    master = master.rename(columns={'SKU': 'ID_SKU_VENTA', 'CD': 'DESCR_CENDIST'})

    for index, row in master.iterrows():

        if row['CANTIDAD HUB 3R'] != 0:
            dict_sku_cd[(row['ID_SKU_VENTA'], row['DESCR_CENDIST'])] = 1

        else:
            dict_sku_cd[(row['ID_SKU_VENTA'], row['DESCR_CENDIST'])] = 0
            
    assign_hub = (lambda x: x['DESCR_CENDIST'] if (x['ID_SKU_VENTA'], x['DESCR_CENDIST']) in \
                            dict_sku_cd and dict_sku_cd[(x['ID_SKU_VENTA'], x['DESCR_CENDIST'])] == 0 \
                            else hubs_dict[x['DESCR_CENDIST']])

else:
    assign_hub = lambda x: hubs_dict[x['DESCR_CENDIST']]
    
datos_para_agrupar = datos_limpios.copy()
datos_para_agrupar['DESCR_CENDIST'] = datos_para_agrupar.apply(assign_hub, axis=1)
datos_agrupados = datos_para_agrupar.groupby(by=['ID_SKU_VENTA', 'DESCR_CENDIST', 'FECHA']).sum().reset_index()

In [195]:
confianza_final = 0.9

stats_agrupados, porcentaje_ss_agrupado = stats(datos_agrupados, confianza_final, lead_time)
# stats_agrupados

In [132]:
demandas = stats_agrupados.groupby(by=['ID_SKU_VENTA']).sum().reset_index()[['ID_SKU_VENTA', 'MEDIA']]
# demandas

Unnamed: 0,ID_SKU_VENTA,MEDIA
0,515,9.7609
1,592,163.0098
2,595,23.4087
3,622,213.7829
4,763,17.5732
...,...,...
1346,875220,2.8157
1347,875225,0.2179
1348,875359,0.0084
1349,875365,0.0252


In [133]:
no_agrupado_agregado = stats_no_agrupados.groupby(by=['DESCR_CENDIST']).sum()[['MEDIA', 'SS', 'STOCK_TEORICO']]
no_agrupado_agregado = no_agrupado_agregado.reset_index()
# no_agrupado_agregado

Unnamed: 0,DESCR_CENDIST,MEDIA,SS,STOCK_TEORICO
0,ANTOFAGASTA,594.1741,1148.8482,1743.0223
1,ARICA,293.7469,615.0635,908.8104
2,CALAMA,299.2557,604.7845,904.0402
3,CASTRO,174.2444,385.7135,559.958
4,CHILLAN,516.2376,1034.4513,1550.6889
5,COPIAPO,426.2447,866.0754,1292.3201
6,COQUIMBO,627.7351,1287.6912,1915.4263
7,COYHAIQUE,123.4891,285.3772,408.8663
8,CURAUMA,1774.7444,3020.262,4795.0065
9,ILLAPEL,100.1641,276.1314,376.2956


In [134]:
agrupado_agregado = stats_agrupados.groupby(by=['DESCR_CENDIST']).sum()[['MEDIA', 'SS', 'STOCK_TEORICO']]
agrupado_agregado = agrupado_agregado.reset_index()
# agrupado_agregado

Unnamed: 0,DESCR_CENDIST,MEDIA,SS,STOCK_TEORICO
0,ANTOFAGASTA,893.4298,1515.8061,2409.236
1,COPIAPO,426.2447,820.4925,1246.7372
2,COQUIMBO,809.7393,1507.3209,2317.0602
3,COYHAIQUE,123.4891,270.3574,393.8465
4,CURAUMA,2610.0967,4152.1336,6762.2302
5,IQUIQUE,720.5237,1276.1933,1996.717
6,PUERTO MONTT,650.6805,1148.158,1798.8386
7,SANTIAGO SUR,3249.734,5164.4733,8414.2073
8,TALCA,1004.3075,1760.1965,2764.504
9,TALCAHUANO,2270.0388,3700.2315,5970.2702


In [135]:
if restricted:
    diff = no_agrupado_agregado.merge(agrupado_agregado, on=['DESCR_CENDIST'], how='outer')
    diff = diff.fillna(0)
    diff['DELTA'] = diff['STOCK_TEORICO_y'] - diff['STOCK_TEORICO_x']
    deltaStocks = diff[['DESCR_CENDIST', 'DELTA']]
    
    OUTPUT_FILE_STOCKS = f'../data/Agrupación {porcentaje}%/inventario_final_agrupacion_{porcentaje}.csv'
    
    
    OUTPUT_FILE_DELTAS = f'../data/Agrupación {porcentaje}%/deltas_inventario_agrupacion_{porcentaje}.csv'
    deltaStocks.to_csv(OUTPUT_FILE_DELTAS)

In [136]:
if not restricted:
    hubs = list(set(hubs_dict.values()))

    diff = no_agrupado_agregado.merge(agrupado_agregado, how='outer', on=['DESCR_CENDIST'])
    diff = diff.fillna(0)
    diff['DELTA'] = diff['STOCK_TEORICO_y'] - diff['STOCK_TEORICO_x']

    diff['NEW_STOCK'] = 0

    diff['NEW_STOCK_CD'] = diff['MEDIA_x']
    diff['NEW_STOCK_HUB'] = diff['MEDIA_x'] + diff['SS_y']

    diff = diff[['DESCR_CENDIST', 'NEW_STOCK_HUB', 'NEW_STOCK_CD']]

    hubsStocks = diff[diff['DESCR_CENDIST'].isin(hubs)].reset_index()
    hubsStocks = hubsStocks[['DESCR_CENDIST', 'NEW_STOCK_HUB']]
    hubsStocks = hubsStocks.rename(columns={'NEW_STOCK_HUB': 'STOCK'})
    # hubsStocks

    cdStocks = diff[~diff['DESCR_CENDIST'].isin(hubs)].reset_index()
    cdStocks = cdStocks[['DESCR_CENDIST', 'NEW_STOCK_CD']]
    cdStocks = cdStocks.rename(columns={'NEW_STOCK_CD': 'STOCK'})
    # cdStocks

    newStocks = pd.concat([hubsStocks, cdStocks])
    newStocks = newStocks.sort_values(by=['DESCR_CENDIST'])
    # newStocks
    
    OUTPUT_FILE_STOCKS = f'../data/Agrupacion total variabilidad/inventario_final_agrupacion_variabilidad_total_{confianza}_confianza_final.csv'
    newStocks.to_csv()

    oldStocks = no_agrupado_agregado[['DESCR_CENDIST', 'STOCK_TEORICO']]
    oldStocks = oldStocks.rename(columns={'STOCK_TEORICO': 'STOCK'})
    # oldStocks

    deltaStocks = oldStocks.merge(newStocks, on='DESCR_CENDIST')
    deltaStocks['DELTA'] = deltaStocks['STOCK_y'] - deltaStocks['STOCK_x'] 
    deltaStocks
    deltaStocks = deltaStocks[['DESCR_CENDIST', 'DELTA']]
    #deltaStocks
    
    OUTPUT_FILE_DELTAS = f'../data/Agrupacion total variabilidad/deltas_inventario_agrupacion_variabilidad_total_{confianza}_confianza_final.csv'
    deltaStocks.to_csv(OUTPUT_FILE_DELTAS)

'''
#########################################################################
'''

'''
#########################################################################
'''

In [164]:
stats_no_agrupados

Unnamed: 0,ID_SKU_VENTA,DESCR_CENDIST,MEDIA,STD,COEF_VAR,SS,STOCK_TEORICO,PORCENTAJE_SS_DEL_TOTAL
0,515,ANTOFAGASTA,0.4005,0.7071,1.7653,0.6717,1.0722,0.6265
1,515,ARICA,0.0419,0.1407,3.3622,0.1337,0.1756,0.7616
2,515,CALAMA,0.2196,0.4439,2.0217,0.4218,0.6413,0.6576
3,515,CASTRO,0.1207,0.3007,2.4909,0.2857,0.4064,0.7029
4,515,CHILLAN,0.2766,0.5116,1.8497,0.4860,0.7626,0.6373
...,...,...,...,...,...,...,...,...
23007,875225,VALDIVIA,0.0003,0.0041,14.6616,0.0039,0.0042,0.9330
23008,875359,SANTIAGO SUR,0.0084,0.2427,28.8444,0.2305,0.2390,0.9648
23009,875365,CURAUMA,0.0168,0.3430,20.3838,0.3258,0.3427,0.9509
23010,875365,SANTIAGO SUR,0.0084,0.2427,28.8444,0.2305,0.2390,0.9648


In [188]:
finalStocks = stats_agrupados.merge(stats_no_agrupados, on=['ID_SKU_VENTA', 'DESCR_CENDIST'], how='outer')
finalStocks.fillna(0, inplace=True)

In [189]:
hubsSet = set(hubs_dict.values())
if not restricted:
    finalStocks.rename(columns={'STOCK_TEORICO_x': 'STOCK_POST', 'STOCK_TEORICO_y': 'STOCK_PRE'}, inplace=True)
    finalStocks.rename(columns={'MEDIA_y': 'MEDIA_PRE', 'SS_x': 'SS_POST', 'SS_y': 'SS_PRE'}, inplace=True)
    finalStocks.rename(columns={'STD_y': 'STD_DEMANDA'}, inplace=True)
    
    finalStocks['NEW_STOCK_CD'] = finalStocks['MEDIA_PRE']
    finalStocks['NEW_STOCK_HUB'] = finalStocks['MEDIA_PRE'] + finalStocks['SS_POST']
    finalStocks = finalStocks[['ID_SKU_VENTA', 'DESCR_CENDIST', 'STOCK_PRE', 'NEW_STOCK_CD', 'NEW_STOCK_HUB', 'STD_DEMANDA']]
    
    hubsNewStocks = finalStocks[finalStocks['DESCR_CENDIST'].isin(hubsSet)].reset_index()
    hubsNewStocks = hubsNewStocks[['ID_SKU_VENTA', 'DESCR_CENDIST', 'STOCK_PRE', 'NEW_STOCK_HUB', 'STD_DEMANDA']]
    hubsNewStocks = hubsNewStocks.rename(columns={'NEW_STOCK_HUB': 'STOCK_POST'})
    # hubsStocks

    cdNewStocks = finalStocks[~finalStocks['DESCR_CENDIST'].isin(hubsSet)].reset_index()
    cdNewStocks = cdNewStocks[['ID_SKU_VENTA', 'DESCR_CENDIST', 'STOCK_PRE', 'NEW_STOCK_CD', 'STD_DEMANDA']]
    cdNewStocks = cdNewStocks.rename(columns={'NEW_STOCK_CD': 'STOCK_POST'})
    # cdStocks

    newFullStocks = pd.concat([hubsNewStocks, cdNewStocks])
    newFullStocks = newFullStocks.sort_values(by=['ID_SKU_VENTA', 'DESCR_CENDIST'])

In [191]:
newFullStocks['DELTA'] = newFullStocks['STOCK_POST'] - newFullStocks['STOCK_PRE']
# newFullStocks

Unnamed: 0,ID_SKU_VENTA,DESCR_CENDIST,STOCK_PRE,STOCK_POST,STD_DEMANDA,DELTA
0,515,ANTOFAGASTA,1.0722,1.3043,0.7071,0.2320
0,515,ARICA,0.1756,0.0419,0.1407,-0.1337
1,515,CALAMA,0.6413,0.2196,0.4439,-0.4218
2,515,CASTRO,0.4064,0.1207,0.3007,-0.2857
3,515,CHILLAN,0.7626,0.2766,0.5116,-0.4860
...,...,...,...,...,...,...
13009,875225,VALDIVIA,0.0042,0.0185,0.0041,0.0143
13010,875359,SANTIAGO SUR,0.2390,0.2268,0.2427,-0.0121
13011,875365,CURAUMA,0.3427,0.3255,0.3430,-0.0171
13012,875365,SANTIAGO SUR,0.2390,0.2268,0.2427,-0.0121


In [156]:
groupedNewFullStocks = newFullStocks.groupby(by=['DESCR_CENDIST']).sum().reset_index()
# groupedNewFullStocks

Unnamed: 0,DESCR_CENDIST,ID_SKU_VENTA,STOCK_PRE,STOCK_POST,DELTA
0,ANTOFAGASTA,559221277,1743.0223,2109.9803,366.958
1,ARICA,471467173,908.8104,293.7469,-615.0635
2,CALAMA,530462498,904.0402,299.2557,-604.7845
3,CASTRO,490751609,559.958,174.2444,-385.7135
4,CHILLAN,536187220,1550.6889,516.2376,-1034.4513
5,COPIAPO,541405324,1292.3201,1246.7372,-45.5829
6,COQUIMBO,546372534,1915.4263,2135.0559,219.6296
7,COYHAIQUE,457141296,408.8663,393.8465,-15.0199
8,CURAUMA,624978552,4795.0065,5926.878,1131.8715
9,ILLAPEL,439036582,376.2956,100.1641,-276.1314


In [159]:
newFullStocks.to_excel('stats_agrupacion_variabilidad_90_conf.xlsx')
groupedNewFullStocks.to_excel('stats_agregados_agrupacion_variabilidad_90_conf.xlsx')

In [None]:
newFullStocks['DELTA'].sum() / newFullStocks['STOCK_PRE'].sum()