In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from unidecode import unidecode

In [3]:
def fix_column_names(df):
    """Return ASCII, underscore-separated versions of ``df``'s column labels.

    Each label is transliterated to ASCII with ``unidecode`` and every
    '.', '-', '/' and space character is replaced by '_'. ``df`` itself is
    not modified; assign the result to ``df.columns`` to apply it.

    Parameters
    ----------
    df : object exposing an iterable ``columns`` attribute
        Typically a pandas DataFrame.

    Returns
    -------
    list of str
        The cleaned labels, in the original column order.
    """
    # Single-pass equivalent of replacing '.', '-', '/' with ' ' and then
    # every ' ' with '_'.
    separators_to_underscore = str.maketrans({'.': '_', '-': '_', '/': '_', ' ': '_'})
    # Strip accents, then replace the special characters. str() guards against
    # non-string labels (e.g. numeric or date headers), since unidecode
    # expects a str.
    return [
        unidecode(str(col)).translate(separators_to_underscore)
        for col in df.columns
    ]

In [4]:
# Load the raw Excel workbooks (bare file names, so they are resolved
# relative to the current working directory).
df_base_article = pd.read_excel('BaseArticle.xlsx')
df_base_prev = pd.read_excel('Base ventes et prévisions.xlsx')
df_data_modele = pd.read_excel('Data Modèle.xlsx', sheet_name='Stocks')
df_stock_central = pd.read_excel('Stock Central.xlsx')
df_stock_local = pd.read_excel('Stock Local.xlsx')

In [5]:
# Normalise the column labels of every loaded frame.
# fix_column_names returns a list of labels, so it must be assigned to
# `.columns`; binding it to the variable itself would replace the DataFrame
# with a plain list of names.
df_base_article.columns = fix_column_names(df_base_article)
# Drop the trailing underscore left on this label by the clean-up.
df_base_article = df_base_article.rename(columns={'Classe_': 'Classe'})

df_base_prev.columns = fix_column_names(df_base_prev)

df_data_modele.columns = fix_column_names(df_data_modele)

df_stock_central.columns = fix_column_names(df_stock_central)

df_stock_local.columns = fix_column_names(df_stock_local)

In [6]:
# Clamp negative sales history values to zero.
df_base_prev.loc[df_base_prev['Historique_vente'] < 0, 'Historique_vente'] = 0

In [48]:
# Mean forecast per (Product, Subsidiary); used below to fill missing forecasts.
mean_forecast = (
    df_base_prev
    .groupby(['Product', 'Subsidiary'])[['Forecast_M_3']]
    .mean()
    .reset_index()
)

# Attach the group mean to every row. The join keys are given explicitly:
# without `on=`, pandas would also join on the shared Forecast_M_3 column.
# After the merge, Forecast_M_3_x is the group mean and Forecast_M_3_y is the
# row's own forecast.
df_merge_forecast = pd.merge(
    mean_forecast, df_base_prev, how='left', on=['Product', 'Subsidiary']
)

# Fill missing row-level forecasts with the group mean (rows whose group mean
# is itself NaN are left untouched).
condition = df_merge_forecast['Forecast_M_3_y'].isna() & ~df_merge_forecast['Forecast_M_3_x'].isna()
df_merge_forecast.loc[condition, 'Forecast_M_3_y'] = df_merge_forecast.loc[condition, 'Forecast_M_3_x']

# Keep a single forecast column named 'Forecast'.
df_merge_forecast = (
    df_merge_forecast
    .drop('Forecast_M_3_x', axis=1)
    .rename(columns={'Forecast_M_3_y': 'Forecast'})
)

# Standard deviation of the forecast error (sales minus forecast) per
# (Product, Subsidiary). pandas computes the sample standard deviation
# (ddof=1) by default, so a group with a single row yields NaN.
df_merge_forecast['Difference'] = df_merge_forecast['Historique_vente'] - df_merge_forecast['Forecast']
df_std = (
    df_merge_forecast
    .groupby(['Product', 'Subsidiary'])[['Difference']]
    .std()
    .reset_index()
    .rename(columns={'Difference': 'Std_deviation'})
)
# Broadcast the per-group value back onto the rows; the helper column is no
# longer needed afterwards.
df_merge_forecast = pd.merge(df_merge_forecast, df_std, on=['Subsidiary', 'Product']).drop('Difference', axis=1)

In [56]:
# Display the result; the saved output shows the frame truncated to its first
# and last rows.
# NOTE(review): the saved output lists Std_deviation_x / Std_deviation_y,
# whereas the preceding cell as written yields a single Std_deviation column —
# presumably the output is stale or came from out-of-order execution (the
# execution counts are not sequential). Re-run from a fresh kernel to confirm.
df_merge_forecast

Unnamed: 0,Product,Subsidiary,Mois,Brand,Range,Product_1,Category,Historique_vente,Forecast,Std_deviation_x,Std_deviation_y
0,1000001,Belgium,2018-09-01,MARQUE B,Parfum,PRODUIT 1000001,Produit vente,1925.34,2265.105882,2445.669201,2445.669201
1,1000001,Belgium,2018-06-01,MARQUE B,Parfum,PRODUIT 1000001,Produit vente,316.00,360.000000,2445.669201,2445.669201
2,1000001,Belgium,2018-04-01,MARQUE B,Parfum,PRODUIT 1000001,Produit vente,21606.02,29059.155168,2445.669201,2445.669201
3,1000001,Belgium,2018-08-01,MARQUE B,Parfum,PRODUIT 1000001,Produit vente,314.00,360.000000,2445.669201,2445.669201
4,1000001,Belgium,2018-12-01,MARQUE B,Parfum,PRODUIT 1000001,Produit vente,2040.15,2180.046000,2445.669201,2445.669201
...,...,...,...,...,...,...,...,...,...,...,...
26071,6000001,Belgium,2018-03-01,MARQUE B,Soin corps,PRODUIT 6000001,Echantillon,0.00,0.000000,0.000000,0.000000
26072,6000001,France,2018-12-01,MARQUE B,Soin corps,PRODUIT 6000001,Echantillon,0.00,0.000000,0.000000,0.000000
26073,6000001,France,2018-01-01,MARQUE B,Soin corps,PRODUIT 6000001,Echantillon,0.00,0.000000,0.000000,0.000000
26074,6000001,France,2018-02-01,MARQUE B,Soin corps,PRODUIT 6000001,Echantillon,0.00,0.000000,0.000000,0.000000
