In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt # basic plotting
import seaborn as sns # for prettier plots
import random as rd # generating random numbers
import datetime # manipulating date formats

from tqdm import tqdm
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import warnings
warnings.filterwarnings("ignore")


In [None]:
# Read the training sales file from the Kaggle input directory and preview it.
SALES_TRAIN_CSV = "/kaggle/input/competitive-data-science-predict-future-sales/sales_train.csv"
sales = pd.read_csv(SALES_TRAIN_CSV)
sales.head()

In [None]:
# you can use this tip to save some memory : #downcast data to save memory : from 134.4+ MB, we went to 61.6+ MB
def _smallest_safe_int_dtype(col):
    """Return int16 or int32 if every value of ``col`` fits, else its current dtype."""
    if col.empty:
        return np.int16
    lowest, highest = col.min(), col.max()
    for candidate in (np.int16, np.int32):
        bounds = np.iinfo(candidate)
        if bounds.min <= lowest and highest <= bounds.max:
            return candidate
    return col.dtype


def downcast_dtypes(df):
    """Shrink numeric columns of ``df`` in place to save memory.

    float64 columns become float32. int64/int32 columns become int16 when all
    of their values fit in int16; otherwise they are narrowed only as far as
    is safe (int32, or left unchanged), so large values are never wrapped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; its columns are replaced in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenient reassignment.
    """
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols = [c for c in df if df[c].dtype in ["int64", "int32"]]
    for col in float_cols:
        df[col] = df[col].astype(np.float32)
    for col in int_cols:
        # A blind cast to int16 silently wraps values outside [-32768, 32767].
        df[col] = df[col].astype(_smallest_safe_int_dtype(df[col]))
    return df

# DataFrame.info() prints its report itself and returns None, so it must not be
# passed to print(): that would emit the label after the report, followed by "None".
print("-- mem before")
sales.info()
sales = downcast_dtypes(sales)
print("-- mem after")
sales.info()


# TS des produits les plus répandus

Pour chaque produit :

- voir les dates min / max
- le nombre de dates avec des ventes
- par jour
- agréger par semaine, par mois






In [None]:
from dateutil.parser import parse
# Parse the raw date strings with an explicit day-first format. dateutil's
# parse() reads ambiguous strings such as "02.01.2013" month-first, which swaps
# day and month for every date whose day is <= 12.
# NOTE(review): assumes sales_train.csv stores dates as dd.mm.yyyy — confirm
# against sales['date'].head(); a different layout raises ValueError here
# instead of being silently mis-parsed.
sales['rdate'] = pd.to_datetime(sales['date'], format='%d.%m.%Y')

In [None]:
# First and last sale date of every item (one row per item_id).
sale_date_range = {'rdate': ["min", "max"]}
df = sales.groupby(by='item_id').agg(sale_date_range).reset_index()
df.head()

In [None]:
# Number of days between an item's first and last sale, computed as a single
# vectorized datetime subtraction instead of a Python-level row-by-row apply.
df['days'] = (df[('rdate', 'max')] - df[('rdate', 'min')]).dt.days

df.head()

In [None]:
def njours(item_id):
    """Return the number of distinct sale dates recorded for ``item_id``."""
    return len(sales[ (sales.item_id == item_id)   ].rdate.unique())

# Count distinct sale dates for all items in one grouped pass. Calling njours()
# once per item rescans the whole sales table each time.
sale_days_per_item = sales.groupby('item_id')['rdate'].nunique()
df['njours'] = df.item_id.map(sale_days_per_item)
df.head()

In [None]:
# Rank items: most distinct sale days first, ties broken by the longest sale span.
df = df.sort_values(by=['njours', 'days'], ascending=[False, False])
df.head()

In [None]:
# Hand-picked item id (an alternative is kept commented out below), followed by
# a preview of the ten top-ranked rows of df.
item_id = 5822
# item_id = 12552
df.head(10)

In [None]:
# Ids of the ten top-ranked items in df.
item_ids = df.item_id.head(10).to_numpy()

def build_ts(item_id):
    """Return the summed ``item_cnt_day`` per ``rdate`` for one item.

    The result is a Series indexed by ``rdate`` with one entry per date that
    appears in ``sales`` for ``item_id``.
    """
    item_rows = sales.loc[sales.item_id == item_id, ['item_cnt_day', 'rdate']]
    per_date = item_rows.groupby('rdate', as_index=False).sum()
    return pd.Series(per_date['item_cnt_day'].to_numpy(), index=per_date['rdate'])


In [None]:
# Build the per-date series of the third top-ranked item and plot it.
ts = build_ts(item_ids[2])
ts.plot(figsize= (18,6))

# Simple Exponential Smoothing


$$ \hat{y}_{t+1} = \alpha \cdot y_t + (1 - \alpha) \cdot \hat{y}_{t} $$


In [None]:
def exponential_smoothing(series, alpha):
    """Apply simple exponential smoothing to a sequence of observations.

    Parameters
    ----------
    series : sequence of numbers or pandas.Series
        Observations in time order. Values are read by position, so the index
        of a Series (dates, non-zero-based integers, ...) is irrelevant.
    alpha : float
        Smoothing parameter in [0.0, 1.0].

    Returns
    -------
    list of float
        Smoothed values, same length as ``series``; empty for empty input.
        The first value equals the first observation.
    """
    # Work on a plain positional list: series[0] on a Series is a label lookup
    # and fails (or is deprecated) when 0 is not an index label.
    values = np.asarray(series, dtype=float).tolist()
    if not values:
        return []
    result = [values[0]]
    for observed in values[1:]:
        result.append(alpha * observed + (1 - alpha) * result[-1])
    return result
    

    
def plotExponentialSmoothing(series, alphas):
    """Plot a series together with its exponential smoothing for several alphas.

    Parameters
    ----------
    series : pandas.Series
        Observations in time order; plotted by position via ``series.values``.
    alphas : list of float
        Smoothing parameters, one curve per value.
    """
    # matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
    # later releases dropped the old names, so use whichever one is installed.
    style = next(
        (name for name in ("seaborn-v0_8-white", "seaborn-white")
         if name in plt.style.available),
        "default",
    )
    with plt.style.context(style):
        plt.figure(figsize=(18, 7))
        plt.plot(series.values, "c", label = "Actual", alpha = 0.5)
        for alpha in alphas:
            plt.plot(exponential_smoothing(series, alpha), label="Alpha {}".format(alpha))
        plt.legend(loc="best")
        plt.axis('tight')
        plt.title("Exponential Smoothing")
        plt.grid(True)
        


In [None]:
# Compare a responsive (0.3) and a heavily smoothed (0.05) alpha on the tenth
# top-ranked item.
ts = build_ts(item_ids[9])
plotExponentialSmoothing(ts, [0.3, 0.05])


# Double exponential smoothing


In [None]:
def double_exponential_smoothing(series, alpha, beta):
    """Apply double exponential smoothing (level + trend) to a sequence.

    Parameters
    ----------
    series : sequence of numbers or pandas.Series
        Observations in time order. Values are read by position, so the index
        of a Series is irrelevant.
    alpha : float
        Smoothing parameter in [0.0, 1.0] for the level.
    beta : float
        Smoothing parameter in [0.0, 1.0] for the trend.

    Returns
    -------
    (result, level_, trend_) : tuple of three lists of float
        Each list has ``len(series) + 1`` entries (three empty lists for empty
        input). Entry 0 holds the initial state: the first observation for
        ``result`` and ``level_``, and the first difference (0.0 for a
        one-element series) for ``trend_``. The last entry is one forecast
        step, computed by feeding the previous result back in as the
        observation.
    """
    # Read by position: series[0] on a Series is a label lookup.
    values = np.asarray(series, dtype=float).tolist()
    if not values:
        return [], [], []

    level = values[0]
    # The initial trend is the first difference; with one observation there is none.
    trend = values[1] - values[0] if len(values) > 1 else 0.0

    result = [level]
    level_ = [level]
    # Seed the trend history with the initial trend, not the level, so the
    # returned trend curve starts on the right scale.
    trend_ = [trend]
    for n in range(1, len(values) + 1):
        # Past the end of the data, forecast from the previous smoothed value.
        value = values[n] if n < len(values) else result[-1]
        last_level, level = level, alpha*value + (1-alpha)*(level+trend)
        trend = beta*(level-last_level) + (1-beta)*trend
        result.append(level+trend)
        level_.append(level)
        trend_.append(trend)
    return result, level_, trend_

def plotDoubleExponentialSmoothing(series, alphas, betas):
    """Plot double exponential smoothing for every (alpha, beta) combination.

    Three figures are drawn: the smoothed result, the level component and the
    trend component. Each one overlays the actual series for reference.

    Parameters
    ----------
    series : pandas.Series
        Observations in time order; plotted by position via ``series.values``.
    alphas : list of float
        Smoothing parameters for the level.
    betas : list of float
        Smoothing parameters for the trend.
    """
    # matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
    # later releases dropped the old names, so use whichever one is installed.
    style = next(
        (name for name in ("seaborn-v0_8-white", "seaborn-white")
         if name in plt.style.available),
        "default",
    )

    # Smooth once per parameter pair and reuse the (result, level, trend)
    # triple for all three figures.
    smoothed = [
        ("Alpha {}, beta {}".format(alpha, beta),
         double_exponential_smoothing(series, alpha, beta))
        for alpha in alphas
        for beta in betas
    ]

    # (position in the returned triple, figure title)
    panels = [
        (0, "Double Exponential Smoothing"),
        (1, "Double Exponential Smoothing - level"),
        (2, "Double Exponential Smoothing - trend"),
    ]

    with plt.style.context(style):
        for component, title in panels:
            plt.figure(figsize=(20, 8))
            for label, triple in smoothed:
                plt.plot(triple[component], label=label)
            plt.plot(series.values, label = "Actual", alpha = 0.5, linestyle = '-')

            plt.legend(loc="best")
            plt.axis('tight')
            plt.title(title)
            plt.grid(True)


In [None]:
# Double exponential smoothing of the top-ranked item with alpha = beta = 0.9.
ts = build_ts(item_ids[0])
plotDoubleExponentialSmoothing(ts, alphas=[0.9], betas=[0.9])


In [None]:
# Keep the three returned lists (result, level, trend) for inspection.
r, l, t = double_exponential_smoothing(ts, 0.5, 0.5)


In [None]:
# Display the level list returned by double_exponential_smoothing.
l

In [None]:
# Contrast a moderate (0.5) and a very small (0.02) alpha with beta fixed at 0.9.
plotDoubleExponentialSmoothing(ts, alphas=[0.5, 0.02], betas=[0.9])

# With statsmodels



In [None]:
ts = build_ts(item_ids[0])

from statsmodels.tsa.api import SimpleExpSmoothing, ExponentialSmoothing,  Holt

# fit() returns a separate results object and leaves the model itself unfitted.
# Keep that object in `mdl`, because the following cells read mdl.fittedvalues,
# mdl.params and mdl.forecast(), which only exist on the fitted results.
mdl = SimpleExpSmoothing(ts, initialization_method="estimated").fit()




In [None]:
# Overlay mdl.fittedvalues on the observed series ts.
plt.figure(figsize=(18, 8))
plt.plot(ts)
plt.plot(mdl.fittedvalues)
# plt.plot(fcast)


In [None]:
# Show the parameters stored on mdl.
mdl.params

In [None]:
# Forecast 12 steps beyond the end of the series and display the result.
fcast = mdl.forecast(12)
fcast

In [None]:

# mdl = SimpleExpSmoothing.best_params()
# print('Best Score: ', grid_result.best_score_)

# Holt double exponential smoothing




In [None]:
# Holt's linear-trend model with a damped trend on the tenth top-ranked item.
ts = build_ts(item_ids[9])

# optimized=True is passed to fit(); the fitted results object is kept in mdl.
mdl = Holt(ts, initialization_method="estimated", damped_trend=True).fit(optimized = True)

print(mdl.params)



In [None]:
# Overlay mdl.fittedvalues on the observed series ts.
plt.figure(figsize=(18, 8))
plt.plot(ts)
plt.plot(mdl.fittedvalues)

In [None]:
# Forecast 12 steps beyond the end of the series and display the result.
fcast = mdl.forecast(12)
fcast

# Holt-Winters

In [None]:
# Series of the top-ranked item, used by the Holt-Winters fits below.
ts = build_ts(item_ids[0])


In [None]:
# Fit four Holt-Winters variants with a 7-observation season (additive or
# multiplicative seasonality, each with and without a damped trend), then
# tabulate their parameters and SSE, one column per variant.
hw_variants = {
    "Additive":       {"seasonal": 'add'},
    "Multiplicative": {"seasonal": 'mul'},
    "Additive Dam":   {"seasonal": 'add', "damped_trend": True},
    "Multiplica Dam": {"seasonal": 'mul', "damped_trend": True},
}
hw_fits = {
    label: ExponentialSmoothing(
        ts, seasonal_periods=7, trend='add', use_boxcox=False,
        initialization_method="estimated", **variant
    ).fit()
    for label, variant in hw_variants.items()
}
# Later cells refer to the individual fits by these names.
fit1, fit2, fit3, fit4 = hw_fits.values()

params = ['smoothing_level', 'smoothing_trend', 'damping_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results = pd.DataFrame(
    {label: [fitted.params[p] for p in params] + [fitted.sse] for label, fitted in hw_fits.items()},
    index=[r"$\alpha$",r"$\beta$",r"$\phi$",r"$\gamma$",r"$l_0$","$b_0$","SSE"],
)

In [None]:
# Display the parameter / SSE comparison table.
results

In [None]:
# Same four Holt-Winters variants, now with a 28-observation season.
hw_variants = {
    "Additive":       {"seasonal": 'add'},
    "Multiplicative": {"seasonal": 'mul'},
    "Additive Dam":   {"seasonal": 'add', "damped_trend": True},
    "Multiplica Dam": {"seasonal": 'mul', "damped_trend": True},
}
hw_fits = {
    label: ExponentialSmoothing(
        ts, seasonal_periods=28, trend='add', use_boxcox=False,
        initialization_method="estimated", **variant
    ).fit()
    for label, variant in hw_variants.items()
}
# Later cells refer to the individual fits by these names.
fit1, fit2, fit3, fit4 = hw_fits.values()

params = ['smoothing_level', 'smoothing_trend', 'damping_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results = pd.DataFrame(
    {label: [fitted.params[p] for p in params] + [fitted.sse] for label, fitted in hw_fits.items()},
    index=[r"$\alpha$",r"$\beta$",r"$\phi$",r"$\gamma$",r"$l_0$","$b_0$","SSE"],
)
results

In [None]:
# Same four Holt-Winters variants, now with a 365-observation season.
hw_variants = {
    "Additive":       {"seasonal": 'add'},
    "Multiplicative": {"seasonal": 'mul'},
    "Additive Dam":   {"seasonal": 'add', "damped_trend": True},
    "Multiplica Dam": {"seasonal": 'mul', "damped_trend": True},
}
hw_fits = {
    label: ExponentialSmoothing(
        ts, seasonal_periods=365, trend='add', use_boxcox=False,
        initialization_method="estimated", **variant
    ).fit()
    for label, variant in hw_variants.items()
}
# Later cells refer to the individual fits by these names.
fit1, fit2, fit3, fit4 = hw_fits.values()

params = ['smoothing_level', 'smoothing_trend', 'damping_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results = pd.DataFrame(
    {label: [fitted.params[p] for p in params] + [fitted.sse] for label, fitted in hw_fits.items()},
    index=[r"$\alpha$",r"$\beta$",r"$\phi$",r"$\gamma$",r"$l_0$","$b_0$","SSE"],
)
results

In [None]:
# Same four Holt-Winters variants with a 365-observation season, this time
# passing smoothing_trend=0 to fit().
hw_variants = {
    "Additive":       {"seasonal": 'add'},
    "Multiplicative": {"seasonal": 'mul'},
    "Additive Dam":   {"seasonal": 'add', "damped_trend": True},
    "Multiplica Dam": {"seasonal": 'mul', "damped_trend": True},
}
hw_fits = {
    label: ExponentialSmoothing(
        ts, seasonal_periods=365, trend='add', use_boxcox=False,
        initialization_method="estimated", **variant
    ).fit(smoothing_trend = 0)
    for label, variant in hw_variants.items()
}
# Later cells refer to the individual fits by these names.
fit1, fit2, fit3, fit4 = hw_fits.values()

params = ['smoothing_level', 'smoothing_trend', 'damping_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results = pd.DataFrame(
    {label: [fitted.params[p] for p in params] + [fitted.sse] for label, fitted in hw_fits.items()},
    index=[r"$\alpha$",r"$\beta$",r"$\phi$",r"$\gamma$",r"$l_0$","$b_0$","SSE"],
)
results




In [None]:
# Overlay fit1.fittedvalues on the observed series ts.
plt.figure(figsize=(18, 8))
plt.plot(ts)
plt.plot(fit1.fittedvalues)



In [None]:
# Observed values, fit1's fitted values and its 24-step forecast on one Axes.
fig, ax = plt.subplots(1, 1, figsize=(24, 9))

ax.plot(ts.values, color='black', alpha=0.5)
ax.plot(fit1.fittedvalues.values, marker='x', color='blue')

hw_forecast = fit1.forecast(24).rename('Holt-Winters (add-add-seasonal)')
hw_forecast.plot(ax=ax, style='--', marker='o', color='red', legend=True)

plt.show()

In [None]:
# Observed values, fit3's fitted values and its 24-step forecast on one Axes.
fig, ax = plt.subplots(1, 1, figsize=(24, 9))

ax.plot(ts.values, color='black', alpha=0.5)
ax.plot(fit3.fittedvalues.values, marker='x', color='blue')

hw_forecast = fit3.forecast(24).rename('Holt-Winters')
hw_forecast.plot(ax=ax, style='--', marker='o', color='red', legend=True)

plt.show()

In [None]:
# Display fit1's fitted values.
fit1.fittedvalues