In [122]:
import pandas as pd

In [123]:
df = pd.read_csv('media/csvs/DemandForecast.csv')

In [124]:
df.head()

Unnamed: 0,Date,Demand
0,01-Jan-23,20
1,02-Jan-23,28
2,03-Jan-23,19
3,04-Jan-23,44
4,05-Jan-23,52


In [125]:
date_column = "Date"
demand_column = "Demand"
forecast_days = 30
train_test_split = 80
date_format= '%d-%b-%y'


In [126]:
try:
    df[date_column] = pd.to_datetime(df[date_column], format=date_format, errors='coerce')
except ValueError:
    df[date_column] = pd.to_datetime(df[date_column], infer_datetime_format=True, errors='coerce')

In [127]:
df[date_column]

0     2023-01-01
1     2023-01-02
2     2023-01-03
3     2023-01-04
4     2023-01-05
         ...    
285   2023-10-13
286   2023-10-14
287   2023-10-15
288   2023-10-16
289   2023-10-17
Name: Date, Length: 290, dtype: datetime64[ns]

In [128]:
df = df.set_index(date_column)
df = df[[demand_column]].dropna()
df[demand_column] = pd.to_numeric(df[demand_column], errors='coerce').fillna(0)

In [129]:
df.head()

Unnamed: 0_level_0,Demand
Date,Unnamed: 1_level_1
2023-01-01,20
2023-01-02,28
2023-01-03,19
2023-01-04,44
2023-01-05,52


In [144]:
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import plotly.graph_objs as go
from sklearn.metrics import mean_absolute_error, mean_squared_error
import plotly.express as px


df = df.asfreq('D')


# Apply Exponential Smoothing
model = ExponentialSmoothing(df['Demand'], trend='add', seasonal='add', seasonal_periods=7)
fit = model.fit(optimized=True)

# Forecast for the historical period and the next 30 days
historical_forecast = fit.fittedvalues
future_forecast = fit.forecast(30)

# Create a new DataFrame for the future forecast
forecast_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=30, freq='D')
forecast_df = pd.DataFrame({'demand': future_forecast}, index=forecast_dates)

# Calculate errors
mae = mean_absolute_error(df['Demand'], historical_forecast)
mse = mean_squared_error(df['Demand'], historical_forecast)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')

# Combine historical and forecast data for plotting
df['type'] = 'Actual'
forecast_df['type'] = 'Forecast'
combined_df = pd.concat([df, forecast_df])

# Plot using Plotly Express
fig = px.line(combined_df.reset_index(), x='date', y='demand', color='type', 
              title='Demand Forecasting using Triple Exponential Smoothing',
              labels={'demand': 'Demand', 'date': 'Date'},
              line_dash='type')

# Show plot
fig.show()



No frequency information was provided, so inferred frequency D will be used.



Mean Absolute Error: 23.067359921821534
Mean Squared Error: 1630.366509624168


ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['index', 'Demand', 'type', 'demand'] but received: date

In [68]:
split_idx = int(len(df) * train_test_split/100)

In [69]:
split_idx

232

In [70]:
train, test = df[:split_idx], df[split_idx:]

In [71]:
kpi_data = {}
forecast_methods = {}

In [72]:
try:
    forecast_methods['Moving Average'] = test[demand_column].rolling(window=forecast_days).mean().iloc[forecast_days:]
except Exception as e:
    forecast_methods['Moving Average'] = None

In [73]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [74]:
# Exponential Smoothing

model = ExponentialSmoothing(train, seasonal='add', seasonal_periods=12)
fit = model.fit()
forecast_methods['Exponential Smoothing'] = fit.forecast(forecast_days)


  self._init_dates(dates, freq)


In [75]:
 # Double Exponential Smoothing
try:
    model = ExponentialSmoothing(train, trend='add')
    fit = model.fit()
    forecast_methods['Double Exponential Smoothing'] = fit.forecast(forecast_days)
except Exception as e:
    forecast_methods['Double Exponential Smoothing'] = None

  self._init_dates(dates, freq)


In [76]:
forecast_methods

{'Moving Average': Date
 2023-09-20    22.700000
 2023-09-21    23.100000
 2023-09-22    23.200000
 2023-09-23    23.033333
 2023-09-24    21.933333
 2023-09-25    22.466667
 2023-09-26    26.600000
 2023-09-27    30.733333
 2023-09-28    30.366667
 2023-09-29    29.366667
 2023-09-30    30.633333
 2023-10-01    30.933333
 2023-10-02    31.866667
 2023-10-03    32.366667
 2023-10-04    33.433333
 2023-10-05    33.866667
 2023-10-06    35.133333
 2023-10-07    33.966667
 2023-10-08    34.433333
 2023-10-09    35.400000
 2023-10-10    36.266667
 2023-10-11    36.500000
 2023-10-12    36.633333
 2023-10-13    37.100000
 2023-10-14    40.700000
 2023-10-15    44.433333
 2023-10-16    43.433333
 2023-10-17    44.000000
 Name: Demand, dtype: float64,
 'Exponential Smoothing': 2023-08-21    69.051934
 2023-08-22    47.599742
 2023-08-23    64.153913
 2023-08-24    33.169424
 2023-08-25    28.079473
 2023-08-26    36.361962
 2023-08-27    28.225703
 2023-08-28    29.831182
 2023-08-29    33.87

In [77]:
model = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=12)
fit = model.fit()
forecast_methods['Triple Exponential Smoothing'] = fit.forecast(forecast_days)

  self._init_dates(dates, freq)


In [78]:
model = ExponentialSmoothing(train, trend='add', seasonal='mul', seasonal_periods=12)
fit = model.fit()
forecast_methods['Triple Additive Exponential Smoothing'] = fit.forecast(forecast_days)

  self._init_dates(dates, freq)


In [80]:
from sklearn.ensemble import AdaBoostRegressor

In [81]:
model = AdaBoostRegressor()
model.fit(train.index.factorize()[0].reshape(-1, 1), train.values.ravel())
forecast_methods['Adaptive Boosting'] = model.predict(pd.Series(test.index.factorize()[0]).iloc[:forecast_days].values.reshape(-1, 1))


In [82]:
forecast_methods

{'Moving Average': Date
 2023-09-20    22.700000
 2023-09-21    23.100000
 2023-09-22    23.200000
 2023-09-23    23.033333
 2023-09-24    21.933333
 2023-09-25    22.466667
 2023-09-26    26.600000
 2023-09-27    30.733333
 2023-09-28    30.366667
 2023-09-29    29.366667
 2023-09-30    30.633333
 2023-10-01    30.933333
 2023-10-02    31.866667
 2023-10-03    32.366667
 2023-10-04    33.433333
 2023-10-05    33.866667
 2023-10-06    35.133333
 2023-10-07    33.966667
 2023-10-08    34.433333
 2023-10-09    35.400000
 2023-10-10    36.266667
 2023-10-11    36.500000
 2023-10-12    36.633333
 2023-10-13    37.100000
 2023-10-14    40.700000
 2023-10-15    44.433333
 2023-10-16    43.433333
 2023-10-17    44.000000
 Name: Demand, dtype: float64,
 'Exponential Smoothing': 2023-08-21    69.051934
 2023-08-22    47.599742
 2023-08-23    64.153913
 2023-08-24    33.169424
 2023-08-25    28.079473
 2023-08-26    36.361962
 2023-08-27    28.225703
 2023-08-28    29.831182
 2023-08-29    33.87

In [84]:
model = AdaBoostRegressor()
model.fit(train.index.factorize()[0].reshape(-1, 1), train.values.ravel())
forecast_methods['Adaptive Boosting'] = model.predict(pd.Series(test.index.factorize()[0]).iloc[:forecast_days].values.reshape(-1, 1))

In [85]:
forecast_methods['Adaptive Boosting']

array([45.84615385, 45.84615385, 45.84615385, 45.84615385, 45.84615385,
       45.84615385, 45.84615385, 45.84615385, 45.84615385, 45.84615385,
       44.26666667, 44.26666667, 44.26666667, 44.26666667, 44.26666667,
       45.66666667, 45.66666667, 45.66666667, 45.66666667, 45.66666667,
       45.66666667, 45.66666667, 45.66666667, 45.66666667, 45.66666667,
       28.03125   , 28.03125   , 28.03125   , 28.03125   , 28.03125   ])

In [87]:
from xgboost import XGBRegressor

In [88]:
model = XGBRegressor()
model.fit(train.index.factorize()[0].reshape(-1, 1), train.values.ravel())
forecast_methods['Extreme Gradient Boosting'] = model.predict(pd.Series(test.index.factorize()[0]).iloc[:forecast_days].values.reshape(-1, 1))

In [89]:
forecast_methods['Extreme Gradient Boosting']

array([20.933443 , 27.594698 , 20.100788 , 43.63753  , 50.4996   ,
       20.380152 , 46.98264  , 49.851997 , 79.833595 , 34.130085 ,
       15.5580635, 17.439993 , 27.648874 , 49.132812 , 12.196057 ,
       32.5641   , 27.557455 , 85.859505 , 12.884094 , 26.009483 ,
       26.009483 , 20.941645 , 39.360054 , 14.207999 , 40.30644  ,
       27.83219  , 16.09493  , 16.394932 , 10.231154 , 17.905123 ],
      dtype=float32)

In [91]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

In [92]:
train_scaled = (train - train.mean()) / train.std()
X_train, y_train = [], []
for i in range(3, len(train_scaled)):
    X_train.append(train_scaled[i-3:i].values)
    y_train.append(train_scaled[i].values)
X_train, y_train = np.array(X_train), np.array(y_train)

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=20, batch_size=1, verbose=0)

inputs = train_scaled[len(train_scaled) - len(test) - 3:].values
X_test = []
for i in range(3, len(inputs)):
    X_test.append(inputs[i-3:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

predictions = model.predict(X_test)
predictions = predictions * train.std() + train.mean()
forecast_methods['LSTM'] = predictions[:forecast_days]

KeyError: 3

In [95]:
import plotly.express as px

In [96]:
for method, forecast in forecast_methods.items():
    if forecast is not None:
        forecast = pd.Series(forecast, index=pd.date_range(start=train.index[-1], periods=forecast_days + 1, inclusive='right'))
        error = round((forecast - test[demand_column]).mean(), 2)
        mape = round(((forecast - test[demand_column]).abs() / test[demand_column]).mean() * 100, 2)
        mae = round((forecast - test[demand_column]).abs().mean(), 2)
        rmse = round(((forecast - test[demand_column]) ** 2).mean() ** 0.5, 2)
        kpi_data[method] = {'error': error, 'mape': mape, 'mae': mae, 'rmse': rmse}

        fig = px.line(df, x=df.index, y=demand_column, title=f'{method} Demand Forecast')
        fig.add_scatter(x=forecast.index, y=forecast.values, mode='lines', name='Forecast')
        kpi_data[method]['plot'] = fig.to_html(full_html=False)
    else:
        kpi_data[method] = {'error': 'N/A', 'mape': 'N/A', 'mae': 'N/A', 'rmse': 'N/A', 'plot': None}


In [101]:
for method, forecast in forecast_methods.items():
    if forecast is not None:
        # Ensure the forecast length matches the forecast index length
        forecast_length = min(len(forecast), forecast_days)
        forecast_index = pd.date_range(start=test.index[-1], periods=forecast_length + 1, inclusive='right')[1:]
        forecast_series = pd.Series(forecast[:forecast_length], index=forecast_index)
        extended_test = pd.concat([test[demand_column], pd.Series([None] * forecast_length, index=forecast_index)])

        error = round((forecast_series - extended_test).mean(), 2)
        mape = round(((forecast_series - extended_test).abs() / extended_test).mean() * 100, 2)
        mae = round((forecast_series - extended_test).abs().mean(), 2)
        rmse = round(((forecast_series - extended_test) ** 2).mean() ** 0.5, 2)
        
        kpi_data[method] = {'error': error, 'mape': mape, 'mae': mae, 'rmse': rmse}

        fig = px.line(df, x=df.index, y=demand_column, title=f'{method} Demand Forecast')
        fig.add_scatter(x=forecast_series.index, y=forecast_series.values, mode='lines', name='Forecast')
        kpi_data[method]['plot'] = fig.to_html(full_html=False)
    else:
        kpi_data[method] = {'error': 'N/A', 'mape': 'N/A', 'mae': 'N/A', 'rmse': 'N/A', 'plot': None}


ValueError: Length of values (28) does not match length of index (27)