In [1]:
import pandas as pd
df = pd.read_csv('train.csv')

In [2]:
df = df[['Order Date', 'Category', 'Sales']]

In [3]:
df['Order Date'] = pd.to_datetime(df['Order Date'], format='%d/%m/%Y')

In [4]:
df_furniture = df[df['Category'] == 'Furniture']
df_office = df[df['Category'] == 'Office Supplies']
df_tech = df[df['Category'] == 'Technology']

In [5]:
df_furniture = df_furniture.groupby('Order Date')['Sales'].sum().reset_index()
df_office = df_office.groupby('Order Date')['Sales'].sum().reset_index()
df_tech = df_tech.groupby('Order Date')['Sales'].sum().reset_index()

In [6]:
df_furniture = df_furniture.sort_values('Order Date')
df_office = df_office.sort_values('Order Date')
df_office = df_office.sort_values('Order Date')

In [7]:
df_furniture['Day of Week'] = df_furniture['Order Date'].dt.dayofweek
df_furniture['Month'] = df_furniture['Order Date'].dt.month

df_office['Day of Week'] = df_office['Order Date'].dt.dayofweek
df_office['Month'] = df_office['Order Date'].dt.month

df_tech['Day of Week'] = df_tech['Order Date'].dt.dayofweek
df_tech['Month'] = df_tech['Order Date'].dt.month

In [8]:
for lag in range(1, 8):
    df_furniture[f'Lag_{lag}'] = df_furniture['Sales'].shift(lag)
    df_office[f'Lag_{lag}'] = df_office['Sales'].shift(lag)
    df_tech[f'Lag_{lag}'] = df_tech['Sales'].shift(lag)
    
df_furniture['MA_7'] = df_furniture['Sales'].rolling(window=7).mean()
df_furniture['EMA_7'] = df_furniture['Sales'].ewm(span=7).mean()

df_office['MA_7'] = df_office['Sales'].rolling(window=7).mean()
df_office['EMA_7'] = df_office['Sales'].ewm(span=7).mean()

df_tech['MA_7'] = df_tech['Sales'].rolling(window=7).mean()
df_tech['EMA_7'] = df_tech['Sales'].ewm(span=7).mean()

In [9]:
df_furniture = df_furniture.dropna()
df_office = df_office.dropna()
df_tech = df_tech.dropna()

In [10]:
from sklearn.preprocessing import MinMaxScaler
scaler_furniture = MinMaxScaler()
scaler_tech = MinMaxScaler()
scaler_office = MinMaxScaler()
furniture_sales_scaled = scaler_furniture.fit_transform(df_furniture.drop(columns=['Order Date']))
office_sales_scaled = scaler_office.fit_transform(df_office.drop(columns=['Order Date']))
tech_sales_scaled = scaler_tech.fit_transform(df_tech.drop(columns=['Order Date']))

In [11]:
X_furniture = furniture_sales_scaled
y_furniture = furniture_sales_scaled[:, 0]

X_office = office_sales_scaled
y_office = office_sales_scaled[:, 0]

X_tech = tech_sales_scaled
y_tech = tech_sales_scaled[:, 0]

In [12]:
def create_sequences(data, target, seq_length):
    X_seq = []
    y_seq = []
    for i in range(len(data) - seq_length):
        X_seq.append(data[i:i + seq_length])
        y_seq.append(target[i + seq_length])
    return np.array(X_seq), np.array(y_seq)

In [13]:
import numpy as np
seq_length = 14
X_furniture, y_furniture = create_sequences(X_furniture, y_furniture, seq_length)
X_office, y_office = create_sequences(X_office, y_office, seq_length)
X_tech, y_tech = create_sequences(X_tech, y_tech, seq_length)

In [14]:
from keras.models import Model, Sequential
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Input, Dropout, Bidirectional

In [15]:
model_furniture = Sequential()
model_furniture.add(Bidirectional(LSTM(100, return_sequences=True, input_shape=(seq_length, X_furniture.shape[2]))))
model_furniture.add(Dropout(0.2))
model_furniture.add(Bidirectional(LSTM(100, return_sequences=False)))
model_furniture.add(Dropout(0.2))
model_furniture.add(Dense(1))

  super().__init__(**kwargs)


In [16]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
model_furniture.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [17]:
model_furniture.fit(X_furniture, y_furniture, epochs=200, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 108ms/step - loss: 0.0172 - val_loss: 0.0115
Epoch 2/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - loss: 0.0103 - val_loss: 0.0109
Epoch 3/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0080 - val_loss: 0.0110
Epoch 4/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - loss: 0.0109 - val_loss: 0.0112
Epoch 5/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.0084 - val_loss: 0.0106
Epoch 6/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0118 - val_loss: 0.0114
Epoch 7/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 0.0100 - val_loss: 0.0109
Epoch 8/200
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - loss: 0.0133 - val_loss: 0.0108
Epoch 9/200
[1m22/22[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x2ac41d68310>

In [29]:
y_pred_furniture = model_furniture.predict(X_furniture)

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 54ms/step


In [None]:
model_office = Sequential()
model_office.add(Bidirectional(LSTM(100, return_sequences=True, input_shape=(seq_length, X_furniture.shape[2]))))
model_office.add(Dropout(0.2))
model_office.add(Bidirectional(LSTM(100, return_sequences=False)))
model_office.add(Dropout(0.2))
model_office.add(Dense(1))

In [None]:
model_office.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [30]:
model_office.fit(X_office, y_office, epochs=200, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0082 - val_loss: 0.0098
Epoch 2/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 0.0096 - val_loss: 0.0114
Epoch 3/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 0.0092 - val_loss: 0.0101
Epoch 4/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0132 - val_loss: 0.0116
Epoch 5/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.0102 - val_loss: 0.0101
Epoch 6/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.0087 - val_loss: 0.0107
Epoch 7/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0088 - val_loss: 0.0104
Epoch 8/200
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.0120 - val_loss: 0.0104
Epoch 9/200
[1m29/29[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x12ecc6bdc10>

In [31]:
y_pred_office = model.predict(X_office)

[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


In [None]:
model_tech = Sequential()
model_tech.add(Bidirectional(LSTM(100, return_sequences=True, input_shape=(seq_length, X_furniture.shape[2]))))
model_tech.add(Dropout(0.2))
model_tech.add(Bidirectional(LSTM(100, return_sequences=False)))
model_tech.add(Dropout(0.2))
model_tech.add(Dense(1))

In [None]:
model_tech.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

In [32]:
model_tech.fit(X_tech, y_tech, epochs=200, validation_split=0.2, callbacks=[early_stopping])

Epoch 1/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - loss: 0.0044 - val_loss: 0.0059
Epoch 2/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0025 - val_loss: 0.0054
Epoch 3/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 0.0039 - val_loss: 0.0053
Epoch 4/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0043 - val_loss: 0.0055
Epoch 5/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 0.0030 - val_loss: 0.0053
Epoch 6/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0049 - val_loss: 0.0054
Epoch 7/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0042 - val_loss: 0.0053
Epoch 8/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 0.0034 - val_loss: 0.0053
Epoch 9/200
[1m20/20[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x12eccff9350>

In [33]:
y_pred_tech = model_tech.predict(X_tech)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [34]:
df_furniture = df_furniture.iloc[seq_length:]
df_office = df_office.iloc[seq_length:]
df_tech = df_tech.iloc[seq_length:]

In [37]:
y_pred_furniture.shape, X_furniture.shape

((856, 1), (856, 14, 12))

In [38]:
y_pred_furniture = scaler_furniture.inverse_transform(np.hstack((y_pred_furniture, np.zeros((y_pred_furniture.shape[0], X_furniture.shape[2]-1)))))[:, 0]
y_pred_office = scaler_office.inverse_transform(np.hstack((y_pred_office, np.zeros((y_pred_office.shape[0], X_office.shape[2]-1)))))[:, 0]
y_pred_tech = scaler_tech.inverse_transform(np.hstack((y_pred_tech, np.zeros((y_pred_tech.shape[0], X_tech.shape[2]-1)))))[:, 0]

In [40]:
df_furniture['Pred Sales'] = y_pred_furniture
df_office['Pred Sales'] = y_pred_office
df_tech['Pred Sales'] = y_pred_tech

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_furniture['Pred Sales'] = y_pred_furniture
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_office['Pred Sales'] = y_pred_office
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tech['Pred Sales'] = y_pred_tech
