In [2]:
# Mount Google Drive
# The CSV-loading cells read their files from /content/drive/MyDrive/datasets/,
# so the Drive has to be mounted at /content/drive before they run.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import numpy as np
import pandas as pd

In [4]:
# Load the training records from the mounted Drive folder.
train = pd.read_csv('/content/drive/MyDrive/datasets/train.csv')

In [5]:
# Load the test records from the same Drive folder as the training file.
test = pd.read_csv('/content/drive/MyDrive/datasets/test.csv')

In [6]:
# Load the per-meal lookup table; it is joined onto `train` via 'meal_id'.
meal = pd.read_csv('/content/drive/MyDrive/datasets/meal_info.csv')

In [7]:
# Attach the meal lookup columns to every training row.
# No `how` is given, so pandas performs its default inner join on 'meal_id'.
train = pd.merge(train, meal, on='meal_id')


In [8]:
# Center / meal pair to model. The values are fixed in the notebook rather
# than read interactively; the filter cell uses them to slice `train`.
center_id, meal_id = 55, 1993

In [9]:
# Filter training data down to the selected center / meal pair.
# Both conditions go into one boolean mask, and the result is an explicit
# copy: pretime() later assigns new columns onto train_df, and writing into a
# filtered slice of `train` can raise pandas' SettingWithCopyWarning.
selection_mask = (train['center_id'] == center_id) & (train['meal_id'] == meal_id)
train_df = train.loc[selection_mask].copy()

In [10]:
# Preprocessing for time-series analysis
def pretime(df=None, start='2015-01-08'):
  """Add weekly calendar columns to a DataFrame, in place.

  Args:
    df: DataFrame to annotate. When omitted, the notebook-level ``train_df``
      is used, so a bare ``pretime()`` call behaves as before.
    start: Lower bound passed to ``pd.date_range``. With ``freq='W'`` the
      generated dates fall on Sundays, so the default of 2015-01-08 produces
      2015-01-11 as the first date.

  Returns:
    None. The columns ``Date``, ``Day``, ``Month``, ``Year`` and ``Quarter``
    are written onto ``df`` itself.

  Note:
    One date is generated per row and assigned by row position; the ``week``
    column is not consulted, so a frame with missing weeks would still receive
    consecutive dates.
  """
  if df is None:
    # Fall back to the notebook global only when no frame is supplied.
    df = train_df

  df['Date'] = pd.date_range(start, periods=len(df), freq='W')

  # Calendar parts derived from the generated dates.
  date_parts = df['Date'].dt
  df['Day'] = date_parts.day
  df['Month'] = date_parts.month
  df['Year'] = date_parts.year
  df['Quarter'] = date_parts.quarter


In [11]:
# Adds the Date / Day / Month / Year / Quarter columns to train_df in place.
pretime()

In [12]:
# Display the filtered frame together with the generated calendar columns.
train_df

Unnamed: 0,id,week,center_id,meal_id,checkout_price,base_price,emailer_for_promotion,homepage_featured,num_orders,category,cuisine,Date,Day,Month,Year,Quarter
11092,1466964,1,55,1993,136.83,135.83,0,0,270,Beverages,Thai,2015-01-11,11,1,2015,1
11169,1035555,2,55,1993,133.89,133.89,0,0,121,Beverages,Thai,2015-01-18,18,1,2015,1
11246,1282652,3,55,1993,134.86,133.86,0,0,258,Beverages,Thai,2015-01-25,25,1,2015,1
11323,1066744,4,55,1993,134.89,133.89,0,0,82,Beverages,Thai,2015-02-01,1,2,2015,1
11400,1115779,5,55,1993,147.50,145.50,0,0,81,Beverages,Thai,2015-02-08,8,2,2015,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21823,1210404,141,55,1993,149.41,148.41,0,0,134,Beverages,Thai,2017-09-17,17,9,2017,3
21900,1162047,142,55,1993,152.35,152.35,0,0,189,Beverages,Thai,2017-09-24,24,9,2017,3
21977,1006104,143,55,1993,151.35,150.35,0,0,109,Beverages,Thai,2017-10-01,1,10,2017,4
22054,1107848,144,55,1993,151.35,150.35,0,0,190,Beverages,Thai,2017-10-08,8,10,2017,4


LSTM

In [13]:
# Build the modelling frame: remove the identifier and categorical columns,
# then index the remaining columns by the generated weekly 'Date'.
lstm_data = (
    train_df
    .drop(columns=['id', 'center_id', 'meal_id', 'category', 'cuisine'])
    .set_index(['Date'])
)

In [14]:
# Number of rows in the filtered frame; used to size the hold-out split.
period = train_df.shape[0]

In [15]:
# Positional hold-out: everything except the final 15 rows is training data.
# Note the naming: lower-case x_train / y_train are the FULL feature frame and
# the FULL log1p-transformed target; the capitalised names are the slices.
split_size = period - 15

x_train = lstm_data.drop(columns='num_orders')
y_train = np.log1p(lstm_data['num_orders'])

X_train, X_test = x_train.iloc[:split_size, :], x_train.iloc[split_size:, :]
Y_train, Y_test = y_train.iloc[:split_size], y_train.iloc[split_size:]

In [16]:
# Display the modelling frame (indexed by Date).
lstm_data

Unnamed: 0_level_0,week,checkout_price,base_price,emailer_for_promotion,homepage_featured,num_orders,Day,Month,Year,Quarter
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-11,1,136.83,135.83,0,0,270,11,1,2015,1
2015-01-18,2,133.89,133.89,0,0,121,18,1,2015,1
2015-01-25,3,134.86,133.86,0,0,258,25,1,2015,1
2015-02-01,4,134.89,133.89,0,0,82,1,2,2015,1
2015-02-08,5,147.50,145.50,0,0,81,8,2,2015,1
...,...,...,...,...,...,...,...,...,...,...
2017-09-17,141,149.41,148.41,0,0,134,17,9,2017,3
2017-09-24,142,152.35,152.35,0,0,189,24,9,2017,3
2017-10-01,143,151.35,150.35,0,0,109,1,10,2017,4
2017-10-08,144,151.35,150.35,0,0,190,8,10,2017,4


LSTM (Light Model)

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error

# Columns to train on
columns_to_train = ["week", "checkout_price", "base_price", "emailer_for_promotion", "homepage_featured", "Day", "Month", "Year", "Quarter"]

# Extract features and target variable
X = lstm_data[columns_to_train].values
y = lstm_data["num_orders"].values

# Split data into training and test sets.
# shuffle=False keeps the rows in their existing (date-indexed) order, so the
# last 20% of rows form the held-out set. A shuffled split would place later
# weeks in the training data and earlier weeks in the test data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardize the features.
# The scaler is fitted on the training rows only, so the held-out rows do not
# contribute to the mean / variance used for scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # X stays the standardized full feature matrix

# Reshape data for LSTM input: (samples, 1 timestep, features)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Model: two stacked LSTM layers with dropout in between, single regression output
model = Sequential()
model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))

# Compilation
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer)

# Training
# validation_data is only monitored here (no callback selects weights on it).
history = model.fit(x=X_train, y=y_train, epochs=200, batch_size=512, validation_data=(X_test, y_test))

# Print MSE
# "Training MSE" is the loss logged for the last epoch; "Test MSE" is a fresh
# evaluation on the held-out rows.
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Training MSE: {history.history["loss"][-1]}, Test MSE: {test_loss}')




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Bidirectional LSTM (Medium Model)

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Bidirectional
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error

# Columns to train on
columns_to_train = ["week", "checkout_price", "base_price", "emailer_for_promotion", "homepage_featured", "Day", "Month", "Year", "Quarter"]

# Extract features and target variable
X = lstm_data[columns_to_train].values
y = lstm_data["num_orders"].values

# Split data into training and test sets.
# shuffle=False keeps the rows in their existing (date-indexed) order, so the
# last 20% of rows form the held-out set. A shuffled split would place later
# weeks in the training data and earlier weeks in the test data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardize the features.
# The scaler is fitted on the training rows only, so the held-out rows do not
# contribute to the mean / variance used for scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # X stays the standardized full feature matrix

# Reshape data for LSTM input: (samples, 1 timestep, features)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Model: two bidirectional LSTM layers, each followed by dropout.
# input_shape is given to the Bidirectional wrapper because the wrapper, not
# the inner LSTM, is the first layer of the Sequential model.
model = Sequential()
model.add(Bidirectional(LSTM(100, activation='relu', return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(50, activation='relu')))
model.add(Dropout(0.2))
model.add(Dense(1))

# Compilation
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=optimizer)

# Training
# validation_data is only monitored here (no callback selects weights on it).
history = model.fit(x=X_train, y=y_train, epochs=200, batch_size=512, validation_data=(X_test, y_test))

# Print MSE
# "Training MSE" is the loss logged for the last epoch (dropout active);
# "Test MSE" is a fresh evaluation on the held-out rows.
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Training MSE: {history.history["loss"][-1]}, Test MSE: {test_loss}')




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

 Bidirectional LSTM (Heavy Model)

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Bidirectional, BatchNormalization
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from keras.callbacks import EarlyStopping

# Columns to train on
columns_to_train = ["week", "checkout_price", "base_price", "emailer_for_promotion", "homepage_featured", "Day", "Month", "Year", "Quarter"]

# Extract features and target variable
X = lstm_data[columns_to_train].values
y = lstm_data["num_orders"].values

# Split data into training and test sets.
# shuffle=False keeps the rows in their existing (date-indexed) order, so the
# last 20% of rows form the held-out set. A shuffled split would place later
# weeks in the training data and earlier weeks in the test data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Standardize the features.
# The scaler is fitted on the training rows only, so the held-out rows do not
# contribute to the mean / variance used for scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(X)  # X stays the standardized full feature matrix

# Reshape data for LSTM input: (samples, 1 timestep, features)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Model: two bidirectional LSTM layers with batch normalization in between,
# followed by a small dense head and a single regression output.
model = Sequential()
model.add(Bidirectional(LSTM(128, activation='relu', return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(BatchNormalization())
model.add(Bidirectional(LSTM(64, activation='relu')))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# Compilation
optimizer = Adam(learning_rate=0.0005)
model.compile(loss='mse', optimizer=optimizer)

# Training with early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Early stopping picks the restored weights by val_loss, so the validation data
# must not be the test set. validation_split=0.2 holds out the last 20% of the
# training rows for that purpose and leaves X_test untouched until evaluation.
history = model.fit(x=X_train, y=y_train, epochs=300, batch_size=512, validation_split=0.2, callbacks=[early_stopping])

# Calculate training MSE manually
# Uses the weights left in place after early stopping, over all of X_train
# (including the rows that fit() held out for validation).
train_preds = model.predict(X_train)
train_mse = mean_squared_error(y_train, train_preds)
print(f'Manually Calculated Training MSE: {train_mse}')

# Print MSE
# "Training MSE" below is the loss logged for the last epoch that ran, which
# can differ from the manually calculated value above.
test_loss = model.evaluate(X_test, y_test, verbose=0)
print(f'Training MSE: {history.history["loss"][-1]}, Test MSE: {test_loss}')




Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78