In [None]:

from sklearn.metrics import  mean_squared_error 
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Data Manipulation Tools
import pandas as pd
import numpy as np

# Data Visualization Tools
import matplotlib.pyplot as plt
import seaborn as sns

# Data Preprocessing Tools
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and pandas warnings that may point at real problems.
warnings.filterwarnings(action='ignore')

In [None]:
# Load the processed price list and discard the 'Season' column in one step.
priceData = pd.read_csv("./data/processed/pricesList.csv").drop(columns='Season')

In [None]:
def imputeData(data, column):
    """Fill missing entries of ``data[column]`` in place with the column mean.

    The column is converted to float64, every missing entry is replaced by the
    mean of the non-missing entries, and the result is rounded to 2 decimals.

    A column with no observed values has no mean to impute with, so it is left
    as NaN instead of raising (the previous SimpleImputer-based version dropped
    such a column during ``transform`` and then failed on assignment).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to modify; it is mutated in place.
    column : str
        Name of the numeric column to impute.

    Returns
    -------
    None
    """
    values = data[column].astype("float64")
    # Series.mean skips missing entries; it is NaN when nothing is observed,
    # in which case fillna(NaN) is a harmless no-op.
    fillValue = values.mean()
    data[column] = values.fillna(fillValue).round(2)

In [None]:
def createLagFeatures(data, lag):
    """Return a copy of ``data`` with a ``Price_Lag`` column added.

    ``Price_Lag`` holds the ``Price`` value found ``lag`` rows earlier; rows
    without such a predecessor get NaN. The input frame is not modified.
    """
    laggedPrice = data["Price"].shift(lag)
    return data.assign(Price_Lag=laggedPrice)

In [None]:
def createRollingFeatures(data, window):
    """Return a copy of ``data`` with a ``Price_Mean`` column added.

    ``Price_Mean`` is the mean of ``Price`` over the ``window`` rows that
    precede the current row; rows with fewer than ``window`` predecessors get
    NaN. The input frame is not modified.
    """
    windowMean = data["Price"].rolling(window=window).mean()
    # Shift by one row so the current row's own price is excluded from its mean.
    return data.assign(Price_Mean=windowMean.shift(1))

In [None]:
uniqueNames = priceData['Name'].unique()
featureColumns = ['Price', "Price_Lag", "Price_Mean"]

# Process every name separately so imputation, lag and rolling statistics
# never mix rows belonging to different names.
for name in uniqueNames:
    rowsForName = priceData['Name'] == name
    nameData = priceData[rowsForName].copy()

    # fill missing prices with this name's own mean (mutates nameData)
    imputeData(nameData, 'Price')

    # add the 1-row lag first, then the 4-row rolling mean
    nameData = createRollingFeatures(createLagFeatures(nameData, 1), 4)

    # write the imputed price and the new features back into the main frame
    priceData.loc[rowsForName, featureColumns] = nameData[featureColumns]


In [None]:
# Label encoding: replace each value in 'Name' with an integer class label
labelEncoder = LabelEncoder().fit(priceData['Name'])
priceData['Name'] = labelEncoder.transform(priceData['Name'])
# uniqueNames now holds the encoded labels rather than the original names
uniqueNames = priceData['Name'].unique()

In [None]:
# Build a datetime index from Year, Month and a day derived from Week
# (week 1 -> day 1, week 2 -> day 8, ...).
# NOTE(review): assumes Week is small enough that the derived day exists in
# every month - confirm against the data.
dayOfMonth = (priceData["Week"] - 1) * 7 + 1
dateParts = priceData[['Year', 'Month']].assign(day=dayOfMonth)
priceData["Datetime"] = pd.to_datetime(dateParts)

# Index by date and drop rows with any missing value
priceData = priceData.set_index("Datetime").dropna()

# Keep only the rows whose encoded Name is 0, then discard the Name column
priceData = priceData.loc[priceData['Name'] == 0].drop(columns='Name')

In [None]:
# Chronological split: rows dated before `split` train, the rest test
split = "2023-01-01"
beforeSplit = priceData.index < split
fromSplit = priceData.index >= split
trainData = priceData.loc[beforeSplit]
testData = priceData.loc[fromSplit]

In [None]:
# 'Price' is the regression target; every remaining column is a feature
X_train, y_train = trainData.drop(columns='Price'), trainData['Price']
X_test, y_test = testData.drop(columns='Price'), testData['Price']

In [None]:
# Standardize the features: the scaler is fitted on the training split only
# and the same fitted transformation is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Build the neural network model: three ReLU hidden layers (128, 32, 16 units)
# followed by a single-unit output layer with no activation, for regression
model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),  # one input per feature column
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1),
])
# Adam optimizer, mean squared error loss
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train for 500 epochs in mini-batches of 16; the test split is passed as
# validation data, and the returned object is kept in `history`
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=500,
    batch_size=16,
)

In [None]:
# Evaluate the model on the test split
NN_pred = model.predict(X_test)

# RMSE computed from the predictions, rounded to 2 decimals
mse = mean_squared_error(y_test, NN_pred)
rmse = round(np.sqrt(mse), 2)

# Value reported by model.evaluate for the same data, rounded to 2 decimals
score = round(model.evaluate(X_test, y_test, verbose=0), 2)

print("RMSE: ", rmse)
print("Score: ", score)


In [None]:
# Plot actual vs. predicted prices over the test period
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
ax.plot(testData.index, testData["Price"], label="Actual Price")

# Re-apply the fitted scaler to the raw test features before predicting
predictions = model.predict(scaler.transform(testData.drop('Price', axis=1)))
ax.plot(testData.index, predictions, label="Predicted Price", linestyle='dashed')

# Label the figure so it can be read on its own, using the Axes interface throughout
ax.set_title("Actual vs. Predicted Price (test set)")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.legend()
plt.show()