In [3]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.13.1-cp38-cp38-macosx_10_15_x86_64.whl.metadata (3.2 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.1.21 (from tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast<=0.4.0,>=0.2.1 (from tensorflow)
  Downloading gast-0.4.0-py3-none-any.whl.metadata (1.1 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=2.9.0 (from tensorflow)
  Downloading h5py-3.11.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata (2.5 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl.metadata (5.2 kB)
Collecting numpy<=1.24.3,>=1.22 (from tensorflow)
  Downloading num

In [4]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

import matplotlib.pyplot as plt

2024-08-05 22:24:33.505641: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Function to prepare data for LSTM with item_name encoding
def prepare_data(df, feature_columns, target_column, time_steps):
    """Turn a feature frame into scaled sliding-window samples for an LSTM.

    The ``item_name`` column is one-hot encoded and prepended to the columns
    listed in ``feature_columns``. Features and target are each min-max scaled,
    and every run of ``time_steps`` consecutive rows becomes one sample whose
    label is the scaled target of the row that immediately follows the run.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``item_name``, every name in ``feature_columns`` and
        ``target_column``. Rows are consumed in the order given.
    feature_columns : list of str
        Numeric columns used as model inputs.
    target_column : str
        Column to predict.
    time_steps : int
        Number of consecutive rows per input window; must be >= 1.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(len(df) - time_steps, time_steps, n_items + len(feature_columns))``.
    y : numpy.ndarray
        Shape ``(len(df) - time_steps, 1)``, scaled target values.
    scaler_target : MinMaxScaler
        Fitted target scaler, for mapping predictions back to original units.
    item_names : numpy.ndarray
        Categories found in ``item_name``, in one-hot column order.

    Raises
    ------
    ValueError
        If ``time_steps`` is < 1 or ``df`` has too few rows to form a window.
    """
    n_rows = len(df)
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")
    if n_rows <= time_steps:
        # Without this guard the function returns empty arrays that only fail
        # later, with an unrelated IndexError, when the model shape is read.
        raise ValueError(
            f"need more than {time_steps} rows to build a window, got {n_rows}"
        )

    # One-hot encode item_name. The dense-output keyword was renamed from
    # `sparse` to `sparse_output` in scikit-learn 1.2 and the old name was
    # removed in 1.4, so try the new spelling first and fall back for old installs.
    try:
        item_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        item_encoder = OneHotEncoder(sparse=False)
    item_encoded = item_encoder.fit_transform(df[['item_name']])
    item_names = item_encoder.categories_[0]

    data = df[feature_columns].values
    target = df[target_column].values.reshape(-1, 1)

    # Concatenate item_name encoding with other features
    data_combined = np.hstack((item_encoded, data))

    # NOTE(review): both scalers are fitted on every row passed in. If the
    # caller splits into train/test afterwards, test-set statistics leak into
    # the scaling -- confirm whether that is acceptable.
    scaler_data = MinMaxScaler()
    scaler_target = MinMaxScaler()

    data_scaled = scaler_data.fit_transform(data_combined)
    target_scaled = scaler_target.fit_transform(target)

    # NOTE(review): windows are cut over rows in frame order with no grouping
    # by item_name, so a window can span rows of different items unless df is
    # already restricted/sorted appropriately -- confirm against the data.
    X = np.array([
        data_scaled[end - time_steps:end]
        for end in range(time_steps, n_rows)
    ])
    # Label for the window ending at row `end - 1` is the target at row `end`.
    y = target_scaled[time_steps:].copy()

    return X, y, scaler_target, item_names


# Function to create LSTM model
def create_lstm_model(input_shape):
    """Build and compile a two-layer stacked LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the first LSTM layer.

    Returns
    -------
    A compiled Keras ``Sequential`` model with a single linear output unit,
    using the Adam optimizer and mean-squared-error loss.
    """
    layer_stack = [
        # First recurrent layer emits the full sequence so the second LSTM
        # receives one vector per time step.
        LSTM(units=50, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        # Second recurrent layer keeps only its final state.
        LSTM(units=50),
        Dropout(0.2),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

## Prepare the data

In [6]:
# Load the training table from a CSV file resolved relative to the current
# working directory. Later cells read `item_name`, the feature columns and
# `total_amount_sold` from this frame.
df = pd.read_csv("./train_data.csv")

In [7]:
# Model inputs, grouped by kind. The ma7_*/ma3_* columns are presumably 7- and
# 3-period moving averages -- confirm against whatever produced train_data.csv.
feature_columns = [
    # per-row sales measures
    'avg_bottle_price',
    'total_volume_sold_liters',
    'total_sale_dollars',
    'avg_bottle_profit',
    # calendar fields
    'day_of_week',
    'week_of_year',
    'month',
    'year',
    # ma7_* columns
    'ma7_total_amount_sold',
    'ma7_avg_bottle_price',
    'ma7_total_sale_dollars',
    # ma3_* columns
    'ma3_total_amount_sold',
    'ma3_avg_bottle_price',
    'ma3_total_sale_dollars',
]
target_column = 'total_amount_sold'
time_steps = 7  # rows per input window

X, y, scaler_target, item_names = prepare_data(
    df, feature_columns, target_column, time_steps
)

# Ordered split with no shuffling: the first 80% of windows train the model,
# the remaining 20% are held out for testing.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]



## Create and train the LSTM model

In [8]:
# Seed the Python, NumPy and TensorFlow random generators before building the
# model, so weight initialisation, dropout masks and batch shuffling are as
# repeatable as the backend allows on a fresh Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Each sample has shape (time_steps, n_features), read off the training windows.
model = create_lstm_model((X_train.shape[1], X_train.shape[2]))

# validation_split=0.1 holds out the last 10% of the training samples (taken
# before shuffling) to report validation loss after every epoch.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Make predictions

In [None]:
# Map the held-out labels back from the scaled range to original units.
y_test_actual = scaler_target.inverse_transform(y_test)

# Predict in scaled space, then undo the scaling with the same fitted scaler
# so predictions and labels are directly comparable.
y_pred_scaled = model.predict(X_test)
y_pred = scaler_target.inverse_transform(y_pred_scaled)

## Evaluate the model

In [None]:
# Error metrics on the test set, in the target's original units (both arrays
# were inverse-transformed in the prediction cell).
mae = np.abs(y_pred - y_test_actual).mean()
rmse = np.sqrt(np.square(y_pred - y_test_actual).mean())
print(f'RMSE: {rmse:.2f}, MAE: {mae:.2f}')

## Visualize the results

In [None]:
# Overlay actual and predicted test-set values on a single wide axis.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_test_actual, color='blue', label='Actual Total Amount Sold')
ax.plot(y_pred, color='red', label='Predicted Total Amount Sold')
ax.set_title('LSTM Model - Total Amount Sold Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Total Amount Sold')
ax.legend()
plt.show()