In [3]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [14]:
def load_user_data():
    dataset_dir = os.path.join('..','dataset_processing', 'sample_data')
    df = pd.read_csv(dataset_dir+"/order_list_20k_pattern.csv")
    orders = df.iloc[:, 1:].values
    return orders

In [6]:
def generate_input_output_pairs(user_data, sequence_length):
    input_seqs = []
    output_seqs = []
    for i in range(len(user_data) - sequence_length):
        input_seq = np.array(user_data[i:i + sequence_length])
        output_seq = np.array(user_data[i + sequence_length])
        input_seqs.append(input_seq)
        output_seqs.append(output_seq)
    return np.array(input_seqs), np.array(output_seqs)

In [7]:
def build_lstm_model(input_shape, lstm_units=128, dropout_rate=0.2, num_outputs=1):
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(256, input_shape=(10, 20), return_sequences= True),
        tf.keras.layers.LSTM(128, return_sequences= True),
        tf.keras.layers.SimpleRNN(64),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(20, activation='sigmoid')
    ])
    return model

In [8]:
optimizer = tf.keras.optimizers.Adam()
def train_model(model, X_train, y_train, epochs=10, batch_size=32, validation_split=0.2):
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split)

In [9]:
def evaluate_model(model, X_test, y_test):
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

In [15]:
user_data = load_user_data()
input_arr, output_arr = generate_input_output_pairs(user_data, 10)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(input_arr, output_arr, test_size=0.2, random_state=42)

In [17]:
model = build_lstm_model((1,2))

In [18]:
train_model(model, X_train, y_train, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [19]:
evaluate_model(model, X_test, y_test)

Test Loss: 0.38572633266448975, Test Accuracy: 0.25262629985809326


In [20]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 10, 256)           283648    
                                                                 
 lstm_3 (LSTM)               (None, 10, 128)           197120    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 64)                12352     
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 20)                2580      
                                                                 
Total params: 504,020
Trainable params: 504,020
Non-trainable params: 0
_________________________________________________________________


In [22]:
model.save('lstm_model')



INFO:tensorflow:Assets written to: lstm_model\assets


INFO:tensorflow:Assets written to: lstm_model\assets
