In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
def load_user_data():
    dataset_dir = os.path.join('..','dataset_processing', 'sample_data')
    df = pd.read_csv(dataset_dir+"/order_list_100k_pattern.csv")
    orders = df.iloc[:, 1:].values
    return orders

In [3]:
def generate_input_output_pairs(user_data, sequence_length):
    input_seqs = []
    output_seqs = []
    for i in range(len(user_data) - sequence_length):
        input_seq = np.array(user_data[i:i + sequence_length]).flatten()  # Flatten the input sequence
        output_seq = np.array(user_data[i + sequence_length])
        input_seqs.append(input_seq)
        output_seqs.append(output_seq)
    return np.array(input_seqs), np.array(output_seqs)

In [4]:
def build_ann_model(input_shape, num_outputs):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(input_shape,)),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(num_outputs, activation='sigmoid')
    ])
    return model

In [5]:
def train_model(model, X_train, y_train, epochs=10, batch_size=32, validation_split=0.2):
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split)

In [6]:
def evaluate_model(model, X_test, y_test):
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

In [7]:
SEQUENCE_LENGTH = 5
PRODUCT_SIZE = 20
user_data = load_user_data()
input_arr, output_arr = generate_input_output_pairs(user_data, SEQUENCE_LENGTH)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(input_arr, output_arr, test_size=0.2, random_state=42)

In [9]:
model = build_ann_model(SEQUENCE_LENGTH*PRODUCT_SIZE,PRODUCT_SIZE)

In [10]:
train_model(model, X_train, y_train, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [11]:
evaluate_model(model, X_test, y_test)

Test Loss: 0.42770645022392273, Test Accuracy: 0.12465623021125793


In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               12928     
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 512)               131584    
                                                                 
 dense_3 (Dense)             (None, 256)               131328    
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 64)                8256      
                                                                 
 dense_6 (Dense)             (None, 32)                2

In [13]:
a = [[1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,1],
     [1,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,1],
     [1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1,0,1,0],
     [1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,1,0,1,1],
     [1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0]]
p = np.array(a).flatten()

In [14]:
pred = model.predict(p.reshape(1,-1))



In [15]:
pred

array([[3.19342362e-05, 1.62516825e-03, 4.42272431e-04, 2.54939854e-01,
        1.78864971e-01, 8.96744081e-04, 7.83877969e-01, 5.49564313e-04,
        3.76915326e-04, 8.09518097e-05, 1.41098110e-06, 5.36836088e-01,
        1.03997650e-07, 1.15145810e-01, 4.33419034e-09, 9.56691615e-10,
        1.23197037e-06, 3.19568505e-09, 1.23290045e-08, 2.78652300e-17]],
      dtype=float32)

In [16]:
(pred>0.4).astype(int)

array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])

In [20]:
model.save('rnn_model.h5')

In [21]:
loaded_model = tf.keras.models.load_model('rnn_model.h5')

In [22]:
pred = loaded_model.predict(p.reshape(1,-1))



In [23]:
(pred>0.4).astype(int)

array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])