In [73]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [31]:
 def load_user_data(user_id):
    """Load one user's order matrix from their ``train.csv``.

    Reads ``../dataset_processing/transformed_data/<user_id>/train.csv``
    (relative to the current working directory) and discards the first two
    columns.

    Args:
        user_id: Name of the user's sub-directory under ``transformed_data``.

    Returns:
        A 2-D NumPy array with every row of the CSV, from the third column
        onward.
    """
    user_dir = os.path.join('..', 'dataset_processing', 'transformed_data', user_id)
    train_frame = pd.read_csv(user_dir + "/train.csv")
    # Columns 0 and 1 are skipped; what they hold is not visible from here
    # (presumably identifiers/metadata -- TODO confirm against the CSV schema).
    return train_frame.iloc[:, 2:].values

In [58]:
def generate_input_output_pairs(user_data, sequence_length):
    """Slice a sequence into fixed-length windows and their next-step targets.

    For every start index ``i`` that leaves room for a full window plus one
    following element, ``user_data[i:i + sequence_length]`` becomes an input
    and ``user_data[i + sequence_length]`` becomes the matching target.

    Args:
        user_data: Indexable, sliceable sequence (e.g. a 2-D array of rows).
        sequence_length: Number of consecutive elements in each input window.

    Returns:
        ``(input_seqs, output_seqs)``: two equally long lists of NumPy arrays.
        Both lists are empty when ``user_data`` has no more than
        ``sequence_length`` elements.
    """
    window_starts = range(len(user_data) - sequence_length)
    # np.array(...) copies each slice, so the results do not alias user_data.
    input_seqs = [
        np.array(user_data[start:start + sequence_length])
        for start in window_starts
    ]
    output_seqs = [
        np.array(user_data[start + sequence_length])
        for start in window_starts
    ]
    return input_seqs, output_seqs

In [103]:
def build_lstm_model(input_shape, lstm_units=64, dropout_rate=0.2, num_outputs=1):
    """Build an uncompiled LSTM model that emits one sigmoid value per input feature.

    The network is a single LSTM layer followed by a Dense layer with sigmoid
    activation whose width equals the feature dimension of ``input_shape``.

    Args:
        input_shape: ``(timesteps, features)`` of one sample, without the
            batch axis.
        lstm_units: Number of units in the LSTM layer.
        dropout_rate: Accepted for signature compatibility only; this
            architecture contains no dropout layer, so the value is unused.
        num_outputs: Accepted for signature compatibility only; the output
            width always follows the feature dimension of ``input_shape``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.

    Raises:
        ValueError: If ``input_shape`` is not a 2-element sequence or its
            feature dimension is ``None``.
    """
    try:
        input_shape = tuple(input_shape)
    except TypeError:
        raise ValueError(
            f"input_shape must be a (timesteps, features) sequence, got {input_shape!r}"
        ) from None
    if len(input_shape) != 2:
        raise ValueError(
            f"input_shape must be (timesteps, features) without the batch axis, got {input_shape!r}"
        )

    # The Dense layer predicts a full feature vector, so its width has to
    # follow the input's feature axis instead of a hard-coded constant.
    num_features = input_shape[-1]
    if num_features is None:
        raise ValueError("the feature dimension of input_shape must be a concrete integer")

    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(lstm_units, input_shape=input_shape),
        tf.keras.layers.Dense(num_features, activation='sigmoid')
    ])
    return model

In [41]:
def train_model(model, X_train, y_train, epochs=10, batch_size=32, validation_split=0.2):
    """Compile ``model`` and fit it on the training data.

    The model is compiled in place with binary cross-entropy loss, the Adam
    optimizer and the ``accuracy`` metric, then fitted.

    Args:
        model: Object exposing Keras-style ``compile`` and ``fit`` methods.
        X_train: Training inputs, passed straight to ``model.fit``.
        y_train: Training targets, passed straight to ``model.fit``.
        epochs: Number of training epochs.
        batch_size: Number of samples per gradient update.
        validation_split: Fraction of the training data forwarded to
            ``model.fit`` as ``validation_split``.

    Returns:
        Whatever ``model.fit`` returns (a ``History`` object for Keras
        models), so the per-epoch metrics are not lost.
    """
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
    )
    return history

In [42]:
def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on held-out data, print the metrics and return them.

    Args:
        model: Object whose ``evaluate(X_test, y_test)`` returns exactly two
            values, unpacked as ``(loss, accuracy)``.
        X_test: Test inputs, passed straight to ``model.evaluate``.
        y_test: Test targets, passed straight to ``model.evaluate``.

    Returns:
        ``(loss, accuracy)`` as returned by ``model.evaluate``, so callers can
        record or compare the values instead of only seeing them printed.
    """
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")
    return loss, accuracy

In [45]:
# load_user_data expects each user to be a sub-directory of transformed_data,
# so skip stray files (e.g. editor or OS artefacts) and sort the names so the
# processing order is the same on every run and every machine.
transformed_data_dir = os.path.join('..', 'dataset_processing', 'transformed_data')
users = sorted(
    entry
    for entry in os.listdir(transformed_data_dir)
    if os.path.isdir(os.path.join(transformed_data_dir, entry))
)

In [107]:
# Number of consecutive rows in each input window; the model's input shape
# must use the same number of timesteps.
sequence_length = 5

input_seq = []
output_seq = []
for user in tqdm(users, desc="Processing Users"):
    user_data = load_user_data(user)
    # Named user_inputs/user_outputs rather than input/output so the builtin
    # input() is not shadowed for the rest of the kernel session.
    user_inputs, user_outputs = generate_input_output_pairs(user_data, sequence_length)
    input_seq.extend(user_inputs)
    output_seq.extend(user_outputs)

# Stack the per-window arrays: inputs become (windows, sequence_length, features)
# and targets become (windows, features).
input_arr = np.array(input_seq)
output_arr = np.array(output_seq)

Processing Users: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 831/831 [01:15<00:00, 11.07it/s]


In [108]:
# Hold out 20% of the windows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    input_arr,
    output_arr,
    test_size=0.2,
    random_state=42,
)

In [109]:
# Take the per-sample shape (everything after the batch axis) from the
# training data, so this cell does not depend on an `input_shape` variable
# left over from an earlier kernel session.
input_shape = X_train.shape[1:]
model = build_lstm_model(input_shape)

In [110]:
# Train with train_model's default epochs, batch size and validation split.
train_model(model=model, X_train=X_train, y_train=y_train)

MemoryError: Unable to allocate 1.89 GiB for an array with shape (21954, 5, 4615) and data type float32

In [86]:
# Sanity check: expect (windows, rows per window, features per row).
X_train.shape

(1868, 5, 4615)

In [87]:
# NOTE(review): the summary saved below lists LSTM -> Dropout -> TimeDistributed(Dense)
# layers, which is not the LSTM -> Dense model that build_lstm_model currently
# builds; re-run this cell after rebuilding the model to refresh the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 64)             1198080   
                                                                 
 dropout (Dropout)           (None, 5, 64)             0         
                                                                 
 time_distributed (TimeDistr  (None, 5, 1)             65        
 ibuted)                                                         
                                                                 
Total params: 1,198,145
Trainable params: 1,198,145
Non-trainable params: 0
_________________________________________________________________


In [102]:
# Sanity check: expect (windows, features per row), one target row per input window.
y_train.shape

(1868, 4615)