In [None]:
%load_ext autoreload
%autoreload 2

In [2651]:
from utils.UpdateDatabase import *

# Data Formatting

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from urllib.parse import urljoin
from dotenv import load_dotenv
from datetime import *
from bson import ObjectId
import pandas as pd
import numpy as np
import requests
import random
import torch
import json
import os
# Load key/value pairs from a local .env file into the process environment;
# the MongoDB credentials are read from the environment with os.getenv further down.
load_dotenv()

In [2653]:
from utils.FlattenData import *
from modules.PremoAPI import *
from utils.SafeDataConverters import *

In [2654]:
# API client built with its default arguments
# (PremoAPI is presumably exported by the wildcard import of modules.PremoAPI — confirm).
api = PremoAPI()

In [2655]:
# MongoDB credentials are taken from the process environment; each is None when unset.
username = os.environ.get("mongodb_user")
password = os.environ.get("mongodb_password")

In [2656]:
from modules.database import Database
# Construct the project's Database wrapper with the credentials read from the environment.
# NOTE(review): whether a connection is opened at construction time is not visible here.
db = Database(username, password)

In [2657]:
# Play data exposed by the API client.
# NOTE(review): play_data is not referenced again in the visible part of this notebook.
play_data: list[dict] = api.play_data

In [2658]:
# Flatten the database export into output-side and input-side records.
# flat_out feeds the DataFrame built below; flat_in is only used by the commented-out cell.
# NOTE(review): db.to_json() is called twice. If it is expensive and the flatten_* helpers
# do not mutate their argument, one shared call would be enough — confirm before changing.
flat_out = flatten_output(db.to_json())
flat_in = flatten_input(db.to_json())

In [2659]:
# inp_df = pd.DataFrame(flat_in).sort_values(by=['order_id', 'sort_order'], ascending=True).convert_dtypes()
# inp_df['sort_order'] = inp_df.groupby(['order_id', 'product_id'])['sort_order'].transform(lambda x: x.rank(method='dense').astype(int) - 1)
# inp_df = inp_df.map(safe_to_datetime)
# inp_df.head()

In [2660]:
# out_df = pd.DataFrame(flat_out).sort_values(by=['order_id', 'sort_order'], ascending=True).convert_dtypes()
# out_df['sort_order'] = out_df.groupby(['order_id', 'product_id'])['sort_order'].transform(lambda x: x.rank(method='dense').astype(int) - 1)
# out_df = out_df.map(safe_to_datetime)
# out_df.head()

# Workspace Detection Model

In [None]:
# Tabulate the flattened output records, ordered by order and step, using pandas' best-fit dtypes.
out_df = pd.DataFrame(flat_out)
out_df = out_df.sort_values(by=['order_id', 'sort_order'], ascending=True)
out_df = out_df.convert_dtypes()

# Renumber sort_order as a zero-based dense rank inside each (order_id, product_id) group.
sort_order_by_product = out_df.groupby(['order_id', 'product_id'])['sort_order']
out_df['sort_order'] = sort_order_by_product.transform(
    lambda group: group.rank(method='dense').astype(int) - 1
)

# Run safe_to_datetime over every cell of the frame.
out_df = out_df.map(safe_to_datetime)
out_df.head()

In [None]:
# Minutes from order creation to the start of the task.
since_created = out_df['start_at'] - out_df['order_created_at']
out_df['duration_since_order_created'] = since_created.dt.total_seconds() / 60

# Minutes from the start of the task to the delivery date.
until_delivery = out_df['delivery_date'] - out_df['start_at']
out_df['time_until_delivery'] = until_delivery.dt.total_seconds() / 60
out_df.head()

In [2663]:
# Feature column names, grouped as "init" and "addit".
# Note: X_columns is reassigned further down, once the encoded frame exists.
X_columns = dict(
    init=['task_title'],
    addit=['material', 'color', 'sort_order'],
)

In [2664]:
# Target column names. Note: y_columns is reassigned further down to the two duration columns only.
y_columns = [
    'workspace',
    'duration_since_order_created',
    'time_until_delivery',
]

In [None]:
# Keep only the modelling columns and discard rows that are missing any of them.
selected_columns = [
    'task_title',
    'material',
    'color',
    'sort_order',
    'workspace',
    'duration_since_order_created',
    'time_until_delivery',
]
df = out_df.copy()[selected_columns].dropna()
df.head()

In [2666]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2667]:
# Columns treated as categories (one-hot / label encoded below).
categorical_columns = [
    'task_title',
    'material',
    'color',
    'workspace',
]
# Columns rescaled with the min-max scaler.
scaler_columns = [
    'duration_since_order_created',
    'time_until_delivery',
]

In [2668]:
# Min-max scaler (target range 0..1) used below on the two duration columns.
scaler = MinMaxScaler(feature_range=(0, 1))
# Label encoder used below on the categorical columns of df.
le = LabelEncoder()

In [None]:
# One-hot encode the categorical columns, then cast every column to float.
one_hot = pd.get_dummies(df, columns=categorical_columns)
df_encoded = one_hot.astype(float)

# Rescale the duration columns into [0, 1].
# NOTE(review): the scaler is fit on all rows, before the train/validation split made later
# in this notebook, so validation rows influence the scaling — confirm this is acceptable.
scaled_durations = scaler.fit_transform(df_encoded[scaler_columns])
df_encoded[scaler_columns] = scaled_durations
df_encoded.head()

In [None]:
# Integer-encode each categorical column of `df` in place.
#
# A separate fitted encoder is kept per column in `label_encoders`: refitting one shared
# LabelEncoder on every column keeps only the mapping of the LAST column, so the integer
# codes of the other columns could never be turned back into their original labels.
# `le` still ends up as the encoder fitted on the last categorical column, as before.
#
# Caveat: this mutates `df`; re-running the cell re-encodes the already-integer codes,
# so the stored encoders would then no longer hold the original string labels.
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
# df_encoded was built from df before this cell and is not changed by the label encoding.
df_encoded.head()

In [2671]:
# Features: every encoded column except the task_title dummies and the two targets
# (i.e. sort_order plus the material / color / workspace dummies).
target_names = ['duration_since_order_created', 'time_until_delivery']
task_title_dummies = list(df_encoded.filter(regex='^task_title_').columns)
X = df_encoded.drop(columns=task_title_dummies + target_names).values
# Targets: the two min-max-scaled duration columns.
y = df_encoded[target_names].values


In [2672]:
# Column labels matching the feature matrix X and the target matrix y, in the same order.
y_columns = ['duration_since_order_created', 'time_until_delivery']
excluded_columns = list(df_encoded.filter(regex='^task_title_').columns) + y_columns
X_columns = df_encoded.drop(columns=excluded_columns).columns

### Create and train the model

In [2673]:
import tensorflow as tf
from tensorflow.keras import layers, Model

In [2674]:
# Network widths: one input per feature column of X, one output per target column of y.
input_size, output_size = X.shape[1], y.shape[1]

In [2675]:
# Hyperparameters
batch_size = 32        # samples per gradient update
num_epochs = 1_000     # full passes over the training split
learning_rate = 2e-3   # step size handed to the Adam optimizer

In [2676]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the split
# the same from run to run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [2677]:
class NeuralNet(Model):
    """Fully connected network: two 64-unit ReLU hidden layers and a linear output layer."""

    def __init__(self, input_size, output_size):
        """Build the layer stack.

        Args:
            input_size: number of input features per sample.
            output_size: number of values produced per sample.
        """
        super().__init__()

        first_hidden = layers.Dense(64, activation='relu', input_shape=(input_size,))
        second_hidden = layers.Dense(64, activation='relu')
        output_layer = layers.Dense(output_size)
        self.nnet = tf.keras.Sequential([first_hidden, second_hidden, output_layer])

    def call(self, x):
        """Forward pass: feed `x` through the sequential stack and return its output."""
        return self.nnet(x)

In [None]:
# Instantiate the network and configure training: Adam optimizer, mean-squared-error loss,
# with MSE also reported as a metric.
model = NeuralNet(input_size, output_size)
adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=adam, loss='mse', metrics=['mse'])


In [2679]:
# callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3) # Stop training if val_loss does not improve for 3 epochs

In [None]:
# Train for the full num_epochs, evaluating on the validation split after every epoch.
# Early stopping is currently disabled (the callback is commented out).
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    validation_data=(X_val, y_val),
)  # , callbacks=[callback])

In [2681]:
import matplotlib.pyplot as plt

In [None]:
# Plot total loss
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(history.history['loss'], label='Training Loss')
# val_loss is only present when validation data was supplied; fall back to an empty curve.
ax.plot(history.history.get('val_loss', []), label='Validation Loss')
ax.set_title('Model Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

### Predict

In [2683]:
from random import randint

In [2684]:
# random.randint includes BOTH endpoints, so the upper bound has to be the last valid row
# index (X.shape[0] - 1); an upper bound of X.shape[0] could pick an index one past the end of X.
sample_index = randint(0, X.shape[0] - 1)  # Change this index to get different samples

In [2685]:
# Get a sample from the dataset: one feature row and its targets, each with a leading
# batch axis of length 1.
sample_input_enc = X[sample_index][np.newaxis, :]
sample_correct_target = y[sample_index][np.newaxis, :]

In [2686]:
# Wrap the (still scaled) target row in a DataFrame labelled with the target column names.
sample_correct_target = pd.DataFrame(sample_correct_target, columns=y_columns)

In [2687]:
# Labelled view of the encoded feature row, for inspection.
# NOTE(review): sample_input is not used again in the visible part of this notebook.
sample_input = pd.DataFrame(sample_input_enc, columns=X_columns)

In [2688]:
# Convert the sample's true targets from the [0, 1] scaled range back to minutes.
# The frame is rebuilt from y[sample_index] instead of overwriting its own columns in place,
# so re-running this cell cannot inverse-transform already-unscaled values a second time.
sample_correct_target = pd.DataFrame(
    scaler.inverse_transform(y[sample_index].reshape(1, -1)),
    columns=['duration_since_order_created', 'time_until_delivery'],
)

In [2689]:
# Run the trained network on the single sample; the output is still in the scaled target space.
forward = model.predict(sample_input_enc, verbose=0)

In [None]:
# Undo the min-max scaling on the network output and label the two target columns.
pred = pd.DataFrame(scaler.inverse_transform(forward), columns=y_columns)
pred

In [None]:
# Ground-truth targets for the sampled row, shown for comparison with `pred`.
sample_correct_target