In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell executes (edits to project modules take effect
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [54]:
from utils.UpdateDatabase import *

# Data Formatting

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from urllib.parse import urljoin
from dotenv import load_dotenv
from datetime import *
from bson import ObjectId
import pandas as pd
import numpy as np
import requests
import random
import torch
import json
import os
# Load variables from a .env file into the process environment; the MongoDB
# credentials are read back later with os.getenv.
load_dotenv()

In [56]:
from utils.FlattenData import *
from modules.PremoAPI import *
from utils.SafeDataConverters import *

In [57]:
# API client instance; PremoAPI is presumably provided by the star import from
# modules.PremoAPI — verify against that module.
api = PremoAPI()

In [58]:
# MongoDB credentials come from environment variables, never from literals in
# the notebook. os.getenv returns None when a variable is not set.
username = os.getenv("mongodb_user")
password = os.getenv("mongodb_password")

In [59]:
from modules.database import Database
# Database handle built from the environment-provided credentials.
db = Database(username, password)

In [60]:
# Keep a reference to the API's play_data attribute (annotated as a list of dicts).
play_data: list[dict] = api.play_data

In [61]:
# Build the two flattened record sets used by the rest of the notebook from the
# database's JSON export.
# NOTE(review): db.to_json() is called once per flattener; if the export is
# expensive and the flatteners do not mutate their argument, a single shared
# call would suffice — confirm before changing.
flat_out = flatten_output(db.to_json())
flat_in = flatten_input(db.to_json())

In [62]:
# inp_df = pd.DataFrame(flat_in).sort_values(by=['order_id', 'sort_order'], ascending=True).convert_dtypes()
# inp_df['sort_order'] = inp_df.groupby(['order_id', 'product_id'])['sort_order'].transform(lambda x: x.rank(method='dense').astype(int) - 1)
# inp_df = inp_df.map(safe_to_datetime)
# inp_df.head()

In [63]:
# out_df = pd.DataFrame(flat_out).sort_values(by=['order_id', 'sort_order'], ascending=True).convert_dtypes()
# out_df['sort_order'] = out_df.groupby(['order_id', 'product_id'])['sort_order'].transform(lambda x: x.rank(method='dense').astype(int) - 1)
# out_df = out_df.map(safe_to_datetime)
# out_df.head()

# Workspace Detection Model

In [None]:
# Build the output frame: order rows by (order_id, sort_order) and let pandas
# pick the best nullable dtypes.
out_df = (
    pd.DataFrame(flat_out)
    .sort_values(by=['order_id', 'sort_order'], ascending=True)
    .convert_dtypes()
)
# Re-number sort_order inside every (order_id, product_id) group as a
# zero-based dense rank.
out_df['sort_order'] = (
    out_df
    .groupby(['order_id', 'product_id'])['sort_order']
    .transform(lambda ranks: ranks.rank(method='dense').astype(int) - 1)
)
# Apply safe_to_datetime to every cell (presumably converts date-like values
# and leaves the rest untouched — see utils.SafeDataConverters).
out_df = out_df.map(safe_to_datetime)
out_df.head()

In [None]:
# Model inputs for workspace detection: the task title and its position in the order.
X = pd.DataFrame(out_df[['task_title', 'sort_order']])
X.head()

In [None]:
# Classification target: the workspace column, kept as a one-column DataFrame.
y = pd.DataFrame(out_df['workspace'])
y.head()

In [67]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [68]:
# One-hot encoder that ignores categories unseen during fit and returns a dense array.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
# Scales numeric columns into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

In [69]:
# Feature transformer: one-hot encode task_title, min-max scale sort_order,
# and pass any other column through unchanged.
ctx = ColumnTransformer(
    [
        ('cat', ohe, ['task_title']),
        ('num', scaler, ['sort_order']),
    ],
    remainder='passthrough',
)

In [70]:
# Target transformer: one-hot encode the workspace label; any other column
# would be passed through unchanged.
cty = ColumnTransformer(
    [('cat', ohe, ['workspace'])],
    remainder='passthrough',
)

In [None]:
# Fit the feature transformer and wrap the encoded matrix in a DataFrame whose
# columns are the transformer's generated feature names.
X_encoded = pd.DataFrame(
    ctx.fit_transform(X),
    columns=ctx.get_feature_names_out(),
)
X_encoded.head()

In [None]:
# Fit the target transformer and wrap the one-hot matrix in a DataFrame whose
# columns are the transformer's generated feature names.
y_encoded = pd.DataFrame(
    cty.fit_transform(y),
    columns=cty.get_feature_names_out(),
)
y_encoded.head()

In [73]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
train_X, test_X, train_y, test_y = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

### Create and train the model

In [74]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

In [75]:
# Feed-forward workspace classifier: two hidden ReLU layers with dropout, one
# input per encoded feature column and one output per one-hot workspace column.
model = Sequential([
    Input(shape=(X_encoded.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # Each row has exactly one workspace (one-hot target) and the model is
    # trained with categorical_crossentropy, so the output must be a probability
    # distribution over classes: softmax, not independent sigmoids.
    Dense(y_encoded.shape[1], activation='softmax')
])

In [None]:
# Adam optimiser with categorical cross-entropy (targets are one-hot encoded);
# accuracy is tracked as the reporting metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [77]:
# Training hyper-parameters for the workspace classifier.
# NOTE(review): `epcohs` is a misspelling of `epochs`; the name is kept because
# the training cell reads exactly this variable.
epcohs = 100
batch_size = 32

In [None]:
# Train the classifier.
# NOTE(review): the held-out test split doubles as validation data here, so the
# later evaluation is not on data unseen during model selection.
model.fit(train_X, train_y, epochs=epcohs, batch_size=batch_size, validation_data=(test_X, test_y), verbose=1)

In [None]:
# Score the classifier on the held-out split; evaluate returns the loss followed
# by the single compiled metric. Accuracy is printed as a percentage.
loss, accuracy = model.evaluate(test_X, test_y, verbose=0)
print(f'Loss: {loss}, Accuracy: {accuracy*100:.2f}')

In [None]:
# Draw one random row (no fixed seed, so it changes on every run) to spot-check
# the classifier; keep the two features plus the true workspace.
sample = out_df.sample(1)[['task_title', 'sort_order', 'workspace']]
sample

In [None]:
# Feature columns of the sampled row.
sampleX = sample[['task_title', 'sort_order']]
sampleX.head()

In [None]:
# True workspace of the sampled row, for comparison with the prediction.
sampley = sample[['workspace']]
sampley.head()

In [83]:
# Encode the sample with the already-fitted feature transformer (transform only,
# no refit) so its columns line up with the training matrix.
sampleX_encoded = ctx.transform(sampleX)

In [None]:
# Fit the encoder on the target column so it knows the categories needed to
# inverse-transform the predictions. Only the fitted state is required, so call
# fit() rather than fit_transform(): this skips a discarded transform and avoids
# rebinding `_`, which IPython uses for the last cell output.
ohe.fit(y)
# Class scores for the sampled row, mapped back to a workspace label.
predy = model.predict(sampleX_encoded, verbose=0)
predy = ohe.inverse_transform(predy)
predy = pd.DataFrame(predy, columns=['workspace'])
predy.head()

# Task Card Time Prediction

In [None]:
# Build the input frame: order rows by (order_id, sort_order) and let pandas
# pick the best nullable dtypes.
inp_df = (
    pd.DataFrame(flat_in)
    .sort_values(by=['order_id', 'sort_order'], ascending=True)
    .convert_dtypes()
)
# Re-number sort_order inside every (order_id, product_id) group as a
# zero-based dense rank.
inp_df['sort_order'] = (
    inp_df
    .groupby(['order_id', 'product_id'])['sort_order']
    .transform(lambda ranks: ranks.rank(method='dense').astype(int) - 1)
)
# Apply safe_to_datetime to every cell (presumably converts date-like values
# and leaves the rest untouched — see utils.SafeDataConverters).
inp_df = inp_df.map(safe_to_datetime)
inp_df.head()

In [None]:
# Rebuild the output frame for the time-prediction task: order rows by
# (order_id, sort_order) and let pandas pick the best nullable dtypes.
out_df = (
    pd.DataFrame(flat_out)
    .sort_values(by=['order_id', 'sort_order'], ascending=True)
    .convert_dtypes()
)
# Re-number sort_order inside every (order_id, product_id) group as a
# zero-based dense rank.
out_df['sort_order'] = (
    out_df
    .groupby(['order_id', 'product_id'])['sort_order']
    .transform(lambda ranks: ranks.rank(method='dense').astype(int) - 1)
)
# Apply safe_to_datetime to every cell, then discard every row that still has
# a missing value in any column.
out_df = out_df.map(safe_to_datetime).dropna()
out_df.head()

In [None]:
# Minutes elapsed between order creation and the task start
# (timedelta -> seconds -> minutes).
out_df['duration_since_order_created'] = (
    (out_df['start_at'] - out_df['order_created_at']).dt.total_seconds() / 60
)
# Minutes remaining between the task start and the delivery date.
out_df['time_until_delivery'] = (
    (out_df['delivery_date'] - out_df['start_at']).dt.total_seconds() / 60
)
out_df.head()

In [None]:
# Model inputs for time prediction: four categorical descriptors plus task_duration.
X = pd.DataFrame(out_df[['material', 'color', 'task_title', 'task_duration', 'workspace']])
X.head()

In [None]:
# Regression targets: the two minute-valued duration columns derived above.
y = pd.DataFrame(out_df[['duration_since_order_created', 'time_until_delivery']])
y.head()

In [90]:
# Fresh encoder for the time-prediction task; same settings as before but
# configured to emit pandas DataFrames from transform.
ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False).set_output(transform='pandas')
# Fresh scaler mapping numeric columns into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

In [91]:
# Feature transformer: one-hot encode the four categorical columns; every other
# column in X (task_duration) is passed through unscaled.
ctx = ColumnTransformer(
    [('cat', ohe, ['material', 'color', 'task_title', 'workspace'])],
    remainder='passthrough',
)

In [92]:
# Target transformer: min-max scale both duration targets; any other column
# would be passed through unchanged.
cty = ColumnTransformer(
    [('num', scaler, ['duration_since_order_created', 'time_until_delivery'])],
    remainder='passthrough',
)

In [93]:
# Fit the feature transformer and wrap the encoded matrix in a DataFrame whose
# columns are the transformer's generated feature names.
X_encoded = pd.DataFrame(
    ctx.fit_transform(X),
    columns=ctx.get_feature_names_out(),
)

In [None]:
# Fit the target transformer and wrap the scaled targets in a DataFrame whose
# columns are the transformer's generated feature names.
y_encoded = pd.DataFrame(
    cty.fit_transform(y),
    columns=cty.get_feature_names_out(),
)
y_encoded.head()

In [None]:
# Print the number of NaN values per column to check whether any remain in the data.
print(out_df.isna().sum())

In [96]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

### Create the model

In [97]:
# Feed-forward regressor assembled layer by layer: one input per encoded feature
# column, two hidden ReLU layers with dropout, and one linear output per target.
model = Sequential()
model.add(Input(shape=(X_encoded.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# Linear activation leaves the regression outputs unbounded.
model.add(Dense(y_encoded.shape[1], activation='linear'))

In [None]:
# Adam optimiser with mean-squared-error loss for the two regression targets.
# NOTE(review): 'accuracy' is a classification metric and carries little meaning
# for continuous targets; a regression metric such as 'mae' would be more
# informative. Left unchanged because the evaluation cell unpacks and labels
# exactly one metric as "Accuracy".
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
model.summary()

In [99]:
# Training hyper-parameters for the time-prediction regressor.
epochs = 1000
batch_size = 16

In [None]:
# Train the regressor.
# NOTE(review): the held-out test split doubles as validation data here, so the
# later evaluation is not on data unseen during model selection.
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

In [None]:
# Score the regressor on the held-out split; evaluate returns the loss followed
# by the single compiled metric.
# NOTE(review): the model is compiled with an MSE loss, so the value printed as
# "Accuracy" is of limited meaning for these continuous targets.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Loss: {loss}, Accuracy: {accuracy*100:.2f}')

In [None]:
# Draw one random row (no fixed seed, so it changes on every run) to spot-check
# the regressor; keep the five features plus both true targets.
sample = out_df.sample(1)[['material', 'color', 'task_title', 'task_duration', 'workspace', 'duration_since_order_created', 'time_until_delivery']]
sample

In [None]:
# Feature columns of the sampled row.
sampleX = sample[['material', 'color', 'task_title', 'task_duration', 'workspace']]
sampleX

In [104]:
# Encode the sample with the already-fitted feature transformer (transform only)
# and label the columns with the transformer's generated feature names.
sampleX_encoded = pd.DataFrame(
    ctx.transform(sampleX),
    columns=ctx.get_feature_names_out(),
)

In [None]:
# Fit the scaler on the raw targets so it holds the min/max needed to map the
# model's scaled outputs back to minutes. Only the fitted state is required, so
# call fit() rather than fit_transform(): this skips a discarded transform and
# avoids rebinding `_`, which IPython uses for the last cell output.
scaler.fit(y)
# Scaled predictions for the sampled row, converted back to the original units.
predy = model.predict(sampleX_encoded, verbose=0)
predy = scaler.inverse_transform(predy)
predy = pd.DataFrame(predy, columns=['duration_since_order_created', 'time_until_delivery'])
predy.head()

In [None]:
# True targets of the sampled row. Take an explicit copy so that adding a column
# below writes to an independent frame instead of a slice of `sample`
# (avoids pandas' SettingWithCopyWarning / chained-assignment pitfalls).
sampley = sample[['duration_since_order_created', 'time_until_delivery']].copy()
# Mean of the two true durations, stored under the same column name that the
# prediction frame uses so the two can be compared side by side.
sampley['predicted_time'] = (sampley['duration_since_order_created'] + sampley['time_until_delivery']) / 2
sampley

In [None]:
# Mean of the two predicted durations, mirroring the column computed for the
# true targets so the two frames can be compared.
# NOTE(review): the intended meaning of averaging these two quantities is not
# evident from the notebook — confirm with the author.
predy['predicted_time'] = (predy['duration_since_order_created'] + predy['time_until_delivery']) / 2
predy.head()