<a href="https://colab.research.google.com/github/giakomorssi/Deloitte_Project/blob/main/04_LogisticDelay.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import the Data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

from sklearn.metrics import mean_squared_error

import pandas as pd

# Change Colab runtime to GPU
import os
os.environ['COLAB_TPU_ADDR'] = ''
os.environ['COLAB_GPU_ALLOC'] = '1'
os.environ['COLAB_GPU'] = '1'
print("Runtime switched to GPU")

import tensorflow as tf

if not tf.test.gpu_device_name():
    print('GPU device not found')
else:
    print('GPU device found:', tf.test.gpu_device_name())

# This code sets the runtime to use the GPU if available
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

pd.set_option('display.max_columns', None)

!pip install -q category_encoders

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Runtime switched to GPU
GPU device not found
Please install GPU version of TF


In [2]:
df = pd.read_csv('/content/drive/MyDrive/University/Deloitte/SupplyChainDataset_eda.csv', encoding = 'latin-1')

# Dataprocessing

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.histplot(df['Days for shipment (scheduled)'] - df['Days for shipping (real)'])
plt.xlabel('Days for shipment (scheduled) - Days for shipping (real)')
plt.ylabel('Count')
plt.show()

In [None]:
df['Delay'] = df['Days for shipment (scheduled)'] - df['Days for shipping (real)']

for i in range(0, len(df)):
  if df['Delay'][i] < 0:
    df['Delay'][i] = 1
  else:
    df['Delay'][i] = 0

df['Delay'].value_counts()

In [4]:
df.drop(['Delivery Status', 'Late_delivery_risk', 'Days for shipment (scheduled)', 'Days for shipping (real)', 
         'Unnamed: 0', 'Order Region', 'Department Id', 'Category', 'Order Status'], axis = 1, inplace = True)

In [5]:
from datetime import datetime

df['order date'] = pd.to_datetime(df['order date (DateOrders)'])
df['shipping date'] = pd.to_datetime(df['shipping date (DateOrders)'])

df['order date'] = df['order date'].apply(lambda x: x.timestamp())
df['shipping date'] = df['shipping date'].apply(lambda x: x.timestamp())

df.drop(['order date (DateOrders)', 'shipping date (DateOrders)'], axis = 1, inplace = True)

In [21]:
df.to_csv('/content/drive/MyDrive/University/Deloitte/SupplyChainDataset_delay.csv', index = False)

# Encoding

In [22]:
df = pd.read_csv('/content/drive/MyDrive/University/Deloitte/SupplyChainDataset_delay.csv')

In [23]:
from sklearn.model_selection import train_test_split

X = df.drop('Delay', axis = 1)
y = df['Delay']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [24]:
import pandas as pd
from category_encoders import LeaveOneOutEncoder

# initialize the encoder
enc = LeaveOneOutEncoder(cols=['Customer City', 'Order City'])

# fit and transform the entire dataset
X_train = enc.fit_transform(X_train, y_train)
X_test = enc.transform(X_test)

In [26]:
from sklearn.preprocessing import OneHotEncoder

# Select columns for one-hot encoding
one_hot_cols = [0, 3, 5, 6, 9]
# Type, Category Name, Customer Segment, Department Name, Market, Order Status

# Fit one-hot encoder to training data
one_hot_encoder = OneHotEncoder(handle_unknown="ignore")

# Apply one-hot encoder to training and test data
X_train_one_hot = one_hot_encoder.fit_transform(X_train.iloc[:, one_hot_cols])
X_test_one_hot = one_hot_encoder.transform(X_test.iloc[:, one_hot_cols])

# Remove original columns from training and test data
X_train = X_train.drop(X_train.columns[one_hot_cols], axis=1)
X_test = X_test.drop(X_test.columns[one_hot_cols], axis=1)

# Concatenate one-hot encoded columns with remaining data
X_train = pd.concat([pd.DataFrame(X_train_one_hot.toarray()), X_train.reset_index(drop=True)], axis=1)
X_test = pd.concat([pd.DataFrame(X_test_one_hot.toarray()), X_test.reset_index(drop=True)], axis=1)

In [27]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# Shipping Mode
custom_order = ['Same Day', 'First Class', 'Second Class', 'Standard Class']
le.fit(custom_order)
X_train['Shipping Mode'] = le.fit_transform(X_train['Shipping Mode'])
X_test['Shipping Mode'] = le.transform(X_test['Shipping Mode'])

In [29]:
from sklearn.preprocessing import StandardScaler

X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

scaler = StandardScaler()

X_train[X_train.columns[73:]] = scaler.fit_transform(X_train[X_train.columns[73:]])
X_test[X_test.columns[73:]] = scaler.transform(X_test[X_test.columns[73:]])

In [30]:
import pandas as pd
import numpy as np
import pickle

# Split the dataset into features and target
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
y_train = pd.DataFrame(y_train)
y_train = np.ravel(y_train)

X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

# Models

In [32]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import pickle

model = LogisticRegression(C=10, solver='lbfgs', max_iter=1000)

model.fit(X_train, y_train)

with open('/content/drive/MyDrive/University/Deloitte/model_delay/lr_no_late.pkl', 'wb') as f:
  pickle.dump(model, f)

In [33]:
import pickle
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score

with open('/content/drive/MyDrive/University/Deloitte/model_delay/lr_no_late.pkl', 'rb') as f:
  model = pickle.load(f)

y_pred = model.predict(X_test)

print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Recall: {recall_score(y_test, y_pred)}')
print(f'Precision: {precision_score(y_test, y_pred)}')
print(f'F1: {f1_score(y_test, y_pred)}')
print(f'Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}')

Accuracy: 0.9678428982938179
Recall: 0.9889232852858663
Precision: 0.9564017401880526
F1: 0.9723906684739958
Confusion Matrix: 
[[14498   932]
 [  229 20445]]
