In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import gc
import pickle

In [2]:
# Load the CSV files
order_list = pd.read_csv('preprocessed_OrderList.csv')
freight_rates = pd.read_csv('preprocessed_FreightRates.csv')
wh_costs = pd.read_csv('preprocessed_WhCosts.csv')
wh_capacities = pd.read_csv('preprocessed_WhCapacities.csv')
products_per_plant = pd.read_csv('preprocessed_ProductsPerPlant.csv')
vmi_customers = pd.read_csv('preprocessed_VmiCustomers.csv')
plant_ports = pd.read_csv('preprocessed_PlantPorts.csv')


In [3]:
# Data Preprocessing and Feature Engineering

# Merge necessary data to the order list
order_list = order_list.merge(products_per_plant, on=['Plant Code', 'Product ID'], how='left')
order_list = order_list.merge(plant_ports, on='Plant Code', how='left')
order_list = order_list.merge(vmi_customers, on='Plant Code', how='left')
order_list = order_list.merge(wh_costs, left_on='Plant Code', right_on='WH', how='left')
order_list = order_list.merge(wh_capacities, left_on='Plant Code', right_on='Plant ID', how='left')
order_list = order_list.merge(freight_rates, left_on=['Carrier', 'Origin Port', 'Destination Port'],
                              right_on=['Carrier', 'orig_port_cd', 'dest_port_cd'], how='left')


In [4]:
# Data Cleaning
order_list.fillna(0, inplace=True)  # Fill missing values with 0

In [5]:
# Feature Selection and Target Variable
X = order_list.drop(columns=['Order ID', 'Order Date', 'Customer', 'Product ID', 'Carrier', 'Destination Port', 'Plant Code'])
y = order_list['Plant Code']  # Assuming the target is to predict the 'Plant Code'

In [6]:
# One-hot encoding for categorical variables
X = pd.get_dummies(X, drop_first=True)

In [7]:
# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# Using a smaller subset of the data for GridSearchCV
X_train_sample = X_train.sample(frac=0.1, random_state=42)
y_train_sample = y_train.loc[X_train_sample.index]

In [9]:
# Pipeline creation
pipeline = Pipeline([
    ('feature_selection', SelectKBest(score_func=f_classif)),
    ('classifier', RandomForestClassifier(random_state=42))
])


In [10]:
# Simplified Hyperparameters for GridSearch
param_grid = {
    'feature_selection__k': [10, 20, 30],
    'classifier__n_estimators': [50, 100],
    'classifier__max_depth': [None, 10, 20],
    'classifier__min_samples_split': [2, 5]
}


In [11]:
# GridSearchCV for model selection
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=1, verbose=2)


In [12]:
# Fit the model
grid_search.fit(X_train_sample, y_train_sample)


Fitting 5 folds for each of 36 candidates, totalling 180 fits


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.7s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=None, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.7s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.7s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.9s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.8s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.8s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.8s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.6s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.6s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.6s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.8s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=10, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.8s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.6s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=2, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=10; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.1s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=20; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.2s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=50, feature_selection__k=30; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=10; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.3s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=20; total time=   0.4s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.5s


  f = msb / msw
  f = msb / msw


[CV] END classifier__max_depth=20, classifier__min_samples_split=5, classifier__n_estimators=100, feature_selection__k=30; total time=   0.6s


  f = msb / msw
  f = msb / msw


GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('feature_selection', SelectKBest()),
                                       ('classifier',
                                        RandomForestClassifier(random_state=42))]),
             n_jobs=1,
             param_grid={'classifier__max_depth': [None, 10, 20],
                         'classifier__min_samples_split': [2, 5],
                         'classifier__n_estimators': [50, 100],
                         'feature_selection__k': [10, 20, 30]},
             verbose=2)

In [13]:
# Best model evaluation
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

In [14]:
print("Best Parameters:", grid_search.best_params_)
print("Classification Report:\n", classification_report(y_test, y_pred))

Best Parameters: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50, 'feature_selection__k': 10}
Classification Report:
               precision    recall  f1-score   support

     PLANT03       1.00      1.00      1.00     39444
     PLANT08       1.00      1.00      1.00       407
     PLANT09       1.00      1.00      1.00        42
     PLANT12       1.00      1.00      1.00      1602
     PLANT13       1.00      1.00      1.00       341
     PLANT16       1.00      1.00      1.00        45

    accuracy                           1.00     41881
   macro avg       1.00      1.00      1.00     41881
weighted avg       1.00      1.00      1.00     41881



In [15]:
# View the best model
print("Best Model:\n", best_model)

Best Model:
 Pipeline(steps=[('feature_selection', SelectKBest()),
                ('classifier',
                 RandomForestClassifier(n_estimators=50, random_state=42))])


In [16]:
# Save the best model using pickle
with open('best_route_assignment_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

In [17]:
# Load the model using pickle
with open('best_route_assignment_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

In [18]:
# Test the loaded model
y_pred_loaded = loaded_model.predict(X_test)
print("Classification Report for Loaded Model:\n", classification_report(y_test, y_pred_loaded))

Classification Report for Loaded Model:
               precision    recall  f1-score   support

     PLANT03       1.00      1.00      1.00     39444
     PLANT08       1.00      1.00      1.00       407
     PLANT09       1.00      1.00      1.00        42
     PLANT12       1.00      1.00      1.00      1602
     PLANT13       1.00      1.00      1.00       341
     PLANT16       1.00      1.00      1.00        45

    accuracy                           1.00     41881
   macro avg       1.00      1.00      1.00     41881
weighted avg       1.00      1.00      1.00     41881



In [19]:
# Clean up memory
del X_train, X_test, y_train, y_test, X_train_sample, y_train_sample
gc.collect()

188