<a id="section1"></a>
# <font color="#004D7F" size=5> 1. System setup</font>

```
    sudo pip3 install lazypredict
```

In [None]:
import os

import joblib

import numpy as np
import pandas as pd

from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import log_loss

# Seed NumPy's global random number generator so that anything drawing from
# np.random without its own random_state starts from a fixed state.
np.random.seed(42)

# <font color="#004D7F" size=5> 2. Lazy Classifier</font>

## <font color="#004D7F" size=5> 2.1. Dataset Preparation</font>

In [None]:
# Name of the classification dataset to benchmark; must be a key of
# DATASET_SOURCES below.
dataset = "Covertype"

# For every supported dataset: (path to the CSV file, name of the target column).
DATASET_SOURCES = {
    "HELOC": ("../datasets/HELOC/heloc.csv", "RiskPerformance"),
    "Dengue": ("../datasets/Dengue/dengue_chikunguya_bin.csv", "CLASSI_FIN"),
    "Covertype": ("../datasets/Covertype/covtype.csv", "54"),
    "Gas": ("../datasets/Gas/gas.csv", "Class"),
}

# Fail early with a clear message instead of hitting a NameError further down
# when the dataset name is mistyped or not supported.
if dataset not in DATASET_SOURCES:
    raise ValueError(
        f"Unknown dataset {dataset!r}; expected one of {sorted(DATASET_SOURCES)}"
    )

dataset_path, target_column = DATASET_SOURCES[dataset]
df = pd.read_csv(dataset_path, delimiter=',')

# Detach the target column from the features; it is re-attached below, in
# encoded form, as the last column under the uniform name 'class'.
class_col = df.pop(target_column)

# Map the original target labels to integer codes.
label_encoder = LabelEncoder()
class_col_encoded = label_encoder.fit_transform(class_col)

df['class'] = class_col_encoded
df

In [None]:
# Show which integer code (position in label_encoder.classes_) stands for each
# original label.
labels = label_encoder.classes_

for integer_value, label in enumerate(labels):
    print(f"Label: {label} -> Integer Value: {integer_value}")

In [None]:
# Separate the encoded 'class' target from the remaining feature columns.
df_y = df['class']
df_x = df.drop(columns='class')

<a id="section22"></a>
## <font color="#004D7F" size=5> 2.2. Preparing Experimentation</font>

In [None]:
# 60/20/20 train/validation/test partition, stratified on the class label:
# first hold out 40% of the rows, then cut that hold-out in half.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    df_x,
    df_y,
    test_size=0.40,
    random_state=42,
    stratify=df_y,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size=0.50,
    random_state=42,
    stratify=y_holdout,
)

<a id="section23"></a>
## <font color="#004D7F" size=5> 2.3. Model Benchmarking</font>

In [None]:
# Benchmark the lazypredict classifiers: fit on the training split and score
# on the validation split. The leaderboard is what the best model is chosen
# from, so it must not be computed on the test split, which is reserved for the
# final evaluation of the selected model.
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_val, y_train, y_val)

# Leaderboard: one row per benchmarked model.
models

In [None]:
# Retrieve the fitted models as a mapping that is later indexed by model name.
# NOTE(review): lazypredict's provide_models appears to return the models
# already trained by clf.fit() and to refit only when none exist, in which case
# the splits passed here are ignored -- confirm against the installed version.
model_dictionary = clf.provide_models(X_train, X_val, y_train, y_val)

In [None]:
# Select the classifier to persist.
# NOTE(review): the model name is hard-coded; re-check it against the `models`
# leaderboard whenever `dataset` changes.
best_model = model_dictionary['ExtraTreesClassifier']
model_path = f'../models/{dataset}/best_classical_model_{dataset}.pkl'

# joblib.dump does not create missing parent directories, so make sure the
# per-dataset output folder exists before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(best_model, model_path)

In [None]:
# Evaluate the selected classifier on the test split and report its accuracy.
predictions = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)

print(f"Test Accuracy: {accuracy:.4f}")

# <font color="#004D7F" size=5> 3. Lazy Regressor</font>

## <font color="#004D7F" size=5> 3.1. Dataset Preparation</font>

In [None]:
# Regression benchmark: read the Puma CSV into a DataFrame and display it.
dataset, dataset_path = "Puma", "../datasets/Puma/puma8NH.csv"
df = pd.read_csv(dataset_path, sep=',')

df

In [None]:
# Use the 'class' column as the regression target and every other column as a
# feature.
df_y = df['class']
df_x = df.drop(columns='class')

## <font color="#004D7F" size=5> 3.2. Preparing Experimentation</font>

In [None]:
# 60/20/20 train/validation/test partition: first hold out 40% of the rows,
# then cut that hold-out in half.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    df_x,
    df_y,
    test_size=0.40,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size=0.50,
    random_state=42,
)

## <font color="#004D7F" size=5> 3.3. Model Benchmarking</font>

In [None]:
# Benchmark the lazypredict regressors: fit on the training split and score on
# the validation split. The leaderboard is what the best model is chosen from,
# so it must not be computed on the test split, which is reserved for the final
# evaluation of the selected model.
reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_val, y_train, y_val)

# Leaderboard: one row per benchmarked model.
models

In [None]:
# Retrieve the fitted models as a mapping that is later indexed by model name.
# NOTE(review): lazypredict's provide_models appears to return the models
# already trained by reg.fit() and to refit only when none exist, in which case
# the splits passed here are ignored -- confirm against the installed version.
model_dictionary = reg.provide_models(X_train, X_val, y_train, y_val)

In [None]:
# Select the regressor to persist.
# NOTE(review): the model name is hard-coded; re-check it against the `models`
# leaderboard whenever the data changes.
best_model = model_dictionary['GradientBoostingRegressor']
model_path = f'../models/{dataset}/best_classical_model_{dataset}.pkl'

# joblib.dump does not create missing parent directories, so make sure the
# per-dataset output folder exists before writing.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(best_model, model_path)

In [None]:
# Evaluate the selected regressor on the test split and report the root of its
# mean squared error.
predictions = best_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
rmse_value = np.sqrt(mse)

print(f"Test rmse_result: {rmse_value:.4f}")