# Hyper Parameter tuning:

In [None]:
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from models.logistic_regression import LogisticRegression
from models.weighted_lr import WeightedLogisticRegression
from models.random_forest import RandomForest
from evaluator.model_evaluator import ModelEvaluator
from training.crossval import CrossValidator

X_train = pd.read_csv('../dataset/X_train.csv').values
X_test = pd.read_csv('../dataset/X_test.csv').values
y_train = pd.read_csv('../dataset/y_train.csv').values.ravel()
y_test = pd.read_csv('../dataset/y_test.csv').values.ravel()

In [46]:
param_grid_lr = {
    'eta': [0.0001,0.001, 0.01],
    'lambda_reg': [0.0, 0.01,0.001,],
    'epochs': [100,500,750,1000]
}

param_grid_wlr = {
    'eta': [0.0001,0.001, 0.01],
    'lambda_reg': [0.0, 0.01],
    'epochs': [100,500,750,1000],
    'class_weights': [{0: 1, 1: 5}, {0: 1, 1: 10},{0:1, 1:1}]
}

param_grid_rf = {
    'n_estimators': [5, 10, 25, 50],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 4, 8]
}


#### Parameter tuning logistic Regression

In [32]:
from sklearn.model_selection import ParameterGrid

best_params = None
best_f2_score = 0.0

for params in ParameterGrid(param_grid_lr):
    lr = LogisticRegression(eta=params['eta'], epochs=params['epochs'], lambda_reg=params['lambda_reg'])
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
    
    if metrics['f2_score'] > best_f2_score:
        best_f2_score = metrics['f2_score']
        best_params = params

print("Best Parameters:", best_params)
print("Best F2 Score:", best_f2_score)

Best Parameters: {'epochs': 500, 'eta': 0.0001, 'lambda_reg': 0.0}
Best F2 Score: 0.7407407407407408


- After my own further tweaks i will use (lr = LogisticRegression(eta=0.0001, epochs=1000, lambda_reg=0.01))  just so i dont sacrifice accuracy/precision too much

In [50]:
lr = LogisticRegression(eta=0.0001, epochs=1000, lambda_reg=0.01)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print("LR Test Metrics:", metrics)

LR Test Metrics: {'accuracy': np.float64(0.7009943181818182), 'precision': np.float64(0.46474820143884893), 'recall': np.float64(0.8682795698924731), 'f1_score': np.float64(0.60543580131209), 'f2_score': np.float64(0.7398076042143839)}


#### Parameter tuning Weighted logistic regression

In [47]:
best_params_wlr = None
best_f2_score_wlr = 0.0

for params in ParameterGrid(param_grid_wlr):
    wlr = WeightedLogisticRegression(
        eta=params['eta'],
        epochs=params['epochs'],
        lambda_reg=params['lambda_reg'],
        class_weights=params['class_weights']
    )
    wlr.fit(X_train, y_train)
    y_pred = wlr.predict(X_test)
    metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
    
    if metrics['f2_score'] > best_f2_score_wlr:
        best_f2_score_wlr = metrics['f2_score']
        best_params_wlr = params

print("Best Parameters for WLR:", best_params_wlr)
print("Best F2 Score for WLR:", best_f2_score_wlr)

Best Parameters for WLR: {'class_weights': {0: 1, 1: 10}, 'epochs': 1000, 'eta': 0.01, 'lambda_reg': 0.0}
Best F2 Score for WLR: 0.75


In [None]:
cw = {0: 1, 1: 10}
wlr = WeightedLogisticRegression(eta = 0.001,epochs=1000, lambda_reg = 0.0,class_weights= cw)
wlr.fit(X_train, y_train)
y_pred = wlr.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print(metrics)

cw = {0: 1, 1: 5}
wlr = WeightedLogisticRegression(eta = 0.01,epochs=1000, lambda_reg = 0,class_weights= cw)
wlr.fit(X_train, y_train)
y_pred = wlr.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print(metrics)

{'accuracy': np.float64(0.6633522727272727), 'precision': np.float64(0.4346153846153846), 'recall': np.float64(0.9112903225806451), 'f1_score': np.float64(0.5885416666666666), 'f2_score': np.float64(0.7473544973544973)}
{'accuracy': np.float64(0.6818181818181818), 'precision': np.float64(0.44878706199460916), 'recall': np.float64(0.8951612903225806), 'f1_score': np.float64(0.5978456014362658), 'f2_score': np.float64(0.7466367713004484)}


- i will use :cw = {0: 1, 1: 10} : eta = 0.001,epochs=1000, lambda_reg = 0.0,

In [52]:
lr = LogisticRegression(eta=0.0001, epochs=1000, lambda_reg=0.01)
lr_cv_results = CrossValidator.cross_validate(lr, X_train, y_train, folds=10, random_state=42)
print("LR Mean Metrics:", lr_cv_results['mean_metrics'])

LR Mean Metrics: {'accuracy': np.float64(0.6931922797888574), 'precision': np.float64(0.4583261271346162), 'recall': np.float64(0.8429953879688474), 'f1_score': np.float64(0.5930347347557381), 'f2_score': np.float64(0.7209470827987887)}


##### Random forest

In [None]:
best_params_rf = None
best_f2_score_rf = 0.0

for params in ParameterGrid(param_grid_rf):
    rf = RandomForest(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
        min_samples_split=params['min_samples_split']
    )
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
    
    if metrics['f2_score'] > best_f2_score_rf:
        best_f2_score_rf = metrics['f2_score']
        best_params_rf = params

print("Best Parameters for RF:", best_params_rf)
print("Best F2 Score for RF:", best_f2_score_rf)

In [55]:
rf = RandomForest(n_estimators=10, max_depth=10, min_samples_split=2)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print("Random Forest Metrics:", metrics)

Random Forest Metrics: {'accuracy': np.float64(0.7982954545454546), 'precision': np.float64(0.6527777777777778), 'recall': np.float64(0.5053763440860215), 'f1_score': np.float64(0.5696969696969698), 'f2_score': np.float64(0.5292792792792793)}


In [56]:
rf = RandomForest(n_estimators=50, max_depth=None, min_samples_split=2)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print("Random Forest Metrics:", metrics)

Random Forest Metrics: {'accuracy': np.float64(0.7876420454545454), 'precision': np.float64(0.6327272727272727), 'recall': np.float64(0.46774193548387094), 'f1_score': np.float64(0.5378670788253477), 'f2_score': np.float64(0.4934770277935337)}


In [59]:
rf = RandomForest(n_estimators=100, max_depth=10, min_samples_split=2)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
metrics = ModelEvaluator.calculate_metrics(y_test, y_pred)
print("Random Forest Metrics:", metrics)

Random Forest Metrics: {'accuracy': np.float64(0.8061079545454546), 'precision': np.float64(0.6601941747572816), 'recall': np.float64(0.5483870967741935), 'f1_score': np.float64(0.5991189427312775), 'f2_score': np.float64(0.5676126878130217)}
