In [None]:
import pandas as pd

def read_csv_to_dataframe(file_path):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : path (or any source accepted by ``pd.read_csv``) to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed frame, or ``None`` when reading raised any exception.
        In that case the error is printed instead of being propagated.
    """
    frame = None
    try:
        frame = pd.read_csv(file_path)
    except Exception as err:
        # Best-effort loader: report the problem and fall through to return None.
        print("An error occurred:", err)
    return frame
    

In [None]:
# Location of the input dataset.
data_path = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/Data/102_Data.csv"
data = read_csv_to_dataframe(data_path)
# read_csv_to_dataframe signals failure by printing and returning None. Stop
# here with a clear message instead of failing later with an AttributeError
# the first time the frame is indexed.
if data is None:
    raise RuntimeError(f"Could not load dataset from {data_path!r}")

# Output directories handed to the evaluation helpers as save_path.
path_smote = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/smote"
path_strat = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/strat"
path_smote_strat = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/smote_strat"

In [None]:
from sklearn.model_selection import train_test_split
# Features are every column except the last; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
# use random search instead of grid search for hyperparameter optimization
from sklearn.model_selection import RandomizedSearchCV

def tune_LGBM_hyperparameters(X_train, y_train, X_test, y_test):
    """Tune an LGBMClassifier with randomized search and report its accuracy.

    Runs a 50-iteration ``RandomizedSearchCV`` (stratified 5-fold CV, accuracy
    scoring) on the training data, then prints the best hyperparameters, the
    best cross-validated accuracy, and the accuracy of the refitted best
    estimator on the training set and on the held-out test set.

    Parameters
    ----------
    X_train, y_train : training features and labels used for the search.
    X_test, y_test : held-out features and labels, used only for the final
        accuracy report.

    Returns
    -------
    dict
        ``best_params_`` of the search, keyed by LGBMClassifier argument name.
    """
    # Local import: scipy.stats is not imported anywhere else in this notebook.
    from scipy.stats import loguniform, randint, uniform

    # RandomizedSearchCV treats a tuple/list as a set of discrete choices. The
    # earlier skopt-style specs, e.g. (0.01, 1.0, 'log-uniform'), therefore made
    # it try the literal string 'log-uniform' as a parameter value and only the
    # two endpoints otherwise. Real distributions are sampled via .rvs().
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0,
    # which is left at its default here; confirm whether bagging is intended.
    param_dist = {
        'n_estimators': randint(10, 1001),       # integers in [10, 1000]
        'max_depth': randint(1, 21),             # integers in [1, 20]
        'learning_rate': loguniform(0.01, 1.0),
        'subsample': uniform(0.1, 0.9),          # loc=0.1, scale=0.9 -> [0.1, 1.0]
        'colsample_bytree': uniform(0.1, 0.9),   # loc=0.1, scale=0.9 -> [0.1, 1.0]
        'reg_alpha': loguniform(1e-9, 1000),
        'reg_lambda': loguniform(1e-9, 1000),
    }

    lgbm_classifier = LGBMClassifier(random_state=42)
    cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    random_search = RandomizedSearchCV(
        lgbm_classifier, param_dist, n_iter=50, cv=cv_strategy, scoring='accuracy', random_state=42
    )
    random_search.fit(X_train, y_train)

    print(f"Best Hyperparameters: {random_search.best_params_}")
    print(f"Best Accuracy: {random_search.best_score_}")

    best_model = random_search.best_estimator_

    train_accuracy = accuracy_score(y_train, best_model.predict(X_train))
    print(f"Train Accuracy: {train_accuracy}")

    # The search is fitted on the raw (unscaled) features, so the test set must
    # be scored in the same feature space. Previously it was standardized first,
    # which fed the model inputs on a different scale than it was trained on.
    test_accuracy = accuracy_score(y_test, best_model.predict(X_test))
    print(f"Test Accuracy: {test_accuracy}")
    return random_search.best_params_

# Run the hyperparameter search and keep the best parameters it returns.
params = tune_LGBM_hyperparameters(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)


In [None]:
from lightgbm import LGBMClassifier

# Build the final model from the tuned hyperparameters. The search scored
# LGBMClassifier(random_state=42), so the same seed is used as a default here
# to make this estimator match the configuration that was tuned. Any
# random_state already present in `params` takes precedence.
lgbm = LGBMClassifier(**{"random_state": 42, **params})

In [None]:
from Utility_model_training import evaluate_classifier_with_stratified_kfold, evaluate_classifier_with_kfold_smote, evaluate_classifier_with_stratified_smote
# Run the three evaluation helpers from Utility_model_training on the same
# train/test split and the same estimator, each with 10 folds and its own
# output directory and model name. They are kept as separate statements in the
# original order.
evaluate_classifier_with_stratified_smote(
    X_train, y_train, X_test, y_test, lgbm,
    num_folds=10,
    save_path=path_smote_strat,
    model_name="lgbm_classifier_smote_stratified",
)
evaluate_classifier_with_stratified_kfold(
    X_train, y_train, X_test, y_test, lgbm,
    num_folds=10,
    save_path=path_strat,
    model_name="lgbm_classifier_stratified",
)
evaluate_classifier_with_kfold_smote(
    X_train, y_train, X_test, y_test, lgbm,
    num_folds=10,
    save_path=path_smote,
    model_name="lgbm_classifier_smote",
)

In [None]:
# Output folders for the ROC plots; each is passed as save_folder to the
# matching plotting helper below.
path_smote_strat_ROC = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/smote_strat/ROC_Curves"
path_strat_ROC = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/strat/ROC_Curves"
path_smote_ROC = "/home/nalin21478/BTP/ML-food-Processing/ML in Food Processing/models_102/smote/ROC_Curves"
from Utility_ROC_Curves import plot_roc_curves_multiclass_smote_strat,plot_roc_curves_multiclass_smote, plot_roc_curves_multiclass_strat
# Call the three ROC plotting helpers from Utility_ROC_Curves on the full
# dataset (X, y) with 10 splits, each writing to its own folder under its own
# model name. They are kept as separate statements in the original order.
plot_roc_curves_multiclass_smote_strat(
    lgbm, X, y,
    n_splits=10,
    save_folder=path_smote_strat_ROC,
    model_name="lgbm_KFold_SMOTE_strat",
)
plot_roc_curves_multiclass_strat(
    lgbm, X, y,
    n_splits=10,
    save_folder=path_strat_ROC,
    model_name="lgbm_KFold_Strat",
)
plot_roc_curves_multiclass_smote(
    lgbm, X, y,
    n_splits=10,
    save_folder=path_smote_ROC,
    model_name="lgbm_KFold_SMOTE",
)
