In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from fairness.pre_processing import categorical_to_numeric_converter
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA

In [2]:
# Load the ULL dataset from its comma-separated file into a DataFrame.
dataset = pd.read_csv(
    './dataset/ull/ULL_dataset.csv',
    sep=',',
)

In [3]:
# Columns used as prediction targets; the training loop fits one model per entry.
output_columns = [
    'score_MAT', 'level_MAT',
    'score_LEN', 'level_LEN',
    'score_ING', 'level_ING',
]

In [None]:
# Train and evaluate one grid-searched random forest per target column.
for output_column in output_columns:
    # Features are every column except the current target.
    # NOTE(review): the remaining entries of output_columns stay in X. If any
    # of them is derived from the current target (e.g. a level_* computed from
    # its score_*), this leaks the label into the features — confirm intent.
    X = dataset.drop(columns=output_column)
    y = dataset[output_column]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    # Fit the scaler on the training split ONLY and reuse its mean/std for the
    # test split. Fitting a second scaler on the test data would standardize
    # the two splits with different statistics, so the PCA projection and the
    # classifier learned on the training scale would not apply to the test set.
    standard_scaler = StandardScaler()
    X_train = standard_scaler.fit_transform(X_train)
    X_test = standard_scaler.transform(X_test)

    # A float n_components keeps as many principal components as needed to
    # explain that fraction (85%) of the training variance.
    pca = PCA(n_components=0.85)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    # Seed the forest so repeated runs of the notebook give the same scores
    # (the train/test split above is already seeded with the same value).
    estimator = RandomForestClassifier(random_state=42)

    # Hyper-parameter grid explored exhaustively by the grid search.
    parameters = {
        'min_samples_leaf': [5, 7, 9, 11],
        'n_estimators': [200, 500],
        'max_depth': [10, 20, 50, 80, 100, 150],
        'criterion': ['gini', 'entropy']
    }

    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=parameters,
        scoring='accuracy',
        return_train_score=True,
        cv=3
    )

    # fit() returns the fitted search object; predict() delegates to the best
    # estimator refit on the whole training split.
    grid_search_classifier = grid_search.fit(X_train, y_train)

    y_pred = grid_search_classifier.predict(X_test)

    # Report test vs. train accuracy (to spot over-fitting) and per-class metrics.
    print("OUTPUT COLUMN: ", output_column)
    print("Accuracy score on Test set: ", accuracy_score(y_test, y_pred))
    print("Accuracy score on Train set: ", accuracy_score(y_train, grid_search_classifier.predict(X_train)))
    print(classification_report(y_test, y_pred))