In [292]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import make_scorer, get_scorer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from scikeras.wrappers import KerasClassifier





In [293]:
df = pd.read_csv(r"..\oblig3\student_performance.csv", delimiter=";")


In [294]:
encoder = OneHotEncoder(sparse_output=False)

targets = encoder.fit_transform(df[['Target']])
features = pd.DataFrame(StandardScaler().fit_transform(df.drop('Target', axis=1)), columns=df.drop('Target', axis=1).columns)


In [295]:
df_targets = pd.DataFrame(targets, columns=encoder.get_feature_names_out(['Target']))

In [296]:
print(targets.shape)

(4424, 3)


In [318]:

from numpy import average


global_random_state = 15

scoring = {
    'accuracy': get_scorer('accuracy'),
    'precision': make_scorer(precision_score, average='macro', zero_division=1.0),
    'recall': make_scorer(recall_score, average='macro', zero_division=1.0),
    'f1': make_scorer(f1_score, average='macro'),
    'roc_auc': make_scorer(roc_auc_score, multi_class='ovr', average='macro', response_method='predict')   
}

def evaluate(estimator, X, y):
    scores = {}
    for (name,scorer) in scoring.items():
        scores[name] = scorer(estimator, X, y) 
    return scores

def train(features, targets, estimator, params, scoring=scoring, refit='f1', random_state=global_random_state, outer_splits=5, inner_splits=4):

    outer_cv = KFold(n_splits=outer_splits, shuffle=True, random_state=global_random_state)
    inner_cv = KFold(n_splits=inner_splits, shuffle=True, random_state=global_random_state)

    scores_train = []
    scores_test = []
    estimators = []
    cv_results = []

    # Loop through all test folds
    for (train_index, test_index) in outer_cv.split(features, targets):
        
        grid = GridSearchCV(
            estimator,
            params, 
            scoring=scoring, 
            refit=refit,
            error_score='raise', 
            cv=inner_cv)
        grid.fit(features.iloc[train_index], targets[train_index])        
        
        evaluation_train = evaluate(grid, features.iloc[train_index], targets[train_index])
        evaluation_test = evaluate(grid, features.iloc[test_index], targets[test_index])
        
        scores_train.append(evaluation_train)
        scores_test.append(evaluation_test)
        
        estimators.append(grid.best_estimator_)
        cv_results.append(pd.DataFrame(grid.cv_results_))
        print("*")

    return estimators, pd.DataFrame(scores_train), pd.DataFrame(scores_test), pd.concat(cv_results, names=['test_split'], keys=range(outer_splits))

def print_estimators(estimators):
    for estimatior in estimators:
        print(estimatior)

def create_model(optimizer="adam", loss='categorical_crossentropy', activation='relu', layers=1, neurons=120):
    model = Sequential()
    model.add(Input(shape=(36,)))
    for layer in range(1,layers+1):
        model.add(Dense(int(neurons/layer), activation=activation))
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return model


In [325]:
param_grid = {
    'batch_size': [10],
    'epochs': [10],     
    'model__optimizer': ['adam'], #, 'sgd', 'rmsprop'
    'model__activation': ['relu'], #, 'tanh', 'sigmoid'
    'model__loss': ['categorical_crossentropy'], #, 'sparse_categorical_crossentropy'
    'model__layers': [2, 3, 4],
    'model__neurons': [40, 80, 120],
}

kearas_model = KerasClassifier(model=create_model, verbose=0)

keras_estimators, keras_scores_train, keras_scores_test, keras_cv_results = train(features, targets, kearas_model, param_grid, outer_splits=2, inner_splits=2)

*
*


In [326]:
keras_scores_test

Unnamed: 0,accuracy,precision,recall,f1,roc_auc
0,0.737342,0.671973,0.665638,0.668352,0.76352
1,0.754069,0.683061,0.641965,0.643383,0.750274


In [327]:
keras_estimators


[KerasClassifier(
 	model=<function create_model at 0x0000024F7890F560>
 	build_fn=None
 	warm_start=False
 	random_state=None
 	optimizer=rmsprop
 	loss=None
 	metrics=None
 	batch_size=10
 	validation_batch_size=None
 	verbose=0
 	callbacks=None
 	validation_split=0.0
 	shuffle=True
 	run_eagerly=False
 	epochs=10
 	class_weight=None
 	model__activation=relu
 	model__layers=3
 	model__loss=categorical_crossentropy
 	model__neurons=80
 	model__optimizer=adam
 ),
 KerasClassifier(
 	model=<function create_model at 0x0000024F7890F560>
 	build_fn=None
 	warm_start=False
 	random_state=None
 	optimizer=rmsprop
 	loss=None
 	metrics=None
 	batch_size=10
 	validation_batch_size=None
 	verbose=0
 	callbacks=None
 	validation_split=0.0
 	shuffle=True
 	run_eagerly=False
 	epochs=10
 	class_weight=None
 	model__activation=relu
 	model__layers=2
 	model__loss=categorical_crossentropy
 	model__neurons=40
 	model__optimizer=adam
 )]

In [301]:



#grid = GridSearchCV(estimator=kearas_model, verbose=1, scoring=scoring, return_train_score=True, cv=5, param_grid=param_grid, refit='f1', error_score='raise')

In [302]:
#grid_result = grid.fit(features, df_targets)