In [None]:
#Libraries import

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report
from numpy import empty
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
import copy
import os
import sys
%run evaluation/metrics.ipynb

"""Perform classification models on data.

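    Args:
      model: Classifier to train (must provide fit and predict).
      data: Dataset as a DataFrame; the last column is the class label.
      folds: Number of folds used for cross-validation.
      imp_name: Name used for the report sub-directory and report file name.
      dataset_name: Name used for the report directory.
      mech: Suffix used in the report file name.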
"""

def classifiers(model, data, folds, imp_name, dataset_name, mech):
    """Cross-validate a classifier, test the best fold's model and save reports.

    The data is split 90/10 into a development set and a held-out test set.
    The development set is split with ``KFold``; for every fold the model is
    fitted, per-class precision / recall / F1 are accumulated, and the fitted
    model with the highest validation accuracy is kept.  The averaged
    validation metrics and the held-out test metrics are printed and written
    to ``reports/classifiers_results/<dataset_name>/<imp_name>/`` as
    ``valid_<Model>_<imp_name>_<mech>.csv`` and
    ``test_<Model>_<imp_name>_<mech>.csv``.  When a report file already
    exists, the new columns are appended to its previous content.

    Args:
      model: Classifier exposing ``fit`` and ``predict``.
      data: DataFrame whose last column is the class label; all other
        columns are used as input features.
      folds: Number of cross-validation folds.
      imp_name: Name used for the report sub-directory and file name.
      dataset_name: Name used for the report directory.
      mech: Suffix used in the report file name.

    Returns:
      None. Results are printed and written to CSV files.
    """
    # Raw string: same path as before, without invalid escape sequences.
    # Relative report paths below are resolved against this directory.
    os.chdir(r"C:\Anaconda3\Scripts\MALTA\src")
    kf = KFold(n_splits=folds)

    # The class label is the last column; everything else is a feature.
    class_name = data.columns[-1]
    feature_columns = list(data.columns)
    feature_columns.remove(class_name)

    num_classes = np.size(data[class_name].unique())

    x, x_test, y, y_test = train_test_split(
        data[feature_columns], data[class_name], test_size=0.1
    )

    # Accumulators must start at zero: np.empty returns uninitialised memory,
    # which would leak garbage into the averaged metrics.
    precision_sum = np.zeros(num_classes)
    recall_sum = np.zeros(num_classes)
    fscore_sum = np.zeros(num_classes)
    fscore_interval = []  # Per-fold mean F1, used for the confidence interval

    # Start below any reachable accuracy so the first fold always provides a
    # model, even when every fold scores an accuracy of 0.
    best = -1.0
    best_model = None

    for train_index, valid_index in kf.split(x):
        x_train, y_train = x.iloc[train_index], y.iloc[train_index]
        x_valid, y_valid = x.iloc[valid_index], y.iloc[valid_index]

        # Model training and validation
        model.fit(x_train, y_train)
        predict = model.predict(x_valid)

        # Computing the metrics
        precision, recall, fscore, support = classifier_metrics(y_valid, predict)

        # NOTE(review): np.resize repeats or truncates values to reach the
        # target shape, so per-class positions may not line up when a fold
        # does not contain every class - kept from the original behaviour.
        precision = _match_shape(precision, precision_sum.shape)
        recall = _match_shape(recall, recall_sum.shape)
        fscore = _match_shape(fscore, fscore_sum.shape)

        precision_sum = np.add(precision_sum, precision)
        recall_sum = np.add(recall_sum, recall)
        fscore_sum = np.add(fscore_sum, fscore)
        print("F1 Score valid: ", fscore_sum)

        fscore_interval.append(mean_fscore(fscore))

        # Keep the fitted model of the most accurate fold
        accuracy = np.mean(y_valid == predict)
        if accuracy > best:
            best = accuracy
            best_model = copy.deepcopy(model)

    rule = '--------------------------------------------------------------------'

    print('---------------------------- Model ---------------------------------')
    print(model)
    print(rule)

    print('------------------ Cross Validation Results ------------------------')
    print(rule)

    # NOTE(review): the number of decimals passed to np.round equals the
    # number of classes; kept from the original - confirm this is intended.
    valid_result = _metrics_frame(
        precision_sum / folds, recall_sum / folds, fscore_sum / folds, num_classes
    )
    print(valid_result)

    model_name = type(model).__name__
    report_dir = f'reports/classifiers_results/{dataset_name}/{imp_name}'
    report_suffix = f'{model_name}_{imp_name}_{mech}.csv'

    _append_and_save(valid_result, f'{report_dir}/valid_{report_suffix}')

    print(rule)
    print('------------ 95% Confidence Interval Result - F1 Score -----------------')
    print(rule)
    print(interval_confidence(fscore_interval))

    print(rule)

    print('----------------------- Tests Results-------------------------------')
    print(rule)
    p = best_model.predict(x_test)  # Test with the best model from cross-validation

    metrics_summary = precision_recall_fscore_support(y_test, p, average=None)
    test_result = _metrics_frame(
        metrics_summary[0], metrics_summary[1], metrics_summary[2], num_classes
    )
    print(test_result)

    _append_and_save(test_result, f'{report_dir}/test_{report_suffix}')

    print(rule)

    sys.stdout.flush()


def _match_shape(values, shape):
    """Return *values* unchanged, or resized with np.resize if its shape differs."""
    if values.shape != shape:
        values = np.resize(values, shape)
    return values


def _metrics_frame(precision, recall, fscore, decimals):
    """Build a Precision / Recall / F1 Score table rounded to *decimals* places."""
    frame = pd.concat(
        [pd.Series(np.round(values, decimals)) for values in (precision, recall, fscore)],
        axis=1,
    )
    frame.columns = ['Precision', 'Recall', 'F1 Score']
    return frame


def _read_previous_results(path):
    """Load an existing results CSV, skipping malformed lines."""
    try:
        # Keyword available since pandas 1.3; its predecessor
        # ``error_bad_lines`` was removed in pandas 2.0.
        return pd.read_csv(path, sep=';', on_bad_lines='skip', encoding="latin-1")
    except TypeError:
        # Older pandas releases only accept the legacy keyword.
        return pd.read_csv(path, sep=';', error_bad_lines=False, encoding="latin-1")


def _append_and_save(result, path):
    """Write *result* to *path*, placed after the columns already stored there."""
    if os.path.exists(path):
        result = pd.concat([_read_previous_results(path), result], axis=1)
    result.to_csv(path, sep=';', index=False)
    
    