**Index**

- [Developed ANN](#Developed-ANN)
- [ANN from SciKit Learn](#ANN-from-SciKit-Learn)
- [Decision Tree](#Decision-Tree)
- [Support Vector Machine](#Support-Vector-Machine)
- [K-Nearest Neighbors](#K-Nearest-Neighbors)
- [Naive Bayes](#Naive-Bayes)
- [Logistic Regression](#Logistic-Regression)

In [1]:
using DataFrames
using Plots
using StatsPlots
using CSV

In [22]:
# Load custom functions from provided files
include("preprocessing.jl")
include("metrics.jl")
include("training.jl")



trainClassEnsemble (generic function with 8 methods)

In [3]:
# Set the random seed for reproducibility
Random.seed!(42)

# Load the dataset
dataset_path = "dataset.csv"
data = CSV.read(dataset_path, DataFrame)

Row,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Nacionality,Mother's qualification,Father's qualification,Mother's occupation,Father's occupation,Displaced,Educational special needs,Debtor,Tuition fees up to date,Gender,Scholarship holder,Age at enrollment,International,Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (approved),Curricular units 1st sem (grade),Curricular units 1st sem (without evaluations),Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Int64,Float64,Int64,Int64,Int64,Int64,Int64,Float64,Int64,Float64,Float64,Float64,String15
1,1,8,5,2,1,1,1,13,10,6,10,1,0,0,1,1,0,20,0,0,0,0,0,0.0,0,0,0,0,0,0.0,0,10.8,1.4,1.74,Dropout
2,1,6,1,11,1,1,1,1,3,4,4,1,0,0,0,1,0,19,0,0,6,6,6,14.0,0,0,6,6,6,13.6667,0,13.9,-0.3,0.79,Graduate
3,1,1,5,5,1,1,1,22,27,10,10,1,0,0,0,1,0,19,0,0,6,0,0,0.0,0,0,6,0,0,0.0,0,10.8,1.4,1.74,Dropout
4,1,8,2,15,1,1,1,23,27,6,4,1,0,0,1,0,0,20,0,0,6,8,6,13.4286,0,0,6,10,5,12.4,0,9.4,-0.8,-3.12,Graduate
5,2,12,1,3,0,1,1,22,28,10,10,0,0,0,1,0,0,45,0,0,6,9,5,12.3333,0,0,6,6,6,13.0,0,13.9,-0.3,0.79,Graduate
6,2,12,1,17,0,12,1,22,27,10,8,0,0,1,1,1,0,50,0,0,5,10,5,11.8571,0,0,5,17,5,11.5,5,16.2,0.3,-0.92,Graduate
7,1,1,1,12,1,1,1,13,28,8,11,1,0,0,1,0,1,18,0,0,7,9,7,13.3,0,0,8,8,8,14.345,0,15.5,2.8,-4.06,Graduate
8,1,9,4,11,1,1,1,22,27,10,10,1,0,0,0,1,0,22,0,0,5,5,0,0.0,0,0,5,5,0,0.0,0,15.5,2.8,-4.06,Dropout
9,1,1,3,10,1,1,15,1,1,10,10,0,0,0,1,0,1,21,1,0,6,8,6,13.875,0,0,6,7,6,14.1429,0,16.2,0.3,-0.92,Graduate
10,1,1,1,10,1,1,1,1,14,5,8,1,0,1,0,0,0,18,0,0,6,9,5,11.4,0,0,6,14,2,13.5,0,8.9,1.4,3.51,Dropout


In [4]:
# Separate features and target
target_column = :Target
inputs = select(data, Not(target_column))
targets = data[!, target_column];

# Training

In [9]:
inputs = Float32.(Matrix(inputs))

# Define the categories and their mapping
label_mapping = Dict("Dropout" => 0, "Graduate" => 1, "Enrolled" => 2)

# Encode the targets
targets_label_encoded = [label_mapping[label] for label in targets]

println("Encoded targets: ", targets_label_encoded)

# To decode later, create a reverse mapping
reverse_mapping = Dict(v => k for (k, v) in label_mapping)
decoded_targets = [reverse_mapping[code] for code in targets_label_encoded]

println("Decoded targets: ", decoded_targets)


k = 5
N = size(inputs, 1)
Random.seed!(42)
fold_indices = crossValidation(targets, k)
metrics_to_save = [:accuracy, :precision, :recall, :f1_score];

Encoded targets: [0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 2, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, 1, 0, 0, 1, 2, 1, 1, 2, 2, 2, 0, 1, 1, 0, 2, 2, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 2, 1, 1, 1, 1, 2, 2, 0, 1, 0, 0, 1, 2, 1, 1, 2, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 2, 1, 1, 0, 2, 2, 2, 1, 1, 0, 1, 0, 1, 1, 0, 1, 2, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0, 0, 1, 1, 2, 1, 2, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 2, 1, 1, 0, 2, 1, 1, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 2, 1, 0, 1, 1, 2, 0, 0, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 2, 2, 1, 0, 1, 1, 1, 0, 1, 1, 2, 0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 2, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1, 2, 1, 1, 1

### Developed ANN

In [13]:
hyperparameters = Dict(
    "topology" => [10, 10],
    "learningRate" => 0.01,
    "maxEpochs" => 100,
    "repetitionsTraining" => 10,
    "validationRatio" => 0.15,
    "maxEpochsVal" => 10,
    "minLoss" => 0.0001
)

println("Training developed ANN")

general_results, class_results = modelCrossValidation(
    :ANN,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training developed ANN
Mean results for fold 1:
	accuracy: 0.83732681440415
		Class 1: 0.8488713318284425
		Class 2: 0.8329571106094807
		Class 3: 0.8287810383747178
	precision: 0.7543142503305811
		Class 1: 0.7399763858501089
		Class 2: 0.7848136672588153
		Class 3: 0.6952296534416196
	recall: 0.7553047404063205
		Class 1: 0.8231578947368423
		Class 2: 0.9187782805429865
		Class 3: 0.17924528301886794
	f1_score: 0.7139195989820467
		Class 1: 0.7784032373605024
		Class 2: 0.846020782333983
		Class 3: 0.23111104565238905
Mean results for fold 2:
	accuracy: 0.8135169331928884
		Class 1: 0.8415819209039548
		Class 2: 0.7925423728813559
		Class 3: 0.8216949152542373
	precision: 0.6653517983367581
		Class 1: 0.6792450095312383
		Class 2: 0.7457358111910524
		Class 3: 0.4170788067592087
	recall: 0.7279096045197739
		Class 1: 0.7538732394366197
		Class 2: 0.9282805429864254
		Class 3: 0.12452830188679245
	f1_score: 0.6722913690723156
		Class 1: 0.7139618852603653
		Class 2: 0.8227507107372934

### ANN from SciKit Learn

In [14]:
hyperparameters = Dict(
    :hidden_layer_sizes => [10, 10],
    :max_iter => 100,
    :early_stopping => true,
    :validation_fraction => 0.15,
    :tol => 0.0001,
    :learning_rate_init => 0.01
)

println("Training ANN model")

general_results, class_results = modelCrossValidation(
    :scikit_ANN,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training ANN model
Mean results for fold 1:
	accuracy: 0.8412106558504756
		Class 1: 0.8555304740406321
		Class 2: 0.8386004514672686
		Class 3: 0.8227990970654627
	precision: 0.7381135888222811
		Class 1: 0.7679180887372014
		Class 2: 0.8008048289738431
		Class 3: 0.5104166666666666
	recall: 0.7584650112866818
		Class 1: 0.7894736842105263
		Class 2: 0.9004524886877828
		Class 3: 0.3081761006289308
	f1_score: 0.7423021009288068
		Class 1: 0.7785467128027682
		Class 2: 0.8477103301384451
		Class 3: 0.38431372549019605
Mean results for fold 2:
	accuracy: 0.8400676689329375
		Class 1: 0.8305084745762712
		Class 2: 0.8723163841807909
		Class 3: 0.8090395480225989
	precision: 0.7390820774344621
		Class 1: 0.7829457364341085
		Class 2: 0.8301158301158301
		Class 3: 0.45454545454545453
	recall: 0.7559322033898305
		Class 1: 0.9140271493212669
		Class 2: 0.7570422535211268
		Class 3: 0.31446540880503143
	f1_score: 0.742146708443458
		Class 1: 0.8434237995824634
		Class 2: 0.7918968692449354
	

### Decision Tree

In [16]:
hyperparameters = Dict(
    :max_depth => 5,
    :criterion => "gini",
    :min_samples_split => 2,
)

println("Training DT model")

general_results, class_results = modelCrossValidation(
    :DT,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training DT model
Mean results for fold 1:
	accuracy: 0.8349953375558601
		Class 1: 0.8171557562076749
		Class 2: 0.8656884875846501
		Class 3: 0.8295711060948081
	precision: 0.7387693895860018
		Class 1: 0.7447552447552448
		Class 2: 0.82421875
		Class 3: 0.5689655172413793
	recall: 0.7562076749435666
		Class 1: 0.9638009049773756
		Class 2: 0.7403508771929824
		Class 3: 0.20754716981132076
	f1_score: 0.7246665896541603
		Class 1: 0.8402366863905327
		Class 2: 0.7800369685767097
		Class 3: 0.30414746543778803
Mean results for fold 2:
	accuracy: 0.8402553544639153
		Class 1: 0.8305084745762712
		Class 2: 0.8621468926553673
		Class 3: 0.8282485875706215
	precision: 0.7533604794322419
		Class 1: 0.7654545454545455
		Class 2: 0.8584070796460177
		Class 3: 0.5321100917431193
	recall: 0.7604519774011299
		Class 1: 0.9524886877828054
		Class 2: 0.6830985915492958
		Class 3: 0.36477987421383645
	f1_score: 0.7458180375153362
		Class 1: 0.8487903225806451
		Class 2: 0.7607843137254903
		Class 3

### Support Vector Machine

In [18]:
hyperparameters = Dict(
    :kernel => "linear",
    :C => 1.0,
    :gamma => "auto",
    :probability => true,
)

println("Training SVM model")

general_results, class_results = modelCrossValidation(
    :SVC,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training SVM model
Mean results for fold 1:
	accuracy: 0.8502743963026563
		Class 1: 0.8600451467268623
		Class 2: 0.8510158013544018
		Class 3: 0.8306997742663657
	precision: 0.7558854729087078
		Class 1: 0.7992565055762082
		Class 2: 0.8051181102362205
		Class 3: 0.5412844036697247
	recall: 0.7708803611738149
		Class 1: 0.7543859649122807
		Class 2: 0.9253393665158371
		Class 3: 0.3710691823899371
	f1_score: 0.7582416615418175
		Class 1: 0.7761732851985559
		Class 2: 0.8610526315789473
		Class 3: 0.4402985074626865
Mean results for fold 2:
	accuracy: 0.8490816815091449
		Class 1: 0.8361581920903954
		Class 2: 0.880225988700565
		Class 3: 0.8293785310734463
	precision: 0.7591112935125428
		Class 1: 0.7850287907869482
		Class 2: 0.8423076923076923
		Class 3: 0.5384615384615384
	recall: 0.7728813559322034
		Class 1: 0.9253393665158371
		Class 2: 0.7711267605633803
		Class 3: 0.3522012578616352
	f1_score: 0.7591190407131005
		Class 1: 0.8494288681204569
		Class 2: 0.8051470588235294
		Cl

### K-Nearest Neighbors

In [19]:
hyperparameters = Dict(
    :n_neighbors => 5,
    :weights => "uniform",
    :metric => "euclidean",
)

println("Training KNN model")

general_results, class_results = modelCrossValidation(
    :KNN,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training KNN model
Mean results for fold 1:
	accuracy: 0.7916511676492619
		Class 1: 0.7776523702031602
		Class 2: 0.7787810383747178
		Class 3: 0.8205417607223476
	precision: 0.6675603442544094
		Class 1: 0.7397260273972602
		Class 2: 0.336283185840708
		Class 3: 0.7404580152671756
	recall: 0.6884875846501128
		Class 1: 0.8552036199095022
		Class 2: 0.2389937106918239
		Class 3: 0.6807017543859649
	f1_score: 0.6740579923213379
		Class 1: 0.7932843651626442
		Class 2: 0.27941176470588236
		Class 3: 0.7093235831809872
Mean results for fold 2:
	accuracy: 0.8037345590347601
		Class 1: 0.7819209039548023
		Class 2: 0.8429378531073446
		Class 3: 0.7943502824858757
	precision: 0.692493126432572
		Class 1: 0.749498997995992
		Class 2: 0.7675276752767528
		Class 3: 0.4
	recall: 0.7096045197740114
		Class 1: 0.8461538461538461
		Class 2: 0.7323943661971831
		Class 3: 0.2893081761006289
	f1_score: 0.6978579781290131
		Class 1: 0.7948990435706694
		Class 2: 0.7495495495495496
		Class 3: 0.3357664

### Naive Bayes

In [23]:
hyperparameters = Dict(
    :var_smoothing => 1e-9
)

println("Training NB model")

general_results, class_results = modelCrossValidation(
    :NB,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training NB model
Mean results for fold 1:
	accuracy: 0.7994435640436384
		Class 1: 0.8306997742663657
		Class 2: 0.781038374717833
		Class 3: 0.7945823927765236
	precision: 0.6913456927583177
		Class 1: 0.7472527472527473
		Class 2: 0.756198347107438
		Class 3: 0.4108527131782946
	recall: 0.7031602708803611
		Class 1: 0.7157894736842105
		Class 2: 0.8280542986425339
		Class 3: 0.3333333333333333
	f1_score: 0.6956066570452416
		Class 1: 0.7311827956989246
		Class 2: 0.7904967602591793
		Class 3: 0.3680555555555556
Mean results for fold 2:
	accuracy: 0.8036962558651728
		Class 1: 0.8305084745762712
		Class 2: 0.7898305084745763
		Class 3: 0.7943502824858757
	precision: 0.6958844942135557
		Class 1: 0.7392857142857143
		Class 2: 0.770042194092827
		Class 3: 0.4122137404580153
	recall: 0.7073446327683616
		Class 1: 0.7288732394366197
		Class 2: 0.8257918552036199
		Class 3: 0.33962264150943394
	f1_score: 0.7004867644144345
		Class 1: 0.7340425531914894
		Class 2: 0.7969432314410481
		Clas

### Logistic Regression

In [25]:
hyperparameters = Dict(
    :max_iter => 200
)

println("Training LR model")

general_results, class_results = modelCrossValidation(
    :LR,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training LR model
Mean results for fold 1:
	accuracy: 0.8473801140387976
		Class 1: 0.8555304740406321
		Class 2: 0.8465011286681715
		Class 3: 0.835214446952596
	precision: 0.7511067792418931
		Class 1: 0.7643097643097643
		Class 2: 0.8072289156626506
		Class 3: 0.5714285714285714
	recall: 0.7686230248306998
		Class 1: 0.7964912280701755
		Class 2: 0.9095022624434389
		Class 3: 0.3270440251572327
	f1_score: 0.7522738729781638
		Class 1: 0.7800687285223367
		Class 2: 0.8553191489361701
		Class 3: 0.41600000000000004
Mean results for fold 2:
	accuracy: 0.846819240958856
		Class 1: 0.8757062146892656
		Class 2: 0.8361581920903954
		Class 3: 0.8248587570621468
	precision: 0.7496602187159088
		Class 1: 0.8107142857142857
		Class 2: 0.7928994082840237
		Class 3: 0.5204081632653061
	recall: 0.768361581920904
		Class 1: 0.7992957746478874
		Class 2: 0.9095022624434389
		Class 3: 0.32075471698113206
	f1_score: 0.752746599851334
		Class 1: 0.8049645390070923
		Class 2: 0.8472075869336144
		Clas

In [26]:
hyperparameters = Dict(
    :class_weight => "balanced"
)

println("Training balanced LR model")

general_results, class_results = modelCrossValidation(
    :LR,
    hyperparameters,
    inputs,
    targets,
    fold_indices;
    metricsToSave=metrics_to_save,
    normalizationType=:zeromean
);

Training balanced LR model
Mean results for fold 1:
	accuracy: 0.8400590576257714
		Class 1: 0.8623024830699775
		Class 2: 0.8408577878103838
		Class 3: 0.7979683972911964
	precision: 0.7802508123928651
		Class 1: 0.8196078431372549
		Class 2: 0.8716049382716049
		Class 3: 0.4557522123893805
	recall: 0.7505643340857788
		Class 1: 0.7333333333333333
		Class 2: 0.7986425339366516
		Class 3: 0.6477987421383647
	f1_score: 0.7608428247767465
		Class 1: 0.7740740740740741
		Class 2: 0.833530106257379
		Class 3: 0.535064935064935
Mean results for fold 2:
	accuracy: 0.8414095566408121
		Class 1: 0.8779661016949153
		Class 2: 0.8372881355932204
		Class 3: 0.7875706214689265
	precision: 0.7751786933932051
		Class 1: 0.852
		Class 2: 0.8497652582159625
		Class 3: 0.430622009569378
	recall: 0.751412429378531
		Class 1: 0.75
		Class 2: 0.8190045248868778
		Class 3: 0.5660377358490566
	f1_score: 0.7604591502182504
		Class 1: 0.7977528089887641
		Class 2: 0.8341013824884792
		Class 3: 0.4891304347826