In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.optimizers import Adam

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import sys

import matplotlib.pyplot as plt

In [3]:
# Root of the local repository checkout; all other folders are derived from it.
# Alternative root that is currently disabled:
#   "/Users/harun/Desktop/Uni/4. Semester/Data Mining/DataMining_TeamA"
folder_repository = "C:/Users/andre/Documents/Github/DataMining_TeamA"

# The labels folder is a sub-folder of the data folder, so it is built from it.
folder_data = folder_repository + "/resources/data_classification"
folder_labels = folder_data + "/labels"
folder_output = folder_repository + "/output/classification"

In [16]:
def accuracy(Y_test, predictions, dataset):
    """Print and return the accuracy of a batch of per-class score predictions.

    Args:
        Y_test: True class labels, one per sample.
        predictions: Iterable with one row of per-class scores per sample; the
            index of the largest score in a row is taken as the predicted class.
        dataset: Label printed in front of the score.

    Returns:
        The value of ``accuracy_score(Y_test, predicted_classes)``.
    """
    # The predicted class is the output neuron with the strongest activation.
    predicted_classes = [np.argmax(row) for row in predictions]

    score = accuracy_score(Y_test, predicted_classes)
    print(f"{dataset}: {score}")
    return score

In [24]:
def model_arch(data, dataset, testsplit: float, activation, hidden_layers: int, neurons: int, learn_rate_adam: float, epochs: int, batch_size: int, metrics: list, num_classes=None, random_state=None):
    """Train a dense softmax classifier on ``data`` and score it on a held-out split.

    Args:
        data: DataFrame whose last column holds the class label and whose
            remaining columns are the input features.
        dataset: Dataset name; its first two characters label the printed accuracy.
        testsplit: Fraction of rows held out for testing (``test_size``).
        activation: Activation function of every hidden ``Dense`` layer.
        hidden_layers: Number of hidden ``Dense`` layers.
        neurons: Number of units in each hidden layer.
        learn_rate_adam: Learning rate of the Adam optimizer.
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.
        metrics: Metrics list passed to ``model.compile``.
        num_classes: Width of the one-hot encoding and of the softmax layer.
            When ``None`` it is inferred as the largest label in ``data`` plus
            one (labels are assumed to be non-negative integers, as
            ``to_categorical`` expects).
        random_state: Seed forwarded to ``train_test_split``. ``None`` keeps the
            split unseeded. Only the split is seeded here, not the network's
            weight initialisation.

    Returns:
        A dict with the trained model under ``"model"`` and its test accuracy
        under ``"acc"``.
    """
    features = data.iloc[:, :-1]
    labels = data.iloc[:, -1]

    # Derive the class count from the full label column rather than from the
    # train split: a random split can miss the highest label, which would make
    # the one-hot width disagree with the output layer.
    if num_classes is None:
        num_classes = int(labels.max()) + 1

    # Split datasets into train and test
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, labels, test_size=testsplit, shuffle=True, random_state=random_state
    )

    # Labels would otherwise be interpreted as ordinal; one-hot encode them so
    # the model treats them as categories. Only the training labels need
    # encoding because the test labels are compared as class indices.
    Y_train_encoded = to_categorical(Y_train, num_classes=num_classes)

    # Create NN model: a stack of identical hidden layers plus a softmax output.
    model = Sequential()
    for _ in range(hidden_layers):
        model.add(Dense(neurons, activation=activation))
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learn_rate_adam)

    # compile model
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)

    # train model
    model.fit(X_train, Y_train_encoded, epochs=epochs, batch_size=batch_size, verbose=0)

    predictions = model.predict(X_test)
    acc = accuracy(Y_test, predictions, dataset[:2])
    #feature_importance(model)
    return {"model": model, "acc": acc}
        

In [25]:
# Single training run on one dataset file.
dataset = 'x0_with_labels.csv'
data = pd.read_csv(folder_data + "/" + dataset)
df = pd.DataFrame(data)

print(dataset)
# Disabled experiment: drop a subset of columns before training.
#data.drop(data.columns[[0,1,2,3,4,5,9,10,14,15,19,20,21,22,23,24]], axis=1, inplace=True)
#print(data.head())

# The call is the cell's last expression, so the returned dict is displayed.
model_arch(
    data,
    dataset,
    testsplit=0.2,
    # network shape
    activation='relu',
    hidden_layers=2,
    neurons=128,
    # optimisation
    learn_rate_adam=0.001,
    epochs=24,
    batch_size=30,
    metrics=['accuracy'],
)


x0_with_labels.csv
x0: 0.990269716626835


{'model': <keras.src.engine.sequential.Sequential at 0x26e8a6c9b20>,
 'acc': 0.990269716626835}

In [30]:
# Train the same architecture 25 times and keep every result dict
# ({"model": ..., "acc": ...}) so the runs can be compared afterwards.
dataset = 'x0_with_labels.csv'
data = pd.read_csv(folder_data + "/" + dataset)
df = pd.DataFrame(data)

models = []
for i in range(25):
    print(i)
    run_result = model_arch(
        data,
        dataset,
        testsplit=0.2,
        # network shape
        activation='relu',
        hidden_layers=2,
        neurons=128,
        # optimisation
        learn_rate_adam=0.001,
        epochs=24,
        batch_size=30,
        metrics=['accuracy'],
    )
    models.append(run_result)
    
    

0
x0: 0.9895868897234551
1
x0: 0.9889040628200751
2
x0: 0.9909525435302151
3
x0: 0.9875384090133151
4
x0: 0.9877091157391601
5
x0: 0.9877091157391601
6
x0: 0.9885626493683851
7
x0: 0.9849778081256402
8
x0: 0.9885626493683851
9
x0: 0.9894161829976101
10
x0: 0.9871969955616251
11
x0: 0.9889040628200751
12
x0: 0.9878798224650052
13
x0: 0.990269716626835
14
x0: 0.9854899283031752
15
x0: 0.9890747695459201
16
x0: 0.9889040628200751
17
x0: 0.9899283031751451
18
x0: 0.990269716626835
19
x0: 0.9887333560942301
20
x0: 0.9868555821099352
21
x0: 0.9866848753840901
22
x0: 0.9897575964493001
23
x0: 0.9897575964493001
24
x0: 0.9900990099009901


In [31]:
# Pick the run with the highest accuracy. max() returns the first such run when
# several tie, which matches a strict ">" scan from the start of the list.
# NOTE(review): each run is scored on its own random train/test split, so the
# accuracies are measured on different test sets; confirm that taking the
# maximum is the intended selection rule.
best_run = max(models, key=lambda run: run["acc"])
current_acc = best_run["acc"]
current_model = best_run["model"]
print(current_acc)

0.9909525435302151
