In [3]:
import contextlib
import itertools
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [4]:
# Absolute path of the local repository checkout. Keep exactly one of the
# assignments below active, matching the machine the notebook runs on.
#folder_repository = "/Users/harun/Desktop/Uni/4. Semester/Data Mining/DataMining_TeamA"
folder_repository = "C:/Users/andre/Documents/Github/DataMining_TeamA"

# Classification inputs, their label files (a subfolder of the data folder),
# and the folder that receives generated results.
folder_data = folder_repository + "/resources/data_classification"
folder_labels = folder_data + "/labels"
folder_output = folder_repository + "/output/classification"

In [5]:
def accuracy(Y_test, predictions, dataset):
    """Print and return the classification accuracy of ``predictions``.

    Args:
        Y_test: True class labels, one per sample.
        predictions: Iterable of per-sample score vectors (one entry per
            class); the index of the largest entry is taken as the
            predicted class.
        dataset: Tag printed in front of the score to identify the run.

    Returns:
        The value returned by ``accuracy_score`` for the true labels and the
        predicted classes. It is also printed as ``"<dataset>: <score>"``.
    """
    # The most activated output neuron is the predicted class of each sample.
    predicted_classes = [np.argmax(prediction) for prediction in predictions]

    # Named `score` so the local does not shadow this function's own name.
    score = accuracy_score(Y_test, predicted_classes)
    print(f"{dataset}: {score}")

    # Returned (in addition to being printed) so callers can collect results.
    return score

In [18]:
def model_arch(data, dataset, testsplit: float, activation, hidden_layers: int, neurons: int, learn_rate_adam: float, epochs: int, batch_size: int, metrics: list):
    """Train a dense softmax classifier on ``data`` and print its test accuracy.

    Args:
        data: DataFrame whose last column holds the class label and whose
            remaining columns are the features.
        dataset: Name of the dataset; its first two characters are used as
            the tag passed to ``accuracy``.
        testsplit: Fraction of the rows held out for testing.
        activation: Activation used by every hidden layer.
        hidden_layers: Number of hidden ``Dense`` layers.
        neurons: Units per hidden layer.
        learn_rate_adam: Learning rate handed to the Adam optimizer.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        metrics: Metric list forwarded to ``model.compile``.

    Returns:
        The fitted ``Sequential`` model.
    """
    features = data.iloc[:, :-1]
    labels = data.iloc[:, -1]

    # Derive the number of classes once from the complete label column.
    # Letting to_categorical infer it from the training split alone yields a
    # narrower encoding whenever the split misses the highest class id, which
    # then no longer matches the output layer.
    # Assumes labels are non-negative integer class ids - TODO confirm.
    num_classes = int(labels.max()) + 1

    # Split the dataset into train and test parts.
    X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=testsplit, shuffle=True)

    # Labels would otherwise be interpreted as ordinal values; one-hot encode
    # them so the model treats them as categories.
    Y_train_encoded = to_categorical(Y_train, num_classes=num_classes)

    # Create the NN model: `hidden_layers` identical dense layers followed by
    # one softmax unit per class.
    model = Sequential()
    for _ in range(hidden_layers):
        model.add(Dense(neurons, activation=activation))
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learn_rate_adam)

    # Compile the model.
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)

    # Train the model.
    model.fit(X_train, Y_train_encoded, epochs=epochs, batch_size=batch_size, verbose=0)

    # Evaluate on the held-out rows; accuracy() works on the raw labels.
    predictions = model.predict(X_test)
    accuracy(Y_test, predictions, dataset[:2])
    #feature_importance(model)
    return model
        

In [47]:
# Load one labelled dataset from the data folder.
dataset = 'x0_with_labels.csv'
data = pd.read_csv(f"{folder_data}/{dataset}")
df = pd.DataFrame(data)

# Show which file was loaded and a preview of its first rows.
print(dataset)
#data.drop(data.columns[[0,1,2,3,4,5,9,10,14,15,19,20,21,22,23,24]], axis=1, inplace=True)
print(data.head())

# Train and evaluate a single configuration; the returned model is the cell's
# displayed value.
model_arch(
    data,
    dataset,
    testsplit=0.3,
    activation='relu',
    hidden_layers=2,
    neurons=64,
    learn_rate_adam=0.001,
    epochs=8,
    batch_size=30,
    metrics=['accuracy'],
)


x0_with_labels.csv
          6         7         8        11        12        13        16  \
0  1.118922  1.031271  0.140322  0.906914  0.019883  1.144917  1.024906   
1  1.151829  1.129700 -0.141736  0.841082 -0.174482  1.020378  0.875244   
2  0.899079 -0.009652 -0.000686  1.039476 -0.013493  0.250556 -0.012422   
3 -0.052427 -0.102169  1.027937 -0.046632  1.032304  1.032806  0.137392   
4  0.121280  0.016012  0.000900  0.787623  1.072714  0.186112  1.101517   

         17        18  Labels  
0  1.000246  0.067353       1  
1  0.031351 -0.148845      24  
2  1.163990  1.071173      17  
3  0.956810  0.965369      14  
4  1.096771  0.850321      14  
x0: 0.9770114942528736


<keras.src.engine.sequential.Sequential at 0x222dccdf670>

In [None]:
# Grid search over the model_arch hyperparameters. Everything printed while
# the results file is open (the row prefix below and the line printed by
# accuracy() inside model_arch) is captured into the file.
filename=f"{folder_output}/acc2.csv"
mode="w"

# Candidate values per hyperparameter; the trailing comments keep the wider
# ranges that were considered.
testsplit=[0.15, 0.25, 0.5] #[0.1, 0.15, 0.2, 0.25, 0.3, 0.5]
activation=['relu', 'sigmoid', 'linear'] #['relu', 'sigmoid', 'tanh', 'linear']
hidden_layers=[2, 3, 4, 5]
neurons=[32, 64, 128]
learn_rate_adam=[0.0001]
epochs=[4, 8, 12, 16, 20]
batch_size=[32, 64, 128]
# NOTE(review): confirm that Keras accepts 'top_k_accuracy' as a metric name.
metrics=[['accuracy'], ['top_k_accuracy']] #[['accuracy'], ['top_k_accuracy'], ['true_positives']]

# Redirect stdout only for the duration of the search. The `with` block closes
# the file and restores the notebook's stdout afterwards, also when a run
# raises, instead of leaving sys.stdout pointing at a never-closed file.
with open(filename, mode) as results_file, contextlib.redirect_stdout(results_file):
    # NOTE(review): the header lists x0/x1/x2 columns, but model_arch evaluates
    # one dataset per call - confirm the intended row layout.
    print("testsplit;activation;hidden_layers;neurons;learn_rate_adam;epochs;batch_size;metrics;x0;acc_x0;x1;acc_x1;x2;acc_x2;0")

    # product() varies the last list fastest, i.e. the same visiting order as
    # the equivalent nested for-loops.
    for t, a, h, n, lr, e, b, m in itertools.product(
        testsplit, activation, hidden_layers, neurons, learn_rate_adam, epochs, batch_size, metrics
    ):
        print(f"{t};{a};{h};{n};{lr};{e};{b};{m};", end="")
        # model_arch requires the data frame and dataset name; they come from
        # the `data` / `dataset` globals defined when the dataset was loaded.
        model_arch(data, dataset, testsplit=t, activation=a, hidden_layers=h, neurons=n, learn_rate_adam=lr, epochs=e, batch_size=b, metrics=m)
        print("0")