In [3]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [4]:
# Root of the local repository checkout. Set the DATAMINING_REPO environment
# variable to point at your own clone (e.g.
# "/Users/harun/Desktop/Uni/4. Semester/Data Mining/DataMining_TeamA") instead
# of editing this cell; when it is unset or empty the original hard-coded
# Windows path is used, so existing setups keep working unchanged.
folder_repository = os.environ.get("DATAMINING_REPO") or "C:/Users/andre/Documents/Github/DataMining_TeamA"

# Sub-folders derived from the repository root.
folder_data = f"{folder_repository}/resources/data_classification"
folder_labels = f"{folder_repository}/resources/data_classification/labels"
folder_output = f"{folder_repository}/output/classification"

In [5]:
def accuracy(Y_test, predictions, dataset):
    """Print and return the classification accuracy of per-class score predictions.

    Parameters
    ----------
    Y_test : array-like
        True class labels, one per sample.
    predictions : array-like, 2-D
        One row of per-class scores per sample; the predicted class of a row
        is the index of its largest entry.
    dataset : str
        Label printed in front of the score.

    Returns
    -------
    float
        The value computed by ``accuracy_score`` (also printed as
        ``"<dataset>: <score>"``).
    """
    # Index of the most activated output neuron for every sample at once.
    predicted_classes = np.argmax(np.asarray(predictions), axis=1)

    # Use a distinct local name so the function name is not shadowed.
    score = accuracy_score(Y_test, predicted_classes)
    print(f"{dataset}: {score}")
    return score

In [18]:
def model_arch(data, dataset, testsplit: float, activation, hidden_layers: int, neurons: int, learn_rate_adam: float, epochs: int, batch_size: int, metrics: list, num_classes: int = 30, random_state=None):
    """Train a dense softmax classifier on ``data`` and print its test accuracy.

    Parameters
    ----------
    data : pandas.DataFrame
        All columns except the last are used as features; the last column
        holds the class labels.
    dataset : str
        Dataset name; its first two characters are used as the label of the
        printed accuracy line.
    testsplit : float
        Fraction of rows held out as the test set.
    activation
        Activation passed to every hidden ``Dense`` layer.
    hidden_layers : int
        Number of hidden ``Dense`` layers.
    neurons : int
        Units per hidden layer.
    learn_rate_adam : float
        Learning rate of the Adam optimizer.
    epochs, batch_size : int
        Passed to ``model.fit``.
    metrics : list
        Metrics passed to ``model.compile``.
    num_classes : int, default 30
        Size of the softmax output layer and width of the one-hot encoded
        targets. Labels are expected to be integers in ``[0, num_classes)``.
    random_state : int or None, default None
        Seed for the train/test split only. Network weight initialisation is
        not seeded here, so results can still vary between runs.

    Returns
    -------
    The fitted ``Sequential`` model.
    """
    features = data.iloc[:, :-1]
    labels = data.iloc[:, -1]

    # Split dataset into train and test
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, labels, test_size=testsplit, shuffle=True, random_state=random_state
    )

    # Labels would otherwise be interpreted as ordinal, so one-hot encode them.
    # The width is fixed to num_classes: inferring it from the training split
    # alone can yield fewer columns than the output layer has units whenever
    # the largest label happens to land in the test split.
    Y_train_encoded = to_categorical(Y_train, num_classes=num_classes)

    # Create NN model
    model = Sequential()
    for _ in range(hidden_layers):
        model.add(Dense(neurons, activation=activation))
    model.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learn_rate_adam)

    # Compile model
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=metrics)

    # Train model
    model.fit(X_train, Y_train_encoded, epochs=epochs, batch_size=batch_size, verbose=0)

    # Evaluate on the held-out rows; accuracy() prints the score.
    predictions = model.predict(X_test)
    accuracy(Y_test, predictions, dataset[:2])
    return model
        

In [47]:
# Dataset x0: load the CSV, keep a subset of its columns, then train.
dataset = 'x0_with_labels.csv'
data = pd.read_csv(f"{folder_data}/{dataset}")
df = pd.DataFrame(data)

print(dataset)
# Discard the columns at positions 0-5, 9-10, 14-15 and 19-24.
data.drop(
    columns=data.columns[[*range(6), 9, 10, 14, 15, *range(19, 25)]],
    inplace=True,
)
print(data.head())

# The last column of `data` is used as the label column by model_arch.
model_arch(
    data, dataset,
    testsplit=0.3,
    activation='relu', hidden_layers=2, neurons=64,
    learn_rate_adam=0.001, epochs=8, batch_size=30,
    metrics=['accuracy'],
)


x0_with_labels.csv
          6         7         8        11        12        13        16  \
0  1.118922  1.031271  0.140322  0.906914  0.019883  1.144917  1.024906   
1  1.151829  1.129700 -0.141736  0.841082 -0.174482  1.020378  0.875244   
2  0.899079 -0.009652 -0.000686  1.039476 -0.013493  0.250556 -0.012422   
3 -0.052427 -0.102169  1.027937 -0.046632  1.032304  1.032806  0.137392   
4  0.121280  0.016012  0.000900  0.787623  1.072714  0.186112  1.101517   

         17        18  Labels  
0  1.000246  0.067353       1  
1  0.031351 -0.148845      24  
2  1.163990  1.071173      17  
3  0.956810  0.965369      14  
4  1.096771  0.850321      14  
x0: 0.9770114942528736


<keras.src.engine.sequential.Sequential at 0x222dccdf670>

In [46]:
# Dataset x1: load the CSV and train on all of its columns.
dataset = 'x1_with_labels.csv'
data = pd.read_csv(f"{folder_data}/{dataset}")
df = pd.DataFrame(data)

print(dataset)
print(data.head())

# The last column of `data` is used as the label column by model_arch.
model_arch(
    data, dataset,
    testsplit=0.3,
    activation='relu', hidden_layers=2, neurons=128,
    learn_rate_adam=0.001, epochs=8, batch_size=30,
    metrics=['accuracy'],
)

x1_with_labels.csv
          0         1         2         3         4         5         6  \
0  0.839236  1.026642  0.959892  1.112407  1.004854 -0.024214  0.926802   
1 -0.018542  0.889793  0.940000  0.147768 -0.134966  0.065973  0.994073   
2  0.110265 -0.074504  0.026439 -0.152501 -0.153533  0.148818 -0.149221   
3  0.861938  1.194261  0.994547 -0.110406  0.117239  0.981351  1.031873   
4  0.939356  1.090016 -0.111590  0.159667  1.176273  0.980706  0.926147   

          7         8  Labels  
0 -0.076529  1.051811      21  
1  0.938900  0.199534      12  
2  0.935089 -0.043243      28  
3  0.012474 -0.089401      15  
4  0.956374  0.078384      20  
x1: 0.967280163599182


<keras.src.engine.sequential.Sequential at 0x222dcbdd160>

In [45]:
# Dataset x2: load the CSV, keep a subset of its columns, then train.
dataset = 'x2_with_labels.csv'
data = pd.read_csv(f"{folder_data}/{dataset}")
df = pd.DataFrame(data)

print(dataset)
# Discard the columns at positions 0-15, 19-22, 26-29 and 33-48.
data.drop(
    columns=data.columns[[*range(16), *range(19, 23), *range(26, 30), *range(33, 49)]],
    inplace=True,
)
print(data.head())

# The last column of `data` is used as the label column by model_arch.
model_arch(
    data, dataset,
    testsplit=0.3,
    activation='relu', hidden_layers=2, neurons=128,
    learn_rate_adam=0.001, epochs=8, batch_size=30,
    metrics=['accuracy'],
)

x2_with_labels.csv
         16        17        18        23        24        25        30  \
0  0.958406  0.038049 -0.024188  0.001503  0.966833 -0.136161  0.885701   
1  1.038711  1.139491  0.020461 -0.128193  1.088710  0.961542  0.911665   
2 -0.006220  1.228976  0.907557  0.023742  1.117061  0.223210  1.123130   
3  0.026585  0.767043  0.985374  0.082045  1.225849  0.844652  1.166751   
4  1.058880  0.982245  0.927112 -0.074164  0.955560  1.063187 -0.213305   

         31        32  Labels  
0 -0.054966  0.017958       7  
1  0.973231 -0.130260      20  
2  0.865701  0.962437      18  
3  1.025292  1.036790       8  
4  1.282447  1.089340       8  
x2: 0.7934560327198364


<keras.src.engine.sequential.Sequential at 0x222d92a9a60>