In [10]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [2]:
# Open the CSV file
df = pd.read_csv('Data_Preproc_OneHotEncoding.csv')
df.head()


Unnamed: 0,cap-diameter,stem-height,stem-width,0,1,2,3,4,5,6,...,79,80,81,82,83,84,85,86,87,88
0,15.26,16.95,17.09,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,16.6,17.99,18.19,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,14.07,17.8,17.74,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,14.17,15.77,15.98,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,14.64,16.53,17.2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [3]:
# Read the labels
print("The Shape of One-Hot Encoded Data is: ", df.shape)
labels = pd.read_csv('Labels.csv')

The Shape of One-Hot Encoded Data is:  (61069, 92)


In [5]:
# Extract values from the DataFrame
X = df.values
y = labels.values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [16]:
mlp = MLPClassifier(hidden_layer_sizes=(10, 5), 
                    activation='relu', 
                    random_state=42, 
                    early_stopping=True, 
                    max_iter=100,
                    batch_size=128,
                    verbose=True)

mlp.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 0.62321974
Validation score: 0.750709
Iteration 2, loss = 0.45175187
Validation score: 0.841738
Iteration 3, loss = 0.32745739
Validation score: 0.893691
Iteration 4, loss = 0.24925821
Validation score: 0.917485
Iteration 5, loss = 0.19609047
Validation score: 0.939315
Iteration 6, loss = 0.16085304
Validation score: 0.951321
Iteration 7, loss = 0.13616556
Validation score: 0.953285
Iteration 8, loss = 0.11823820
Validation score: 0.960926
Iteration 9, loss = 0.10396530
Validation score: 0.965073
Iteration 10, loss = 0.09118083
Validation score: 0.963545
Iteration 11, loss = 0.07955139
Validation score: 0.975551
Iteration 12, loss = 0.06817555
Validation score: 0.981227
Iteration 13, loss = 0.05836032
Validation score: 0.984065
Iteration 14, loss = 0.04983447
Validation score: 0.985156
Iteration 15, loss = 0.04331576
Validation score: 0.987776
Iteration 16, loss = 0.03869693
Validation score: 0.989522
Iteration 17, loss = 0.03370960
Validation score: 0.990613
Iterat

In [17]:
# Predict the labels of the test set: y_pred
y_pred = mlp.predict(X_test)

# Compute and print the confusion matrix and classification report
print("Accuracy: {}".format(mlp.score(X_test, y_test)))

# Print the confusion matrix
print(confusion_matrix(y_test, y_pred))

# Print the classification report
print(classification_report(y_test, y_pred))


Accuracy: 0.9979696096410794
[[6764   17]
 [  14 8473]]
              precision    recall  f1-score   support

           e       1.00      1.00      1.00      6781
           p       1.00      1.00      1.00      8487

    accuracy                           1.00     15268
   macro avg       1.00      1.00      1.00     15268
weighted avg       1.00      1.00      1.00     15268



In [14]:
# PCA with 51 components
from sklearn.decomposition import PCA

pca = PCA(n_components=51)
pca.fit(X)
X_pca = pca.transform(X)

# Split the data into training and testing sets
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(X_pca, y, test_size=0.25, random_state=42)

mlp_pca = MLPClassifier(hidden_layer_sizes=(10, 5),
                        activation='relu',
                        random_state=42,
                        early_stopping=True,
                        max_iter=100,
                        batch_size=128,
                        verbose=True)

mlp_pca.fit(X_train_pca, y_train_pca)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 0.59851891
Validation score: 0.796333
Iteration 2, loss = 0.41215731
Validation score: 0.878629
Iteration 3, loss = 0.30617104
Validation score: 0.907662
Iteration 4, loss = 0.23559579
Validation score: 0.928618
Iteration 5, loss = 0.18304190
Validation score: 0.943462
Iteration 6, loss = 0.14530654
Validation score: 0.954813
Iteration 7, loss = 0.11864977
Validation score: 0.964418
Iteration 8, loss = 0.09980906
Validation score: 0.970312
Iteration 9, loss = 0.08463902
Validation score: 0.975551
Iteration 10, loss = 0.07401303
Validation score: 0.979480
Iteration 11, loss = 0.06440196
Validation score: 0.981227
Iteration 12, loss = 0.05678358
Validation score: 0.982973
Iteration 13, loss = 0.05089071
Validation score: 0.982537
Iteration 14, loss = 0.04617216
Validation score: 0.984283
Iteration 15, loss = 0.04244176
Validation score: 0.986248
Iteration 16, loss = 0.03844934
Validation score: 0.986684
Iteration 17, loss = 0.03540565
Validation score: 0.987557
Iterat

In [15]:
# Predict the labels of the test set: y_pred
y_pred_pca = mlp_pca.predict(X_test_pca)

# Compute and print the confusion matrix and classification report
print("Accuracy: {}".format(mlp_pca.score(X_test_pca, y_test_pca)))

# Print the confusion matrix
print(confusion_matrix(y_test_pca, y_pred_pca))

# Print the classification report
print(classification_report(y_test_pca, y_pred_pca))

Accuracy: 0.9984935813466073
[[6772    9]
 [  14 8473]]
              precision    recall  f1-score   support

           e       1.00      1.00      1.00      6781
           p       1.00      1.00      1.00      8487

    accuracy                           1.00     15268
   macro avg       1.00      1.00      1.00     15268
weighted avg       1.00      1.00      1.00     15268

