# Compare Perform PCA and LDA & CNN

# Install and import relevant libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout

KeyboardInterrupt: 

# Load Data for PCA and LDA

In [3]:
# Charger le DataFrame initial
x1 = pd.read_csv("../Ressources/Datasets/stand_norm_e1.txt", header=None, delimiter=' ', names=[
    'Feature1',
    'Feature2',
    'Feature3',
    'Feature4',
    'Feature5',
    'Feature6',
    'Feature7'
])
x2 = pd.read_csv("../Ressources/Datasets/stand_norm_e2.txt",  header=None, delimiter=' ', names=[
    'Feature8',
    'Feature9',
    'Feature10',
    'Feature11',
    'Feature12',
    'Feature13',
    'Feature14'
])

# Charger les nouvelles valeurs depuis y2_e1.txt
new_values = pd.read_csv("../Ressources/Datasets/y2_e1.txt", header=None, names=['Class'], delimiter=' ')

# Concaténer les DataFrames avec les colonnes 'Class' et 'New_Class'
df = pd.concat([new_values, x1, x2], axis=1)

df.head(len(df))

Unnamed: 0,Class,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,Feature10,Feature11,Feature12,Feature13,Feature14
0,0.0,35.533354,220.110174,185.322913,714.120693,3719.146472,0.199611,0.807951,31.626118,345.489585,465.024867,353.379868,679.766033,0.199611,1.172721
1,0.0,12.153222,290.799141,371.267847,825.465771,235.087734,0.233832,0.710784,14.583007,297.938838,238.719070,1180.971365,1441.838840,0.233832,0.871765
2,0.0,15.687117,245.596011,432.266516,969.556869,4529.009906,0.266850,0.607620,14.940141,375.341165,439.139407,1028.171307,2469.411191,0.266850,0.786855
3,0.0,4.130666,266.347021,1400.323393,1503.693799,2897.194341,0.298691,0.572977,31.846588,304.508444,594.597040,1049.210472,1477.424605,0.298691,0.852576
4,0.0,1.666385,72.802457,107.825008,1057.444093,2901.767221,0.329382,0.613656,5.950176,94.237243,73.547132,396.930317,669.509993,0.329382,0.856799
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12805,0.0,3.851420,115.558659,38.865765,512.790147,1403.891146,1.683803,0.981003,20.521331,255.291245,69.638338,58.645717,248.293827,1.683803,1.334752
12806,0.0,26.924435,195.108723,36.567537,559.356964,287.251317,1.742832,1.241980,46.152025,280.984170,22.588877,97.207134,97.334776,1.742832,1.388819
12807,0.0,10.201896,92.691810,60.630080,111.331959,426.887229,1.803927,1.228706,18.222472,124.216958,91.341961,56.843374,493.068512,1.803927,1.363538
12808,0.0,24.657509,70.277027,23.014987,58.710847,748.486167,1.867127,1.103956,42.625600,143.469790,41.622062,86.254182,290.279950,1.867127,1.278275


# Load Data for CNN with 2 dataset concatenated

In [32]:
dataset_e1 = pd.read_csv("../Ressources/Datasets/stand_norm_e1.txt", header=None, delimiter=' ', names=[
    'Feature1',
    'Feature2',
    'Feature3',
    'Feature4',
    'Feature5',
    'Feature6',
    'Feature7'
])
dataset_e2 = pd.read_csv("../Ressources/Datasets/stand_norm_e2.txt",  header=None, delimiter=' ', names=[
    'Feature8',
    'Feature9',
    'Feature10',
    'Feature11',
    'Feature12',
    'Feature13',
    'Feature14'
])
new_values = pd.read_csv("y2_e1.txt", header=None, names=['Class'], delimiter=' ')

# Concaténe les DataFrames avec les colonnes 'Class' et 'New_Class'
dataset = pd.concat([new_values, dataset_e1, dataset_e2], axis=1)

dataset.head(len(dataset))

# Split Data into Features and Target variable for PCA and LDA

In [33]:
X_pca_lda = df.drop('Class', axis=1)
y_pca_lda = df['Class']

# Split Data for CNN

In [34]:
X_cnn = dataset.iloc[:, 1:5].values
y_cnn = dataset['Class'].values
le = LabelEncoder()
y_cnn_encoded = le.fit_transform(y_cnn)
y_cnn_encoded = tf.keras.utils.to_categorical(y_cnn_encoded)

# Standardize Features for PCA and LDA

In [35]:
scaler = StandardScaler()
X_scaled_pca_lda = scaler.fit_transform(X_pca_lda)

# Split Data into Training and Testing sets for PCA and LDA

In [36]:
X_train_pca_lda, X_test_pca_lda, y_train_pca_lda, y_test_pca_lda = train_test_split(X_scaled_pca_lda, y_pca_lda, test_size=0.2)

# Split Data into Training and Testing sets for CNN

In [37]:
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(X_cnn, y_cnn_encoded, test_size=0.2, random_state=42)

# Define CNN Model

In [38]:
model = Sequential()
model.add(Conv1D(filters=128, kernel_size=3, activation='relu', padding='same', input_shape=(X_train_cnn.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(y_train_cnn.shape[1], activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train_cnn, y_train_cnn, epochs=20, batch_size=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7a63d4e8f040>

# Define PCA Model

In [39]:
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_pca_lda)
X_test_pca = pca.transform(X_test_pca_lda)

# Define LDA Model

In [40]:
lda = LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train_pca_lda, y_train_pca_lda)
X_test_lda = lda.transform(X_test_pca_lda)

# Evaluate Models

In [41]:
pca_score = accuracy_score(y_test_pca_lda, lda.predict(X_test_pca_lda))
lda_score = accuracy_score(y_test_pca_lda, lda.predict(X_test_pca_lda))
cnn_loss, cnn_accuracy = model.evaluate(X_test_cnn, y_test_cnn, verbose=0)

# Compare Performance

In [42]:
if pca_score > lda_score and pca_score > cnn_accuracy:
    print("PCA performs the best with a score of {:.2f}".format(pca_score))
elif lda_score > pca_score and lda_score > cnn_accuracy:
    print("LDA performs the best with a score of {:.2f}".format(lda_score))
else:
    print("CNN performs the best with an accuracy of {:.2f}%".format(cnn_accuracy*100))

CNN performs the best with an accuracy of 77.91%


Lors de la concaténation des deux ensembles de données, à savoir stand_norm_e1 et *stand_norm_e2*, nous observons une diminution des performances, avec une précision maximale obtenue par le réseau de neurones convolutifs (CNN) de **77,91 %**. En revanche, lorsque le premier ensemble de données stand_norm_e1 est utilisé isolément, la performance du CNN s'améliore, atteignant une précision de **78,81 %.**