# Compare the Performance of PCA, LDA, and a CNN

# Import relevant libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout

# Load Data for PCA and LDA

In [None]:
# Read the whitespace-delimited feature table and its label file.
# The separator is a raw string so that `\s` reaches the regex engine
# untouched instead of being parsed as an (invalid) string escape.
df = pd.read_csv("../Resources/Datasets/stand_norm_e1.txt", sep=r'\s+')
new_values = pd.read_csv(
    "../Resources/Datasets/y2_e1.txt",
    header=None,
    names=['New_Class'],
    sep=r'\s+',
)
# Column assignment aligns on the row index: any row of `df` whose index
# label is missing from `new_values` receives NaN as its class.
df['Class'] = new_values['New_Class']

# Load Data for CNN

In [4]:
# Read both whitespace-delimited feature tables and stack them row-wise.
# The separator is a raw string so that `\s` is not parsed as an (invalid)
# string escape.
dataset_e1 = pd.read_csv("../Resources/Datasets/stand_norm_e1.txt", sep=r'\s+')
dataset_e2 = pd.read_csv("../Resources/Datasets/stand_norm_e2.txt", sep=r'\s+')
dataset = pd.concat([dataset_e1, dataset_e2], axis=0)
# Load the labels from the same directory the PCA/LDA cell uses, rather than
# from a bare filename resolved against the current working directory.
new_values = pd.read_csv(
    "../Resources/Datasets/y2_e1.txt",
    header=None,
    names=['New_Class'],
    sep=r'\s+',
)
# NOTE(review): the concatenated frame keeps each file's own row index, so the
# index labels repeat. Because this assignment aligns on index, rows that came
# from `stand_norm_e2.txt` receive the label of the same-numbered row in
# `y2_e1.txt`. Confirm that both files really share one label sequence; if the
# second file has its own labels they should be loaded and concatenated too.
dataset['Class'] = new_values['New_Class']

# Split Data into Features and Target variable for PCA and LDA

In [None]:
# Target: the 'Class' column. Features: every remaining column of `df`.
y_pca_lda = df['Class']
X_pca_lda = df.drop(columns='Class')

# Split Data for CNN

In [6]:
# Features are taken by position: the columns at positions 1, 2, 3 and 4.
# NOTE(review): the slice is positional, so it silently picks different
# columns if the file layout changes — confirm these are the intended ones.
cnn_feature_columns = slice(1, 5)
X_cnn = dataset.iloc[:, cnn_feature_columns].to_numpy()
# Raw class labels as a NumPy array.
y_cnn = dataset['Class'].to_numpy()

# Standardize Features for PCA and LDA

In [7]:
# Learn the scaling statistics, then apply them to the same feature table.
# NOTE(review): the scaler is fitted on the full table before the train/test
# split, so the held-out rows contribute to the statistics.
scaler = StandardScaler().fit(X_pca_lda)
X_scaled_pca_lda = scaler.transform(X_pca_lda)

# Split Data into Training and Testing sets for PCA and LDA

In [8]:
# Hold out 20% of the rows for testing. The split is seeded (with the same
# seed the CNN split uses) so that Restart & Run All reproduces the scores.
X_train_pca_lda, X_test_pca_lda, y_train_pca_lda, y_test_pca_lda = train_test_split(
    X_scaled_pca_lda,
    y_pca_lda,
    test_size=0.2,
    random_state=42,
)

# Split Data into Training and Testing sets for CNN

In [None]:
# The network below ends in a softmax layer sized by `y_train_cnn.shape[1]`
# and is trained with categorical cross-entropy, so the labels must be a 2-D
# one-hot matrix. Map the raw labels to consecutive integers, then one-hot
# encode them.
label_encoder = LabelEncoder()
y_cnn_encoded = tf.keras.utils.to_categorical(label_encoder.fit_transform(y_cnn))

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(
    X_cnn,
    y_cnn_encoded,
    test_size=0.2,
    random_state=42,
)

# Define and Train CNN Model

In [10]:
# Input length and output width are read off the training arrays.
n_features = X_train_cnn.shape[1]
n_classes = y_train_cnn.shape[1]

# Two Conv1D + max-pooling stages, then a dense head with dropout and a
# softmax output with one unit per one-hot label column.
# NOTE(review): the first layer declares inputs of shape (n_features, 1);
# confirm the arrays passed to fit/evaluate are compatible with that.
model = Sequential([
    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same', input_shape=(n_features, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs in mini-batches of 20 samples, printing progress.
model.fit(
    X_train_cnn,
    y_train_cnn,
    epochs=20,
    batch_size=20,
    verbose=1,
)

# Define PCA Model

In [11]:
# Fit a 2-component PCA on the training split only, then project the held-out
# split with the already-fitted components.
pca = PCA(n_components=2)
X_train_pca, X_test_pca = (
    pca.fit_transform(X_train_pca_lda),
    pca.transform(X_test_pca_lda),
)

# Define LDA Model

In [12]:
# Fit LDA on the training split with its labels, then project both splits
# with the fitted model.
# NOTE(review): scikit-learn limits n_components to min(n_classes - 1,
# n_features); n_components=2 therefore presumes at least three classes —
# confirm against the label file.
lda = LinearDiscriminantAnalysis(n_components=2).fit(X_train_pca_lda, y_train_pca_lda)
X_train_lda = lda.transform(X_train_pca_lda)
X_test_lda = lda.transform(X_test_pca_lda)

# Evaluate Models

In [13]:
# PCA only projects features; it has no predict(). To give it a score of its
# own (rather than re-reporting the LDA score), fit a classifier on the PCA
# projection of the training split and score it on the projected test split.
pca_classifier = LinearDiscriminantAnalysis()
pca_classifier.fit(X_train_pca, y_train_pca_lda)
pca_score = accuracy_score(y_test_pca_lda, pca_classifier.predict(X_test_pca))

# LDA used directly as a classifier on the standardized features.
lda_score = accuracy_score(y_test_pca_lda, lda.predict(X_test_pca_lda))

# evaluate() returns the loss followed by the compiled metrics (accuracy).
# NOTE(review): the CNN is trained and tested on the concatenated e1 + e2
# table, whereas PCA/LDA use e1 only, so the scores are not computed on the
# same test rows.
cnn_loss, cnn_accuracy = model.evaluate(X_test_cnn, y_test_cnn, verbose=0)

# Compare Performance

In [None]:
# Pick the model with the highest score. Selecting with max() avoids the
# fall-through of a strict-inequality chain, where any tie between PCA and LDA
# would report the CNN as the winner even if it scored lowest. When scores are
# exactly equal, the first entry in this dict (PCA, then LDA, then CNN) wins.
scores = {"PCA": pca_score, "LDA": lda_score, "CNN": cnn_accuracy}
best_model = max(scores, key=scores.get)

if best_model == "CNN":
    # The CNN accuracy is reported as a percentage.
    print("CNN performs the best with an accuracy of {:.2f}%".format(cnn_accuracy*100))
else:
    # PCA and LDA scores are reported as fractions in [0, 1].
    print("{} performs the best with a score of {:.2f}".format(best_model, scores[best_model]))