# Principal Component Analysis

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the wine data: every column except the last is treated as a feature,
# and the last column is treated as the class label.
dataset = pd.read_csv('wine.csv')
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Splitting the dataset

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature scaling

In [4]:
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training rows only; the same fitted scaler is
# then applied to both the training and the test rows.
sc = StandardScaler().fit(X_train)
X_train_fs = sc.transform(X_train)
X_test_fs = sc.transform(X_test)

## Applying the PCA

In [5]:
from sklearn.decomposition import PCA

# Reduce the standardized features to two principal components. The
# projection is fitted on the training data and reused, unchanged, on the
# test data.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_fs)
X_test_pca = pca.transform(X_test_fs)

## Training the logistic regression on the train set

In [6]:
from sklearn.linear_model import LogisticRegression

# The model is trained on the two PCA components. To train on the full set of
# standardized features instead, pass X_train_fs here (predictions must then
# be made from the matching *_fs arrays as well).
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train_pca, y_train)

LogisticRegression(random_state=0)

## Applying the logistic regression model to the training set

In [7]:
# True training labels reshaped into a single column.
y_train_re = y_train.reshape(-1, 1)

# Predicted class labels for the training rows, reshaped into a single column.
# (Predict from X_train_fs instead if the classifier was fitted without PCA.)
y_train_pred = classifier.predict(X_train_pca).reshape(-1, 1)

# Class-probability estimates for the same training rows.
# (Likewise, use X_train_fs here for the no-PCA variant.)
y_train_pred_proba = classifier.predict_proba(X_train_pca)

## Model performance on the train set

In [8]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    cohen_kappa_score,
    confusion_matrix,
    matthews_corrcoef,
    precision_score,
    roc_auc_score,
)

# Confusion matrix and per-class report for the training predictions.
cm_train = confusion_matrix(y_train, y_train_pred)
cr_train = classification_report(y_train, y_train_pred)
print(cm_train, cr_train, sep='\n')

# Scalar summary scores for the training predictions.
# precision_score and roc_auc_score are imported but deliberately not called.
# NOTE(review): presumably their default (binary) settings do not fit this
# three-class target; an averaging / multi_class strategy would have to be
# chosen before enabling them — confirm.
acc_train = accuracy_score(y_train, y_train_pred)
ck_train = cohen_kappa_score(y_train, y_train_pred)
mcc_train = matthews_corrcoef(y_train, y_train_pred)
print(f'Accuracy = {acc_train:.2f}; Kappa = {ck_train:.2f}; MCC = {mcc_train:.2f}')

[[43  2  0]
 [ 2 52  1]
 [ 0  0 42]]
              precision    recall  f1-score   support

           1       0.96      0.96      0.96        45
           2       0.96      0.95      0.95        55
           3       0.98      1.00      0.99        42

    accuracy                           0.96       142
   macro avg       0.97      0.97      0.97       142
weighted avg       0.96      0.96      0.96       142

Accuracy = 0.96; Kappa = 0.95; MCC = 0.95


## Applying the logistic regression model to the test set

In [9]:
# True test labels reshaped into a single column.
y_test_re = y_test.reshape(-1, 1)

# Predicted class labels for the held-out rows, reshaped into a single column.
# (Predict from X_test_fs instead if the classifier was fitted without PCA.)
y_test_pred = classifier.predict(X_test_pca).reshape(-1, 1)

# Class-probability estimates for the same held-out rows.
# (Likewise, use X_test_fs here for the no-PCA variant.)
y_test_pred_proba = classifier.predict_proba(X_test_pca)

## Model performance on the test set

In [10]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    cohen_kappa_score,
    confusion_matrix,
    matthews_corrcoef,
    precision_score,
    roc_auc_score,
)

# Confusion matrix and per-class report for the test predictions.
cm_test = confusion_matrix(y_test, y_test_pred)
cr_test = classification_report(y_test, y_test_pred)
print(cm_test, cr_test, sep='\n')

# Scalar summary scores for the test predictions.
# precision_score and roc_auc_score are imported but deliberately not called.
# NOTE(review): presumably their default (binary) settings do not fit this
# three-class target; an averaging / multi_class strategy would have to be
# chosen before enabling them — confirm.
acc_test = accuracy_score(y_test, y_test_pred)
ck_test = cohen_kappa_score(y_test, y_test_pred)
mcc_test = matthews_corrcoef(y_test, y_test_pred)
print(f'Accuracy = {acc_test:.2f}; Kappa = {ck_test:.2f}; MCC = {mcc_test:.2f}')

[[14  0  0]
 [ 1 15  0]
 [ 0  0  6]]
              precision    recall  f1-score   support

           1       0.93      1.00      0.97        14
           2       1.00      0.94      0.97        16
           3       1.00      1.00      1.00         6

    accuracy                           0.97        36
   macro avg       0.98      0.98      0.98        36
weighted avg       0.97      0.97      0.97        36

Accuracy = 0.97; Kappa = 0.96; MCC = 0.96
