In [14]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Fetch the bundled Wine dataset and separate the feature matrix from the
# class labels.
wine = load_wine()
X = wine.data
y = wine.target

# Plain-text dump of the feature matrix (pandas abbreviates the middle rows),
# followed by its shape as the cell's displayed value.
print(pd.DataFrame(data=X))
X.shape

        0     1     2     3      4     5     6     7     8      9     10  \
0    14.23  1.71  2.43  15.6  127.0  2.80  3.06  0.28  2.29   5.64  1.04   
1    13.20  1.78  2.14  11.2  100.0  2.65  2.76  0.26  1.28   4.38  1.05   
2    13.16  2.36  2.67  18.6  101.0  2.80  3.24  0.30  2.81   5.68  1.03   
3    14.37  1.95  2.50  16.8  113.0  3.85  3.49  0.24  2.18   7.80  0.86   
4    13.24  2.59  2.87  21.0  118.0  2.80  2.69  0.39  1.82   4.32  1.04   
..     ...   ...   ...   ...    ...   ...   ...   ...   ...    ...   ...   
173  13.71  5.65  2.45  20.5   95.0  1.68  0.61  0.52  1.06   7.70  0.64   
174  13.40  3.91  2.48  23.0  102.0  1.80  0.75  0.43  1.41   7.30  0.70   
175  13.27  4.28  2.26  20.0  120.0  1.59  0.69  0.43  1.35  10.20  0.59   
176  13.17  2.59  2.37  20.0  120.0  1.65  0.68  0.53  1.46   9.30  0.60   
177  14.13  4.10  2.74  24.5   96.0  2.05  0.76  0.56  1.35   9.20  0.61   

       11      12  
0    3.92  1065.0  
1    3.40  1050.0  
2    3.17  1185.0  
3    3.

(178, 13)

In [15]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Learn the standardization statistics from the training rows only, then apply
# the same transformation to both partitions so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Two independent 5-component PCA projections: `pca` works on the raw features,
# `pca_` on the standardized ones.
pca, pca_ = PCA(n_components=5), PCA(n_components=5)

# Raw features. Tuple elements are evaluated left to right, so the projection
# is fitted on the training rows before the test rows are transformed.
X_train_reduced, X_test_reduced = (
    pca.fit_transform(X_train),
    pca.transform(X_test),
)

# Standardized features, same fit-on-train / transform-on-test order.
X_train_reduced_, X_test_reduced_ = (
    pca_.fit_transform(X_train_scaled),
    pca_.transform(X_test_scaled),
)

In [17]:
# Fit one Gaussian Naive Bayes classifier per feature representation.
# `fit` returns the estimator itself, so construction and training are chained.

# PCA-reduced, unscaled features
gnb = GaussianNB().fit(X_train_reduced, y_train)

# PCA-reduced, standardized features
gnb_ = GaussianNB().fit(X_train_reduced_, y_train)

# Original 13 features (no PCA, no scaling)
gnb1 = GaussianNB().fit(X_train, y_train)

# Show the last fitted estimator as the cell's displayed value.
gnb1

In [18]:
# Predictions on training and test data for each of the three classifiers.

# gnb_: trained on PCA-reduced, standardized features
y_train_pred_ = gnb_.predict(X_train_reduced_)
y_test_pred_ = gnb_.predict(X_test_reduced_)

# gnb: trained on PCA-reduced, unscaled features
y_train_pred = gnb.predict(X_train_reduced)
y_test_pred = gnb.predict(X_test_reduced)

# gnb1: trained on the original features (no PCA, no scaling)
y_train_pred1 = gnb1.predict(X_train)
y_test_pred1 = gnb1.predict(X_test)

# Calculate accuracy.
# Every model is scored against the same labels: scaling and PCA only change
# the features, so `y_train` / `y_test` from the split are the ground truth in
# all three cases. (The names `y_train_` / `y_test_` are never defined in this
# notebook and would raise NameError on a fresh kernel.)
train_accuracy_ = accuracy_score(y_train, y_train_pred_)
test_accuracy_ = accuracy_score(y_test, y_test_pred_)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

train_accuracy1 = accuracy_score(y_train, y_train_pred1)
test_accuracy1 = accuracy_score(y_test, y_test_pred1)

# NOTE: the "scaled dataset" figures below come from the model trained on
# standardized features that were also reduced with PCA (gnb_).
print(f"Training Accuracy for the scaled dataset: {train_accuracy_:.4f}")
print(f"Test Accuracy for the scaled dataset: {test_accuracy_:.4f}")
print()
print(f"Training Accuracy after PCA, unscaled: {train_accuracy:.4f}")
print(f"Test Accuracy after PCA, unscaled: {test_accuracy:.4f}")
print()
print(f"Training Accuracy for the original data, unscaled: {train_accuracy1:.4f}")
print(f"Test Accuracy for the original data, unscaled: {test_accuracy1:.4f}")

Training Accuracy for the scaled dataset: 0.9648
Test Accuracy for the scaled dataset: 1.0000

Training Accuracy after PCA, unscaled: 0.9366
Test Accuracy after PCA, unscaled: 1.0000

Training Accuracy for the original data, unscaled: 0.9718
Test Accuracy for the original data, unscaled: 1.0000


In [20]:
# Fraction of the total variance captured by each of the 5 components of `pca`,
# the projection fitted on the unscaled training features.
pca.explained_variance_ratio_

array([9.97808300e-01, 1.99598478e-03, 1.07916251e-04, 5.64421443e-05,
       1.36651225e-05])