# PCA Dimensionality Reduction with the Wine Dataset

In [None]:
import warnings

warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ydata_profiling import ProfileReport

%matplotlib inline

from IPython.display import display
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Load and pre-process data

In [None]:
# Fetch the wine dataset that ships with scikit-learn
wine = datasets.load_wine()

# Summarise what was loaded: shapes of the feature matrix and target
# vector, plus the names attached to the feature columns and the classes
print(f"Data {wine.data.shape}")
print(f"Features names {wine.feature_names}")
print(f"Target {wine.target.shape}")
print(f"Target names {wine.target_names}")

In [None]:
# Wrap the feature matrix in a DataFrame whose columns carry the feature names
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)

# Show the first rows, then the per-column summary statistics
for table in (df.head(), df.describe()):
    display(table)

# Build a profiling report for the DataFrame and save it as an HTML file
# ("profiling.html"); the report is written to disk, not rendered inline
profile = ProfileReport(df, title="Profiling Report")
profile.to_file(output_file="profiling.html")

### Visualizations

In [None]:
def _scatter_3d(points, classes, axis_names, heading):
    """Show a 3D scatter of the first three columns of ``points``.

    A new 10x8 figure is created, the markers are coloured by ``classes``
    with the 'viridis' colormap, the x/y/z axes are labelled with the three
    entries of ``axis_names`` and the plot is titled ``heading``.

    Returns the ``(figure, axes)`` pair after the figure has been shown.
    """
    figure = plt.figure(figsize=(10, 8))
    axes = figure.add_subplot(111, projection='3d')

    axes.scatter(
        points[:, 0],
        points[:, 1],
        points[:, 2],
        c=classes,
        cmap='viridis',
        s=50,
    )

    x_name, y_name, z_name = axis_names
    axes.set_xlabel(x_name)
    axes.set_ylabel(y_name)
    axes.set_zlabel(z_name)

    # Same extent for all three sides of the plotting box
    axes.set_box_aspect([1, 1, 1])
    axes.set_title(heading)
    plt.show()
    return figure, axes


# Load the wine dataset (repeated here so the cell can be run on its own)
wine = datasets.load_wine()

# Class label of every sample; used to colour the scatter plots
y = wine.target

# Standardise the features, then project them onto three PCA components
scaler = StandardScaler()
X_norm = scaler.fit_transform(wine.data)
pca = PCA(n_components=3)
X_reduced = pca.fit_transform(X_norm)

# Raw (unscaled) values of the first three features
raw_axis_names = [
    f"{rank} feature: {name}"
    for rank, name in zip(("1st", "2nd", "3rd"), wine.feature_names)
]
fig, ax = _scatter_3d(wine.data, y, raw_axis_names, 'First Three Features')

# The three PCA-reduced features
pca_axis_names = ('1st eigenvector', '2nd eigenvector', '3rd eigenvector')
fig, ax = _scatter_3d(X_reduced, y, pca_axis_names, 'PCA-Reduced Features')

In [None]:
# Figure 1: the first two raw features, coloured by wine class
plt.figure()
plt.scatter(x=wine.data[:, 0], y=wine.data[:, 1], c=wine.target, cmap="Set1")
plt.xlabel("1st feature")
plt.ylabel("2nd feature")
plt.title("First two features")

# Figure 2: the first two PCA components of the same samples, for comparison
plt.figure()
plt.scatter(x=X_reduced[:, 0], y=X_reduced[:, 1], c=wine.target, cmap="Set1")
plt.title("First two PCA components")
plt.xlabel("1st eigenvector")
plt.ylabel("2nd eigenvector")