# Unsupervised Learning | Dimensionality Reduction (PCA)

Today's Outline:
- Intuition
- Full Case-study

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

==========

## Principal Component Analysis (PCA) | Intuition (Breast Cancer)

Scikit-Learn Decomposition Module: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition

Visualizing PCA (Demo): https://setosa.io/ev/principal-component-analysis/

PCA Visualization Using Plotly: https://plotly.com/python/pca-visualization/

### Loading Data

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast-cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

In [None]:
# List the fields available on the loaded dataset object.
cancer.keys()

In [None]:
# Print the description text bundled with the dataset.
print(cancer['DESCR'])

In [None]:
# Wrap the raw feature matrix in a DataFrame with one named column per feature.
data = pd.DataFrame(
    data=cancer['data'],
    columns=cancer['feature_names'],
)
# Preview the first rows.
data.head()

### Data Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Fit a standardizer on the full feature table. fit() returns the fitted
# estimator itself, so the trailing bare name displays that same object.
scaler = StandardScaler().fit(data)
scaler

In [None]:
# Standardize every feature using the statistics learned by the fitted scaler.
scaled_data = scaler.transform(data)
# Display the resulting array.
scaled_data

### Data Reduction (PCA)

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Keep only the first two principal components.
pca = PCA(n_components=2)

In [None]:
# Learn the principal axes from the standardized features.
pca.fit(scaled_data)

In [None]:
# Project the standardized features onto the two fitted components.
x_pca = pca.transform(scaled_data)

In [None]:
# Shape before reduction: (samples, original features).
scaled_data.shape

In [None]:
# Shape after reduction: same number of rows, two columns (n_components=2).
x_pca.shape

### Interpreting Results

In [None]:
# Scatter the samples in the 2-D principal-component space, colored by class
# label, to see how well the two components separate the classes.
fig, ax = plt.subplots(figsize=(10, 5))
points = ax.scatter(x_pca[:, 0], x_pca[:, 1], c=cancer['target'], cmap='plasma')
ax.set_xlabel('First principal component')
ax.set_ylabel('Second Principal Component')
ax.set_title('Breast cancer samples projected onto the first two principal components')
# Map each color back to its class name; legend handles come out in ascending
# target order, which matches the order of cancer['target_names'].
ax.legend(points.legend_elements()[0], cancer['target_names'], title='Diagnosis')
# plt.show() renders the figure without echoing the last call's repr.
plt.show()

In [None]:
# Principal axes: one row per retained component, one weight per original feature.
pca.components_

In [None]:
# Amount of variance captured by each retained component.
pca.explained_variance_

In [None]:
# Fraction of the total variance captured by each retained component.
pca.explained_variance_ratio_

In [None]:
# Component loadings as a table: one row per principal component, one column
# per original feature.
data_comp = pd.DataFrame(
    data=pca.components_,
    columns=cancer['feature_names'],
)
data_comp

In [None]:
# Heatmap of the component loadings: shows which original features weigh most
# heavily on each principal component.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    data_comp,
    cmap='plasma',
    # Label rows as PC1, PC2, ... instead of bare positional indices.
    yticklabels=[f'PC{i + 1}' for i in range(len(data_comp))],
    ax=ax,
)
ax.set_title('Feature loadings per principal component')
ax.set_xlabel('Original feature')
ax.set_ylabel('Principal component')
# plt.show() renders the figure without echoing the Axes repr.
plt.show()

==========

## PCA | Full Case-study (Wine)

Wine Dataset: https://archive.ics.uci.edu/ml/datasets/wine

### Importing Libraries & Dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

In [None]:
# Load the wine dataset from a local CSV (the path is relative to the
# notebook's working directory) and preview the first rows.
dataset = pd.read_csv('data/wine.csv')
dataset.head()

In [None]:
# Feature matrix: every column except the last, as a NumPy array.
X = dataset.iloc[:, :-1].values
X

In [None]:
# Target vector: the last column, as a NumPy array.
y = dataset.iloc[:, -1].values
y

### Splitting into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
# NOTE: both splits are overwritten here; re-running this cell after the PCA
# step would rescale the already-projected data.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Training split shape after scaling (scaling does not change the shape).
X_train.shape

In [None]:
# Test split shape after scaling (scaling does not change the shape).
X_test.shape

### Reducing Dimensions (PCA)

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Reduce the wine features to two principal components.
pca = PCA(n_components = 2)

In [None]:
# Fit PCA on the training split only and project it onto the two components.
# NOTE(review): X_train is overwritten with its own projection, so this cell is
# not idempotent - re-running it refits PCA on the already-reduced data.
X_train = pca.fit_transform(X_train)

In [None]:
# Project the test split using the PCA fitted on the training data.
# NOTE(review): X_test is overwritten; running this cell a second time feeds the
# already-reduced array back into pca.transform. Restart and run all instead.
X_test = pca.transform(X_test)

In [None]:
# Training split shape after PCA: two columns (n_components = 2).
X_train.shape

In [None]:
# Test split shape after PCA: two columns (n_components = 2).
X_test.shape

In [None]:
# Scatter the training samples in the 2-D principal-component space, colored by
# class label, to see how well the two components separate the classes.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='rainbow')
ax.set_xlabel('First principal component')
ax.set_ylabel('Second Principal Component')
ax.set_title('Wine training samples in principal-component space')
# One legend entry per distinct class value so the colors are interpretable.
ax.legend(*points.legend_elements(), title='Class')
# plt.show() renders the figure without echoing the last call's repr.
plt.show()

### Training a Classifier (Logistic Regression)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier; random_state is pinned for reproducibility.
classifier = LogisticRegression(random_state = 0)

In [None]:
# Train on the PCA-reduced training split.
classifier.fit(X_train, y_train)

### Evaluating Final Model

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Predict class labels for the PCA-reduced test split and display them.
y_pred = classifier.predict(X_test)
y_pred

In [None]:
# True test labels, shown for visual comparison with the predictions.
y_test

In [None]:
# Confusion matrix of true vs. predicted classes on the test split.
# Passing `labels` explicitly pins the row/column order (sorted classes seen in
# either array, which is also the default), so the labeled table below is
# guaranteed to line up with the matrix.
class_labels = np.unique(np.concatenate([y_test, y_pred]))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
# Show as a labeled table: rows are actual classes, columns are predictions.
pd.DataFrame(
    cm,
    index=pd.Index(class_labels, name='Actual'),
    columns=pd.Index(class_labels, name='Predicted'),
)

In [None]:
# Fraction of test samples classified correctly.
accuracy_score(y_test, y_pred)

==========

# THANK YOU!