In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the iris dataset through seaborn and preview its first rows.
iris = sns.load_dataset('iris')
iris.head()

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the label; the label is the species column.
X, Y = iris.drop(columns='species'), iris['species']

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.20, random_state=123
)

## Unsupervised Learning: Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA

# Fit a two-component PCA on the features (fit returns the estimator itself),
# then project the same data onto those two components.
model = PCA(n_components=2).fit(X)
X_2D = model.transform(X)

In [None]:
# Preview the first rows of the feature table X.
X.head()

In [None]:
# Show the first ten rows of the PCA-transformed array.
X_2D[:10]

In [None]:
# Each row of the transpose is one PCA component; unpack them into new columns.
iris['PCA1'], iris['PCA2'] = X_2D.T
iris.head()

In [None]:
# Scatter the two PCA components coloured by species (no regression line).
# x and y are passed by keyword: newer seaborn releases reject them as
# positional arguments, while the keyword form works on old and new versions.
sns.lmplot(x='PCA1', y='PCA2', hue='species', data=iris, fit_reg=False);

## Unsupervised Learning: Clustering

In [None]:
from sklearn.mixture import GaussianMixture as GMM

# Fit a three-component Gaussian mixture with full covariance matrices.
# random_state is fixed (same seed as the train/test splits) so that the
# cluster label numbering is reproducible from one run to the next.
model = GMM(n_components=3, covariance_type='full', random_state=123)

model.fit(X)

# Assign every row to one of the fitted mixture components.
gmm_predictions = model.predict(X)

In [None]:
# Store each row's predicted mixture component alongside the original data.
iris['cluster'] = gmm_predictions
iris.head()

In [None]:
# Count how many rows were assigned to each cluster label.
iris['cluster'].value_counts()

In [None]:
# One panel per predicted cluster, coloured by true species, to compare the
# unsupervised grouping against the labels. x and y are passed by keyword:
# newer seaborn releases reject them as positional arguments.
sns.lmplot(x='PCA1', y='PCA2', data=iris, hue='species', col='cluster', fit_reg=False);

## Example: Handwritten Digits

In [None]:
from sklearn.datasets import load_digits
# Load the digits dataset from sklearn.datasets.
digits = load_digits()

In [None]:
# List the fields available in the dataset object; displaying the whole
# object would dump its full contents into the cell output.
digits.keys()

In [None]:
# Inspect the dimensions of the image array.
digits['images'].shape

In [None]:
# Draw the first 100 digit images on a 10x10 grid of axes.
fig, axs = plt.subplots(10, 10)
for i, ax in enumerate(axs.flat):  # .flat walks the grid row by row
    ax.imshow(digits['images'][i], cmap='binary', interpolation='nearest')
    ax.axis('off')

In [None]:
# Feature array and target labels for the digits data.
X, Y = digits['data'], digits['target']

In [None]:
# Dimensions of the digits feature array.
X.shape

In [None]:
# Dimensions of the digits target array.
Y.shape

In [None]:
from sklearn.manifold import Isomap

# Learn a two-dimensional Isomap embedding (fit returns the estimator itself)
# and map the same samples into it.
iso = Isomap(n_components=2).fit(X)
projections = iso.transform(X)

projections.shape

In [None]:
# Scatter the Isomap embedding, coloured by digit label with a 10-level
# colormap. plt.get_cmap is used because plt.cm.get_cmap was deprecated and
# then removed in recent matplotlib releases.
fig, ax = plt.subplots()
mappable = ax.scatter(projections[:, 0], projections[:, 1], c=digits['target'],
                      edgecolor='none', alpha=0.7, cmap=plt.get_cmap('coolwarm', 10))
fig.colorbar(mappable, label='digit label');

In [None]:
# Split the digits data 80/20 with a fixed seed so the evaluation is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=123,
)

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB

# Train a Gaussian naive Bayes classifier on the training split, then
# predict labels for the held-out test split.
model = GaussianNB()
model.fit(X_train, Y_train)
predictions = model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix
# Rows of the confusion matrix are the true labels, columns the predictions.
mat = confusion_matrix(Y_test, predictions)

# Annotated square heatmap without a colour bar; axes labelled in one call.
ax = sns.heatmap(mat, square=True, annot=True, cbar=False)
ax.set(xlabel='Predicted', ylabel='Actual');