# ML model examples

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Use seaborn's 'notebook' plotting context with font sizes scaled by 1.5.
sns.set_context('notebook', font_scale=1.5)

## Dimension reduction

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the breast cancer dataset bundled with scikit-learn. as_frame=True is
# requested so that bc.data and bc.target can be inspected with .head() below.
bc = load_breast_cancer(as_frame=True)

In [None]:
# Preview the first rows of the feature table.
bc.data.head()

In [None]:
# Names of the target classes.
bc.target_names

In [None]:
# Preview the first entries of the target column.
bc.target.head()

In [None]:
! python3 -m pip install --quiet umap-learn
! python3 -m pip install --quiet phate

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP
from phate import PHATE

In [None]:
# Dimension-reduction methods to compare, keyed by the label used on the plot
# axes. Insertion order determines which panel each method is drawn in.
dr_models = {}
dr_models['PCA'] = PCA()
dr_models['t-SNE'] = TSNE()
dr_models['UMAP'] = UMAP()
dr_models['PHATE'] = PHATE(verbose=0)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# One scaler instance shared by the cells below; each fit_transform call refits it.
scaler = StandardScaler()

In [None]:
# One panel per dimension-reduction method, points coloured by bc.target.
fig, axes = plt.subplots(2, 2, figsize=(8, 8))
axes = axes.ravel()

# The standardised features and the colour labels are identical for every
# method, so compute them once instead of on each loop iteration.
X_scaled = scaler.fit_transform(bc.data)
target = bc.target

for i, (k, v) in enumerate(dr_models.items()):
    X = v.fit_transform(X_scaled)
    ax = axes[i]
    # Only the first two embedding dimensions are drawn.
    ax.scatter(X[:, 0], X[:, 1], c=target)
    ax.set_xlabel(f'{k}1')
    ax.set_ylabel(f'{k}2')
    ax.set_xticks([])
    ax.set_yticks([])

#### A3.2 Clustering

- K-means
- Agglomerative hierarchical clustering
- Mixture models

In [None]:
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.mixture import GaussianMixture

In [None]:
# Clustering methods to compare, keyed by the title shown above each panel.
# The 'true' entry carries no estimator (None): it is the slot for the
# reference labels taken from bc.target.
cl_models = {}
cl_models['true'] = None
cl_models['k-means'] = KMeans(n_clusters=2)
cl_models['ahc'] = AgglomerativeClustering(n_clusters=2)
cl_models['gmm'] = GaussianMixture(n_components=2)

In [None]:
# Project the standardised features onto their principal components; the
# first two columns of X are the coordinates used by the clustering plots.
pca = PCA()
X_std = scaler.fit_transform(bc.data)
X = pca.fit_transform(X_std)

In [None]:
# One panel per entry in cl_models, all drawn on the same PCA coordinates (X)
# and coloured by the labels that entry produces.
fig, axes = plt.subplots(2, 2, figsize=(8, 8))
axes = axes.ravel()

# Every estimator clusters the same standardised features, so scale once
# instead of refitting the scaler on each loop iteration.
X_scaled = scaler.fit_transform(bc.data)

for i, (k, v) in enumerate(cl_models.items()):
    # The reference-label entry is marked by a None estimator; test for that
    # sentinel rather than for its position so reordering the dict is safe.
    if v is None:
        y = bc.target
    else:
        y = v.fit_predict(X_scaled)
    ax = axes[i]
    ax.scatter(X[:, 0], X[:, 1], c=y)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(k)

#### A3.3 Supervised learning

- Nearest neighbor
![img](https://res.cloudinary.com/dyd911kmh/image/upload/f_auto,q_auto:best/v1531424125/KNN_final_a1mrv9.png)
- Linear models
![img](https://static.javatpoint.com/tutorial/machine-learning/images/machine-learning-polynomial-regression.png)
- Support vector machines
![img](https://upload.wikimedia.org/wikipedia/commons/thumb/7/72/SVM_margin.png/300px-SVM_margin.png)
- Trees
![img](https://3qeqpr26caki16dnhd19sv6by6v-wpengine.netdna-ssl.com/wp-content/uploads/2016/02/Example-Decision-Tree.png)
- Neural networks
![img](https://ml-cheatsheet.readthedocs.io/en/latest/_images/dynamic_resizing_neural_network_4_obs.png)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

### Preprocess data

In [None]:
# Features and labels for the supervised-learning section.
X, y = bc.data, bc.target

In [None]:
# Split into train and test sets, stratified on y so both parts keep the
# class balance; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=0,
)

In [None]:
# Fit the scaler on the training split only, then apply those same statistics
# to the test split so no information from the test data enters the fit.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Relative frequency of each class in the test split.
# NOTE(review): presumably a reference point for the accuracy scores printed
# further down (e.g. the dummy baseline) - confirm.
pd.Series(y_test).value_counts(normalize=True)

In [None]:
# Classifiers to compare, keyed by the short name printed next to each score.
# 'dummy' ignores the features (strategy='prior') and serves as the baseline.
# A decision tree is included so that every model family listed in the section
# introduction, and imported above, is actually evaluated.
sl_modles = dict(
    dummy=DummyClassifier(strategy='prior'),
    knn=KNeighborsClassifier(),
    lr=LogisticRegression(),
    svc=SVC(),
    tree=DecisionTreeClassifier(),
    nn=MLPClassifier(max_iter=500),
)

In [None]:
# Train every classifier on the scaled training split and report its score
# on the held-out test split, one line per model.
for label, estimator in sl_modles.items():
    fitted = estimator.fit(X_train, y_train)
    accuracy = fitted.score(X_test, y_test)
    print(f'{label}: {accuracy:.3f}')