# Practice Question

## GMM

1.1. Build a GMM on the Iris data (first 2 features) and tune your model for different covariance types.

1.2. Plot the results as shown in the hands on.

## LDA

2.1. Perform Linear Discriminant Analysis on the Iris Data (first 2 features). 

2.2. Plot the results as shown in the hands on.

In [11]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

# Import libraries here

In [2]:
# Load the iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

In [33]:
#split the dataset into train, valid and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=5)

In [34]:
# select the best covariance_type hyperparameter within ['full', 'tied', 'diag', 'spherical']
# Train GMM model with the best covariance_type on the training and validation set
# Find Test accuracy on the test data.

hyperparameter = ['full', 'tied', 'diag', 'spherical']

for hp in hyperparameter:
    clf = GaussianMixture(n_components=3, covariance_type=hp)
    clf.fit(X_train, y_train)
    pred_val = clf.predict(X_val)
    print ('Test accuracy = ' + str(accuracy_score(y_val, pred_val)))


Test accuracy = 0.2916666666666667
Test accuracy = 0.625
Test accuracy = 0.125
Test accuracy = 0.125


In [20]:
# Plot the decision boundary.

# Visualize (Thanks Ron Weiss for this code.)

colors = ['navy', 'turquoise', 'darkorange']

def make_ellipses(gmm, ax):
    for n, color in enumerate(colors):
        
        if gmm.covariance_type == 'full':
            covariances = gmm.covariances_[n][:2, :2]
        elif gmm.covariance_type == 'tied':
            covariances = gmm.covariances_[:2, :2]
        elif gmm.covariance_type == 'diag':
            covariances = np.diag(gmm.covariances_[n][:2])
        elif gmm.covariance_type == 'spherical':
            covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]
            
        v, w = np.linalg.eigh(covariances)
        u = w[0] / np.linalg.norm(w[0])
        angle = np.arctan2(u[1], u[0])
        angle = 180 * angle / np.pi  # convert to degrees
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        ell = mpl.patches.Ellipse(gmm.means_[n, :2], v[0], v[1], 180 + angle, color=color)
        ell.set_alpha(0.5)
        ax.add_artist(ell)



In [21]:
for n, color in enumerate(colors):
    print(n, color)

0 navy
1 turquoise
2 darkorange


In [None]:

clf = GaussianMixture(n_components=3, covariance_type='diag')
clf.fit(X_train, y_train)
pred_test = clf.predict(X_test)

# Plot
plt.figure(figsize=(10,10))
plt.ylim([-1,3])
plt.xlim([11,15])
plt.xlabel('Alcohol', fontsize=15)
plt.ylabel('Hue', fontsize=15)
h = plt.subplot()
make_ellipses(clf, h)

# Plot train data with dots
for n, color in enumerate(colors):
    train_data = pd.DataFrame(X_train)
    plt.scatter(train_data[0], train_data[1], s=10, color=color)

# Plot the test data with crosses
for n, color in enumerate(colors):
    test_data = X_test[y_test == n]
    plt.scatter(test_data[0], test_data[1], marker='x', color=color)

plt.title('Gaussian Mixture Model', fontsize=15)

plt.show()


In [None]:
plt.figure(figsize=(10,10))
plt.ylim([-1,3])
plt.xlim([11,15])
plt.xlabel('Alcohol', fontsize=15)
plt.ylabel('Hue', fontsize=15)
h = plt.subplot()
make_ellipses(clf, h)

# Plot train data with dots
for n, color in enumerate(colors):
    train_data = pd.DataFrame(X_train)
    plt.scatter(train_data[0], train_data[1], s=10, color=color)

# Plot the test data with crosses
for n, color in enumerate(colors):
    test_data = X_test[y_test == n]
    plt.scatter(test_data[0], test_data[1], marker='x', color=color)

plt.title('Gaussian Mixture Model', fontsize=15)

plt.show()
