# Apprentissage supervisé : analyse discriminante

# Table of contents
1. [Nuage de points](#part1)
1. [Analyse linéaire discriminante](#part2)
1. [Analyse quadratique discriminante](#part3)


In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

# Nuage de points <a id="part1"></a>


In [None]:
def covariance(sigma1=1., sigma2=1., theta=0.):
    """
        Build a 2x2 covariance matrix from its spectral decomposition.

        The result is R diag(sigma1, sigma2) R^T, where R is the plane rotation
        of angle theta (in radians), so sigma1 and sigma2 are its eigenvalues.
    """
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rot = np.array([[cos_t, -sin_t],
                    [sin_t, cos_t]])
    spectrum = np.array([[sigma1, 0.],
                         [0, sigma2]])
    # Same association order as R.dot(D.dot(R.T)).
    return rot @ (spectrum @ rot.T)

<div class="alert alert-block alert-info">
    Construire une matrice de covariance.
    Quelles sont les valeurs de ses composantes ?
    <br>
    Simuler un jeu de données gaussien à partir de cette covariance.
<!-- <br> -->
</div>

In [None]:
# Answer
from scipy.stats import multivariate_normal

# Define cov with the function covariance, e.g. cov = covariance(sigma1, sigma2, theta)
# Todo

# End todo

# Draw 100 samples from a Gaussian with covariance cov. No mean is passed,
# so scipy's default (zero) mean applies.
X = multivariate_normal.rvs(cov=cov, size=100)

<div class="alert alert-block alert-info">
    Afficher le jeu de données généré et sa moyenne.
<!-- <br> -->
</div>

In [None]:
# Answer
print("Covariance matrix:")
print(cov)

# Empirical mean mu of X (one value per coordinate, since mu[0] and mu[1] are used below)
# Todo

# End todo

# Plot the data with plt.scatter
# Todo

# End todo
# Overlay the empirical mean as a large black dot.
plt.scatter(mu[0], mu[1], c='k', marker='o', s=100)
# Same scale on both axes; the trailing semicolon hides the notebook's text output.
plt.axis('equal');

<div class="alert alert-block alert-info">
    Faire varier la matrice de covariance.
<!-- <br> -->
</div>

In [None]:
# Answer

<div class="alert alert-block alert-info">

Charger le jeu de données `iris` et afficher le nombre de classes.    
<!-- <br> -->

</div>

In [None]:
# Answer
from sklearn.datasets import load_iris

# With return_X_y=True, load_iris returns the pair (data, target) directly.
X, y = load_iris(return_X_y=True)

# Print the number of classes (number of distinct values in y)
# Todo

# End todo

<div class="alert alert-block alert-info">
    Afficher les deux premières classes en fonction des deux premières variables explicatives.
    Peut-on considérer que les classes sont gaussiennes ?
<!-- <br> -->
</div>

In [None]:
# Answer
# Define sub-arrays X_sub and y_sub (samples of the first two classes only)
# Todo

# End todo

# First two explanatory variables, coloured by class label.
plt.scatter(X_sub[:, 0], X_sub[:, 1], c=y_sub, cmap='plasma')

# Analyse linéaire discriminante <a id="part2"></a>


In [None]:
def gaussian_sample(mu=(0, 0), sigma1=1., sigma2=1., theta=0., n=50, random_state=None):
    """
        Draw n points from a 2-D Gaussian distribution.
        Input:
            mu: mean of the distribution (sequence of two numbers)
            sigma1, sigma2, theta: forwarded to covariance() to build the
                covariance matrix
            n: number of points to draw
            random_state: optional seed or generator forwarded to scipy for
                reproducible draws (None keeps scipy's default behaviour)
        Output:
            the array returned by multivariate_normal.rvs
            NOTE(review): scipy appears to squeeze its output, so n=1 would
            give a 1-D array rather than a (1, 2) array -- confirm if needed.
    """
    # The default mean is an immutable tuple rather than a shared mutable list.
    cov = covariance(sigma1, sigma2, theta)
    return multivariate_normal.rvs(mean=mu, cov=cov, size=n, random_state=random_state)

<div class="alert alert-block alert-info">
    Générer un jeu de données à partir de la fonction précédente.  
    Calculer sa moyenne et sa matrice de covariance empiriques.
<!-- <br> -->
</div>

In [None]:
# Answer

<div class="alert alert-block alert-info">
Créer puis afficher un jeu de données constitué de deux classes gaussiennes.    
<!-- <br> -->
</div>

In [None]:
# Answer
# The two datasets X1 and X2 with different means
# Todo

# End todo

# Stack both classes row-wise; X1 samples are labelled +1 and X2 samples -1.
X = np.r_[X1, X2]
y = np.r_[np.ones(X1.shape[0]), -np.ones(X2.shape[0])]

# Plot the resulting two-class dataset
# Todo

# End todo

In [None]:
def plot_frontiere(clfs, data=None, data_labels=None, label=None, num=500, figure=True):
    """
        Plot the decision boundary fun(x)=0 of each classifier in clfs over the
        bounding box of the data, on top of a scatter plot of the data.
        Input:
            clfs: classifier or list of classifiers (each must provide
                decision_function)
            data: input data (X), array-like with two columns (required)
            data_labels: data labels (y); if None, the data are drawn as a
                single unlabelled cloud
            label: classifier label, or list of labels (one per classifier)
            num: discretization parameter (grid points along each axis)
            figure: create a new figure
        Raises:
            ValueError: if data is None
    """
    if data is None:
        raise ValueError("data is required to define the plotting range")
    data = np.asarray(data)

    # A single estimator is wrapped in a list. Testing for decision_function
    # first keeps iterable estimators from being mistaken for a list of
    # classifiers.
    if hasattr(clfs, 'decision_function') or not hasattr(clfs, '__iter__'):
        clfs = [clfs]
    # A str is iterable, so it needs an explicit test: otherwise label='LDA'
    # would be indexed character by character below.
    if label is not None and (isinstance(label, str) or not hasattr(label, '__iter__')):
        label = [label]

    xmin, ymin = data.min(axis=0)
    xmax, ymax = data.max(axis=0)
    # Use num on both axes (np.linspace would otherwise default to 50 points).
    x, y = np.meshgrid(np.linspace(xmin, xmax, num), np.linspace(ymin, ymax, num))

    if figure:
        plt.figure(figsize=(7, 7))
    if data_labels is None:
        plt.scatter(*data.T)
    else:
        # Convert so that the comparison below is element-wise even for lists.
        data_labels = np.asarray(data_labels)
        for cl in np.unique(data_labels):
            plt.scatter(*data[data_labels == cl].T, label=f'Class {cl}')

    grid = np.c_[x.ravel(), y.ravel()]
    for i, clf in enumerate(clfs):
        z = clf.decision_function(grid).reshape(x.shape)
        cs = plt.contour(x, y, z, [0], colors='r')
        if label is not None:
            # Replace the numeric level by the classifier label so that
            # clabel writes that label on the contour line.
            cs.levels = [label[i]]
            plt.gca().clabel(cs)
    if figure:
        plt.axis('image')
    # Leave a 10 % margin around the data on every side.
    diffx, diffy = xmax - xmin, ymax - ymin
    plt.axis([xmin - 0.1*diffx, xmin + 1.1*diffx, ymin - 0.1*diffy, ymin + 1.1*diffy])
    # Only draw a legend when there is something to list in it.
    if plt.gca().get_legend_handles_labels()[0]:
        plt.legend(loc="best")

<div class="alert alert-block alert-info">
Afficher la frontière obtenue par l'analyse linéaire discriminante ainsi que le segment défini par les moyennes des deux classes.
<!-- <br> -->
</div>

In [None]:
# Answer
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Linear discriminant analysis
lda = LinearDiscriminantAnalysis()
# Fit the model on (X, y)
# Todo

# End todo

# coef_ and intercept_ are only available once the model has been fitted.
print("LDA parameters:")
print(lda.coef_, lda.intercept_)

# Means mu1 and mu2 for the two classes
# Todo

# End todo

# Decision boundary of the fitted model, plus the segment joining the two class means.
plot_frontiere(lda, X, y)
plt.plot([mu1[0], mu2[0]], [mu1[1], mu2[1]], 'ko-')

# Analyse quadratique discriminante <a id="part3"></a>


<div class="alert alert-block alert-info">
Compléter le code suivant pour comparer analyses discriminantes linéaire et quadratique dans diverses situations.    
<!-- <br> -->
</div>

In [None]:
# Answer
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

qda = QuadraticDiscriminantAnalysis()

# Gaussian parameters
# NOTE(review): the chained assignment also rebinds the name mu -- confirm this is intended.
mu1 = mu = [0, 0]
mu2 = [5, 3]

# NOTE(review): plot_frontiere opens its own figure unless figure=False is
# passed -- relevant if all scenarios are meant to share this figure.
plt.figure(figsize=(10, 20))
# Each scenario is a pair (p1, p2) of (sigma1, sigma2, theta) tuples, one per
# class; "(p, ) * 2" gives both classes the same covariance parameters.
for (p1, p2) in [((1, 1, 0), ) * 2,
                  ((1, 5, 0), ) * 2,
                  ((1, 5, np.pi/6), ) * 2,
                  ((1, 5, 0), (5, 1, 0)),
                  ((1, 5, 0), (5, 1, np.pi/3))]:
    # Dataset: X1 samples are labelled +1 and X2 samples -1
    X1 = gaussian_sample(mu1, *p1)
    X2 = gaussian_sample(mu2, *p2)
    X = np.r_[X1, X2]
    Y = np.r_[np.ones(X1.shape[0]), -np.ones(X2.shape[0])]
    
    # Discriminant analysis
    # Todo

    # End todo
    
    # Class means
    # Todo

    # End todo
    
    # Plot frontieres and class means
    # Todo

    # End todo