# Integrantes
* ### David Herrera
* ### Estid Lozano

In [201]:
# Imports
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
import openml
import pandas as pd
import scipy.stats

## Exercise 1
### 1.1 Write a probabilistic learner LDA that builds models for binary classification via the linear discriminant analysis. Prediction should be made assuming a (1-dimensional) normal distribution for each class with means and variances according to the built model. When returning probabilities, normalize the densities assigned to each class so that the vector sums up to 1.

In [214]:
class LDA():
    """Binary classifier based on Fisher's linear discriminant analysis.

    ``train`` computes the projection direction ``w`` from the pooled
    within-class scatter matrix and the difference of the two class means.
    Each class is then modelled by a 1-dimensional normal distribution over
    the projected values ``w . x``, using the projected class mean and the
    (population) standard deviation of the projected training points.

    Attributes set by ``train``:
        data:  dict mapping each label to a 2-D array of its training samples.
        means: dict mapping each label to its mean vector in input space.
        w:     unit-length projection direction.
    """

    # Lower bound for a class's projected standard deviation, so a class whose
    # projected points all coincide still yields a finite density.
    _MIN_STD = 1e-12

    def train(self, _X, _Y):
        """Fit the projection direction and the per-class 1-D Gaussians.

        Args:
            _X: sequence of samples convertible to a 2-D float array
                (one row per sample).
            _Y: sequence of hashable class labels, one per row of ``_X``.

        Raises:
            ValueError: if ``_X`` is not 2-D, if ``_X`` and ``_Y`` differ in
                length, if the labels do not form exactly two classes, or if
                no discriminating direction exists (e.g. identical class means).
        """
        samples = np.asarray(_X, dtype=float)
        if samples.ndim != 2:
            raise ValueError("_X must be a 2-D collection of samples")
        labels = list(_Y)
        if len(labels) != len(samples):
            raise ValueError("_X and _Y must have the same number of entries")

        # Group the rows by label, keeping first-seen label order.
        grouped = {}
        for row, label in zip(samples, labels):
            grouped.setdefault(label, []).append(row)
        if len(grouped) != 2:
            raise ValueError(
                "LDA is a binary classifier: expected exactly 2 classes, got %d"
                % len(grouped)
            )

        self.data = {label: np.vstack(rows) for label, rows in grouped.items()}
        self.means = {label: pts.mean(axis=0) for label, pts in self.data.items()}

        # Pooled within-class scatter matrix S = sum_k (X_k - m_k)^T (X_k - m_k).
        n_features = samples.shape[1]
        scatter = np.zeros((n_features, n_features))
        for label, pts in self.data.items():
            centered = pts - self.means[label]
            scatter += centered.T @ centered

        first_mean, second_mean = self.means.values()
        mean_gap = first_mean - second_mean

        # w is proportional to S^{-1} (m_0 - m_1). Decide on singularity BEFORE
        # inverting; a rank-deficient S is handled with the pseudo-inverse.
        if np.linalg.matrix_rank(scatter) == n_features:
            direction = np.linalg.solve(scatter, mean_gap)
        else:
            direction = np.linalg.pinv(scatter) @ mean_gap

        length = np.linalg.norm(direction)
        if not np.isfinite(length) or length == 0.0:
            raise ValueError(
                "cannot determine a projection direction "
                "(class means coincide or the data are degenerate)"
            )
        self.w = direction / length

        # The 1-D class models depend only on the training data, so they are
        # computed once here instead of once per predicted sample.
        self._labels = list(self.data.keys())
        self._proj_mean = np.array(
            [float(self.means[label] @ self.w) for label in self._labels]
        )
        self._proj_std = np.array(
            [
                max(float(np.std(self.data[label] @ self.w)), self._MIN_STD)
                for label in self._labels
            ]
        )

    def predict_proba(self, _X):
        """Return normalised class densities for each sample.

        Args:
            _X: sequence of samples convertible to a 2-D float array.

        Returns:
            Array of shape ``(n_samples, 2)``; every row sums to 1. Columns
            follow the order of ``self.data`` keys.

        Raises:
            ValueError: if a non-empty ``_X`` is not 2-D.
        """
        points = np.asarray(_X, dtype=float)
        if points.size == 0:
            return np.empty((0, len(self._labels)))
        if points.ndim != 2:
            raise ValueError("_X must be a 2-D collection of samples")

        projected = points @ self.w
        # scipy's ``scale`` is the standard deviation, not the variance.
        log_density = scipy.stats.norm.logpdf(
            projected[:, None], loc=self._proj_mean, scale=self._proj_std
        )
        # Work in log space and shift by the row maximum so that far-away
        # samples do not underflow to 0/0 when normalising.
        log_density = log_density - log_density.max(axis=1, keepdims=True)
        density = np.exp(log_density)
        return density / density.sum(axis=1, keepdims=True)

    def predict(self, _X):
        """Return a list with the most probable class label for each sample."""
        probabilities = self.predict_proba(_X)
        return [self._labels[idx] for idx in probabilities.argmax(axis=1)]


# Load the iris table and keep only its first two columns as features.
df = pd.read_csv('iris.csv')
x = df.iloc[:, 0:2].to_numpy()

# Relabel "virginica" as "versicolor" so only two classes remain,
# then take the last column as the target vector.
y = df.replace(to_replace="virginica", value="versicolor").values[:, -1]

# Fit the classifier and show its predictions for three training rows
# (the first, the last and the one at index 10).
model = LDA()
model.train(x, y)
model.predict([x[row] for row in (0, -1, 10)])


['setosa', 'versicolor', 'setosa']

### 1.2 Now implement the kernel-based logic in a KernelLDA classifier. The kernel should be passed as an argument kernel at initialization time, which accepts two elements of the input space and produces their similarity value.

### 1.3 Empirically check that the two algorithms have the same behavior if you use the linear kernel.

## Exercise 2

### 2.1 Implement generators for the polynomial kernel and the Gaussian kernel (so that you can choose the parameters c, q and σ when producing the kernel function).

### 2.2 Write a function to show a projection line w for some given dataset. The intercept should be chosen so that the line passes the mean of the data.

### 2.3 Implement the feature map belonging to the quadratic homogeneous kernel. Consider the PCA iris dataset with two classes. Explicitly transform the dataset with the feature map of the quadratic kernel, apply LDA to the new dataset, and visualize the solution in a 3D plot.

### 2.4 Create a function that takes a 2D dataset $X$ with the ground truth labels $y$ and a prediction vector $\hat{y}$. Create a scatter plot in which the different classes get different symbols, and they are scattered in green if the prediction is correct and in red if the prediction is wrong. Get predictions for the standard LDA and the Kernel LDA with different kernels (try also different parameters for each kernel) and plot the predictions for the Iris PCA dataset. Which algorithm produces the best results?