# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [84]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [89]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data()
print(x.shape, y.shape)


(1000, 3) (1000,)


In [93]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    n = y.shape[0]
    if y.ndim == 1:
      y = y.reshape(-1,1)
    data = np.hstack((x,y))
    data_suffe = np.random.permutation(data)
    X_train = data_suffe[:round(n*train_size),:-1]
    y_train = data_suffe[:round(n*train_size):,-1]
    X_test = data[round(n*train_size): ,:-1]
    y_test = data[round(n*train_size):,-1]

    return X_train, y_train, X_test, y_test
     

In [94]:
X_train, y_train, X_test, y_test= split_data(x,y, train_size= 0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [95]:
def phi_function(y,k):

  # we can use np.unique(y) to see our different class which the output will be array([0, 1])so we have class 0 and class 1
  if k == 1 :
    s = np.sum(y==1)
    
  else:
    s= np.sum(y==0)
  return s
 
 

In [96]:
def mu_function(x, y,k ):
  n,d = x.shape
  k = len(np.unique(y))
  mu = np.zeros((k,d))
  x1 = np.zeros((d,1))
  s = 0 #number of example in our class 1 
  for i in range(n):
    if y[i]== 1:
    
      mu[1] += x[i,:]/phi_function(y,1)

    else:
      mu[0] += x[i,:]/phi_function(y,0)
  return mu
    

In [98]:
mu_function(x, y,1 )

array([[ 0.99515416,  1.04188282,  0.99941748],
       [-1.01309105,  0.95696514, -0.93218425]])

In [99]:
def covariance(x ,mu):
  n,d = x.shape
  cov= np.zeros((d,d))
  for i in range(d):
    for j in range(d):
      cov[i,j]= (1/n)*np.sum((x[:,i] - mu[i])*(x[:,j] - mu[j]).T)

  return cov

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
 

In [100]:
np.cov(x,rowvar=False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [101]:
covariance(x, np.mean(x, axis = 0))

array([[1.84310829, 0.02787855, 1.00037396],
       [0.02787855, 1.0007055 , 0.05533637],
       [1.00037396, 0.05533637, 1.74657168]])

In [118]:
#from _typeshed import NoneType
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    def _init_(self):
      self.phi = None
      self.sigma = None
      self.mu = None
  
  def fit(self,x,y):
    k= np.unique(y).shape[0] # Number of class.
    d=  x.shape[1]#input dim
    m=x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma=  np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros(d)# d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.
    for clas in range(k):
      self.phi[clas] = np.sum(clas==y)/m
      self.mu[clas] = np.mean(x[clas==y] , axis =0)
      self.sigma[clas] = covariance(x[clas==y] ,self.mu[clas])
      


  def predict_proba(self,x):
    from math import pi
    # reshape or flatt x.
    
    d= x.shape[1]
    k_class= 2 # Number of classes we have in our case it's k = 2
    p_x_y0 = np.zeros((x.shape[0],k_class))
    for clas in range(k_class):

      
      a = 1/((2*pi)**(d/2))
      detsig = np.linalg.det(self.sigma[clas])**(1/2)
      inversig = np.linalg.inv(self.sigma[clas])
      for i in range(x.shape[0]):

        z= (x[i] -self.mu[clas])@inversig.T@(x[i]-self.mu[clas])

      ## START THE LEARNING: estimate mu, phi and sigma.
        p_x_y0 = a*detsig*np.exp((-(1/2)*z))*self.phi[clas]
    
    return p_x_y0
  def predict(self,x):
    ypred = self.predict_proba(x)
    ypreds = np.argmax(ypred, axis =1)
  def accuracy(self, y, ypreds):
    acc = np.mean((y==ypreds))*100
    return acc

In [119]:
model= GDA()
model.fit(X_train,y_train)

In [120]:
yproba= model.predict_proba(X_test)
yproba

1.3098224401380994e-08

In [121]:
ypreds= model.predict(X_test)
ypreds


AxisError: ignored

In [None]:
model.accuracy(y_test, ypreds)