# GDA Implementation.

Implement the Gaussian Discriminant Analysis (GDA) learning algorithm following the steps as discussed in class.

INSTRUCTION: Rename your notebook as: <br>
`firstName_LastName_Live_coding_GDA.ipynb`.

Notes: 
* Do not use any built-in functions to complete a task;
* Do not import additional libraries.

In [112]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [113]:
# Generate data
def generate_data():
  x, y = make_classification(n_samples= 1000, n_features=3, n_redundant=0, 
                           n_informative=3, random_state=1, 
                           n_clusters_per_class=1)
  
  return x,y

x,y= generate_data()
print(x.shape, y.shape)


(1000, 3) (1000,)


In [114]:
def split_data(x,y, train_size= 0.8):
    # shuffle the data to randomize the train/test split
    n = y.shape[0]
    if y.ndim == 1:
      y = y.reshape(-1,1)
    data = np.hstack((x,y))
    data_suffe = np.random.permutation(data)
    X_train = data_suffe[:round(n*train_size),:-1]
    y_train = data_suffe[:round(n*train_size):,-1]
    X_test = data[round(n*train_size): ,:-1]
    y_test = data[round(n*train_size):,-1]

    return X_train, y_train, X_test, y_test
     

In [115]:
X_train, y_train, X_test, y_test= split_data(x,y, train_size= 0.8) # split your data into x_train, x_test, y_train, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(800, 3) (800,) (200, 3) (200,)


In [116]:
def phi_function(y,k):

  # we can use np.unique(y) to see our different class which the output will be array([0, 1])so we have class 0 and class 1
  if k == 1 :
    s = np.sum(y==1)
    
  else:
    s= np.sum(y==0)
  return s
 
 

In [117]:
def mu_function(x, y,k ):
  n,d = x.shape
  k = len(np.unique(y))
  mu = np.zeros((k,d))
  x1 = np.zeros((d,1))
  s = 0 #number of example in our class 1 
  for i in range(n):
    if y[i]== 1:
    
      mu[1] += x[i,:]/phi_function(y,1)

    else:
      mu[0] += x[i,:]/phi_function(y,0)
  return mu
    

In [118]:
mu_function(x, y,1 )

array([[ 0.99515416,  1.04188282,  0.99941748],
       [-1.01309105,  0.95696514, -0.93218425]])

In [119]:
def covariance(x ,mu):
  n,d = x.shape
  cov= np.zeros((d,d))
  for i in range(d):
    for j in range(d):
      cov[i,j]= (1/n)*np.sum((x[:,i] - mu[i])*(x[:,j] - mu[j]).T)

  return cov

  # Easy way: cov= np.cov(x, rowvar=0) but do not use it. One can use it to assess his/her result.
 

In [120]:
np.cov(x,rowvar=False)

array([[1.84495325, 0.02790646, 1.00137533],
       [0.02790646, 1.00170721, 0.05539176],
       [1.00137533, 0.05539176, 1.74832   ]])

In [121]:
covariance(x, np.mean(x, axis = 0))

array([[1.84310829, 0.02787855, 1.00037396],
       [0.02787855, 1.0007055 , 0.05533637],
       [1.00037396, 0.05533637, 1.74657168]])

In [133]:
#from _typeshed import NoneType
class GDA:
  def __init__(self):
    ## set mu, phi and sigma to None
    def _init_(self):
      self.phi = None
      self.sigma = None
      self.mu = None
  
  def fit(self,x,y):
    k= np.unique(y).shape[0] # Number of class.
    d=  x.shape[1]#input dim
    m=x.shape[0] # Number of examples.
    
    ## Initialize mu, phi and sigma
    self.mu= np.zeros((k,d))#: kxd, i.e., each row contains an individual class mu.
    self.sigma=  np.zeros((k,d,d))#: kxdxd, i.e., each row contains an individual class sigma.
    self.phi= np.zeros(d)# d-dimension

    ## START THE LEARNING: estimate mu, phi and sigma.
    for clas in range(k):
      self.phi[clas] = np.sum(clas==y)/m
      self.mu[clas] = np.mean(x[clas==y] , axis =0)
      self.sigma[clas] = covariance(x[clas==y] ,self.mu[clas])
      


  def predict_proba(self,x):
    from math import pi
    # reshape or flatt x.
    
    d= x.shape[1]
    k_class= self.mu.shape[0]# Number of classes we have in our case it's k = 2
    p_x_y0 = np.zeros((x.shape[0],k_class))
    for clas in range(k_class):
     
      a = 1/((2*pi)**(d/2))
      detsig = np.linalg.det(self.sigma[clas])**(1/2)
      inversig = np.linalg.inv(self.sigma[clas])
      for i in range(x.shape[0]):

        z= (x[i] -self.mu[clas])@inversig.T@(x[i]-self.mu[clas])

      ## START THE LEARNING: estimate mu, phi and sigma.
        p_x_y0[i,clas] = (1/(a*detsig))* (np.exp((-(1/2)*z)))*self.phi[clas]
    
    return p_x_y0
  def predict(self,x):
    ypred = self.predict_proba(x)
    ypreds = np.argmax(ypred, axis =1)
    return ypreds
    
  def accuracy(self, y, ypreds):
    acc = np.mean(y==ypreds)*100
    return acc

In [134]:
model= GDA()
model.fit(X_train,y_train)
#model.predict_proba(X_train)

In [135]:
yproba= model.predict_proba(X_test)
yproba

array([[2.95290894e+001, 5.32457306e-001],
       [1.65251463e+001, 9.24384604e-005],
       [1.89188504e+001, 1.29415709e-005],
       [2.35759442e+001, 6.19838880e-002],
       [3.29681374e-065, 1.00125123e+001],
       [2.12342006e-022, 1.24155947e+000],
       [7.26150949e-001, 3.88100845e-003],
       [6.36819342e+000, 1.40361317e-005],
       [5.81489844e+000, 1.31664466e-005],
       [3.85232545e+000, 2.47746144e-002],
       [3.64852220e-039, 7.46168360e-001],
       [4.11287395e-001, 5.97733482e-002],
       [2.92483887e-021, 8.62486589e+000],
       [1.60220653e-003, 6.40397305e-001],
       [7.21171240e-039, 1.07335383e+001],
       [2.33437642e-080, 6.97059435e+000],
       [6.23551225e+000, 1.62443807e-001],
       [1.31651031e-001, 4.12261674e+000],
       [1.45911323e-001, 1.17050533e-001],
       [3.64537884e-006, 9.57753846e+000],
       [1.29235621e+000, 1.23392948e-014],
       [3.36704770e+001, 2.57427354e-002],
       [2.92950904e-129, 1.73551185e+000],
       [1.6

In [136]:
ypreds= model.predict(X_test)
ypreds


array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 0])

In [137]:
model.accuracy(y_test, ypreds)

96.5

## **Linear Models Regression**

In [130]:
from pickle import STRING
class LogisticRegression:
  '''
  The goal of this class is to create a LogisticRegression class, 
  that we will use as our model to classify data point into a corresponding class
  '''
  def __init__(self,lr,n_epochs):
    self.lr = lr
    self.n_epochs = n_epochs
    self.train_losses = []
    self.w = None
    self.weight = []
    self.num_iters = 10000

  def add_ones(self, x):
    ##### WRITE YOUR CODE HERE #####
   
    one = np.ones((x.shape[0],1))
   
    return np.hstack((one,x))
    #### END CODE ####

  def sigmoid(self, x):
    ##### WRITE YOUR CODE HERE ####
     
     return 1/(1+np.exp(-x@self.w))
    #### END CODE ####

  def cross_entropy(self, x, y_true):
    ##### WRITE YOUR CODE HERE #####
    y_pred = self.sigmoid(x)
    loss = - np.mean(y_true* np.log(y_pred) +(1-y_true)* np.log(1- y_pred))
    return loss
    #### END CODE ####
  
  def predict_proba(self,x):  #This function will use the sigmoid function to compute the probalities
    ##### WRITE YOUR CODE HERE #####
    x= self.add_ones(x)
    proba = self.sigmoid(x)
    return proba
    #### END CODE ####

  def predict(self,x):
    ##### WRITE YOUR CODE HERE #####
    probas = self.predict_proba(x)
    #treshold=0.5
    output = [0 if p<0.5 else 1 for p in  probas]
    return output
    #### END CODE ####

  def fit(self,x,y):
    # Add ones to x
    x= self.add_ones(x)
    # reshape y if needed
    y= y.reshape(-1,1)

    # Initialize w to zeros vector >>> (x.shape[1])
    
    self.w = np.zeros((x.shape[1], 1))
    for epoch in range(self.n_epochs):
      # make predictions
      ypred = self.sigmoid(x)

      #compute the gradient
      dl = (-1/x.shape[0])*(x.T@(y-ypred))
      #update rule
      self.w = self.w -  self.lr*dl

      #Compute and append the training loss in a list
      loss = self.cross_entropy(x,y)
      self.train_losses.append(loss)

      # if epoch%100 == 0:
      #   print(f'loss for epoch {epoch}  : {loss}')

  def accuracy(self,y_true, y_pred):
    ##### WRITE YOUR CODE HERE #####
    acc = np.mean(y_pred ==y_true)*100
    return acc
    #### END CODE ####

In [131]:
model = LogisticRegression(0.01,n_epochs=10000)
model.fit(X_train,y_train)

In [132]:
ypred_train = model.predict(X_train)
acc = model.accuracy(y_train,ypred_train)
print(f"The training accuracy is: {acc}")
print(" ")

ypred_test = model.predict(X_test)
acc = model.accuracy(y_test,ypred_test)
print(f"The test accuracy is: {acc}")

The training accuracy is: 94.75
 
The test accuracy is: 94.5
