<center> <img src="logo.png" width="200"></center> 

<center> <h1>Logistic Regression with Gradient Ascent</h1> </center>
<center> <h1>Handwritten Digits Classification</h1> </center>


## Gradient Ascent algorithm
In our [video](https://www.youtube.com/watch?v=TM1lijyQnaI&t=810s) on logistic regression, we derived the following rule for updating the logistic regression model parameters:


\begin{equation}
\theta^{+} = \theta^{-} + \alpha (y_{i} - h(x_{i}))\bar{x}_{i}
\end{equation}

This update maximizes the following log-likelihood function:

\begin{equation}
J(x, \theta, y) = \sum_{i=1}^{m}\left[ y_i\log(h(x_{i})) + (1 - y_i)\log(1 - h(x_{i})) \right]
\end{equation}

where our hypothesis is a sigmoid function
\begin{equation}
h(x_i) = \frac{1}{1 + e^{-\theta^T \bar{x}_i}}
\end{equation}

### Batch gradient Ascent
```
FOR j FROM 0 -> max_iteration:
    FOR i FROM 0 -> m:
        theta += (alpha) * (y[i] - h(x[i])) * x_bar[i]
    ENDLOOP
ENDLOOP
```

## Multi-class Classification with one-vs-all (one-vs-rest)
If there are n classes, we train n binary classifiers, one per class. Given a new data point, we run all n classifiers on it and choose the class whose classifier reports the highest probability.

In [None]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [None]:
digits = load_digits()

# Preview the first nine digit images on a 3x3 grid of axes.
# `axarr.flat` walks the grid row by row, so image k lands in cell (k // 3, k % 3).
f, axarr = plt.subplots(3, 3)
for axis, image in zip(axarr.flat, digits.images):
    axis.imshow(image)
plt.show()

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with gradient ascent.

    The model separates one label (``class_of_interest``) from all other
    labels.  Every sample is augmented with a leading constant ``1`` so that
    ``params[0]`` acts as the bias term; ``params`` therefore needs one more
    entry than a (flattened) sample has features.

    Attributes set by ``set_values``:
        params: float array of model parameters (bias first).
        alpha: step size of each parameter update.
        max_iter: number of passes over the training data.
        class_of_interest: label treated as the positive class.

    Attributes set by ``test``:
        total_classifications: number of samples evaluated.
        correct_classifications: number of samples classified correctly.
        accuracy: ``correct_classifications / total_classifications``.
    """

    def set_values(self, initial_params, alpha=0.01, max_iter=5000, class_of_interest=0):
        """Set the initial params, step size, maximum iteration, and class of interest.

        ``initial_params`` is copied into a float array, so training never
        modifies the caller's object and lists or integer arrays are accepted.
        """
        self.params = np.array(initial_params, dtype=float)
        self.alpha = alpha
        self.max_iter = max_iter
        self.class_of_interest = class_of_interest

    @staticmethod
    def _sigmoid(x):
        """Return the sigmoid ``1 / (1 + exp(-x))`` of a scalar or array.

        ``exp(-|x|)`` always lies in (0, 1], so neither branch below can
        overflow, unlike a direct ``exp(-x)`` for large negative ``x``.
        """
        decay = np.exp(-np.abs(x))
        for_non_negative = 1.0 / (1.0 + decay)
        for_negative = decay / (1.0 + decay)
        result = np.where(np.asarray(x) >= 0, for_non_negative, for_negative)
        # Indexing with () turns a 0-d result back into a NumPy scalar and
        # leaves arrays with one or more dimensions untouched.
        return result[()]

    def predict(self, x_bar, params):
        """Return the predicted probability of the class of interest.

        Args:
            x_bar: sample that already contains the leading bias entry.
            params: parameter vector with the same length as ``x_bar``.
        """
        return self._sigmoid(np.dot(params, x_bar))

    def _compute_cost(self, input_var, output_var, params):
        """Compute the log likelihood of the labels under ``params``.

        Works on ``z = params . x_bar`` directly, using
        ``log(h) = -log(1 + exp(-z))`` and ``log(1 - h) = -log(1 + exp(z))``,
        so saturated predictions never produce ``log(0)``.

        Returns:
            The summed log likelihood (``0.0`` when there are no samples).
        """
        cost = 0.0
        for x, y in zip(input_var, output_var):
            z = np.dot(params, np.insert(x, 0, 1))
            if y == self.class_of_interest:
                cost -= np.logaddexp(0.0, -z)
            else:
                cost -= np.logaddexp(0.0, z)

        return cost

    def train(self, input_var, label, print_iter=5000):
        """Train the model with per-sample gradient ascent updates.

        The parameters are updated after every sample (stochastic / online
        gradient ascent), visiting the samples in the order given, for
        exactly ``max_iter`` passes over the data.

        Args:
            input_var: iterable of samples (without the bias entry).
            label: iterable of labels, aligned with ``input_var``.
            print_iter: report the cost after every ``print_iter``-th pass;
                ``0`` or ``None`` disables the report.

        Returns:
            The trained parameter array (the same object as ``self.params``).
        """
        # The augmented samples and binary targets do not change between
        # passes, so build them once instead of once per sample per pass.
        samples = [
            (np.insert(x, 0, 1), 1.0 if y == self.class_of_interest else 0.0)
            for x, y in zip(input_var, label)
        ]

        for iteration in range(1, self.max_iter + 1):
            for x_bar, y_binary in samples:
                y_hat = self.predict(x_bar, self.params)
                gradient = (y_binary - y_hat) * x_bar
                self.params += self.alpha * gradient

            # Report after the pass so the cost belongs to `iteration` passes.
            if print_iter and iteration % print_iter == 0:
                cost = self._compute_cost(
                    (x_bar[1:] for x_bar, _ in samples),
                    (self.class_of_interest if y_binary == 1.0 else None
                     for _, y_binary in samples),
                    self.params,
                ) if False else self._cost_of_samples(samples)
                print(f'iteration: {iteration}')
                print(f'cost: {cost}')
                print('--------------------------------------------')

        return self.params

    def _cost_of_samples(self, samples):
        """Log likelihood for pre-built ``(x_bar, y_binary)`` training pairs."""
        cost = 0.0
        for x_bar, y_binary in samples:
            z = np.dot(self.params, x_bar)
            # Same stable formulation as `_compute_cost`.
            cost -= np.logaddexp(0.0, -z) if y_binary == 1.0 else np.logaddexp(0.0, z)
        return cost

    def test(self, input_test, label_test):
        """Test the accuracy of the model using test data.

        A sample counts as correct when the predicted probability is at
        least 0.5 and its label is the class of interest, or when the
        probability is below 0.5 and its label is any other class.

        Returns:
            The fraction of correctly classified samples.

        Raises:
            ZeroDivisionError: if ``input_test`` / ``label_test`` yield no samples.
        """
        self.total_classifications = 0
        self.correct_classifications = 0

        for x, y in zip(input_test, label_test):
            self.total_classifications += 1
            y_hat = self.predict(np.insert(x, 0, 1), self.params)
            is_target = y == self.class_of_interest

            if is_target and y_hat >= 0.5:
                # correct classification of class_of_interest
                self.correct_classifications += 1
            elif not is_target and y_hat < 0.5:
                # correct classification of another class
                self.correct_classifications += 1

        self.accuracy = self.correct_classifications / self.total_classifications

        return self.accuracy

In [None]:
# Hold out 20% of the samples (and their labels) as the test set;
# the rest is used for training.
(
    digits_train,
    digits_test,
    digits_label_train,
    digits_label_test,
) = train_test_split(digits.data, digits.target, test_size=0.20)

In [None]:
# Fit the classifier that separates the ZERO digit from all other digits.
alpha = 0.01
max_iter = 10000
# one parameter per feature plus one for the bias entry
params_0 = np.zeros(digits.data.shape[1] + 1)

digits_regression_model_0 = LogisticRegression()
digits_regression_model_0.set_values(
    initial_params=params_0,
    alpha=alpha,
    max_iter=max_iter,
    class_of_interest=0,
)

# Features are divided by 16.0 before training; progress is reported
# every 1000 iterations.
params = digits_regression_model_0.train(
    input_var=digits_train / 16.0,
    label=digits_label_train,
    print_iter=1000,
)


In [None]:
# Measure how well the ZERO-vs-rest classifier does on the held-out test set
# (same 1/16 feature scaling as in training).
digits_accuracy = digits_regression_model_0.test(
    input_test=digits_test / 16.0,
    label_test=digits_label_test,
)
print(f'Accuracy of prediciting a ZERO digit in test set: {digits_accuracy}')

In [None]:
# Fit the classifier that separates the ONE digit from all other digits.
alpha = 0.01
max_iter = 10000
# one parameter per feature plus one for the bias entry
params_0 = np.zeros(digits.data.shape[1] + 1)

digits_regression_model_1 = LogisticRegression()
digits_regression_model_1.set_values(
    initial_params=params_0,
    alpha=alpha,
    max_iter=max_iter,
    class_of_interest=1,
)

# Features are divided by 16.0 before training; progress is reported
# every 1000 iterations.
params = digits_regression_model_1.train(
    input_var=digits_train / 16.0,
    label=digits_label_train,
    print_iter=1000,
)

In [None]:
# Measure how well the ONE-vs-rest classifier does on the held-out test set
# (same 1/16 feature scaling as in training).
digits_accuracy = digits_regression_model_1.test(
    input_test=digits_test / 16.0,
    label_test=digits_label_test,
)
print(f'Accuracy of prediciting a ONE digit in test set: {digits_accuracy}')

In [None]:
# Fit the classifier that separates the TWO digit from all other digits.
alpha = 0.01
max_iter = 10000
# one parameter per feature plus one for the bias entry
params_0 = np.zeros(digits.data.shape[1] + 1)

digits_regression_model_2 = LogisticRegression()
digits_regression_model_2.set_values(
    initial_params=params_0,
    alpha=alpha,
    max_iter=max_iter,
    class_of_interest=2,
)

# Features are divided by 16.0 before training; progress is reported
# every 1000 iterations.
params = digits_regression_model_2.train(
    input_var=digits_train / 16.0,
    label=digits_label_train,
    print_iter=1000,
)

In [None]:
# Measure how well the TWO-vs-rest classifier does on the held-out test set
# (same 1/16 feature scaling as in training).
digits_accuracy = digits_regression_model_2.test(
    input_test=digits_test / 16.0,
    label_test=digits_label_test,
)
print(f'Accuracy of prediciting a TWO digit in test set: {digits_accuracy}')

In [None]:
# Fit the classifier that separates the EIGHT digit from all other digits.
alpha = 0.01
max_iter = 10000
# one parameter per feature plus one for the bias entry
params_0 = np.zeros(digits.data.shape[1] + 1)

digits_regression_model_8 = LogisticRegression()
digits_regression_model_8.set_values(
    initial_params=params_0,
    alpha=alpha,
    max_iter=max_iter,
    class_of_interest=8,
)

# Features are divided by 16.0 before training; progress is reported
# every 1000 iterations.
params = digits_regression_model_8.train(
    input_var=digits_train / 16.0,
    label=digits_label_train,
    print_iter=1000,
)

In [None]:
# Measure how well the EIGHT-vs-rest classifier does on the held-out test set
# (same 1/16 feature scaling as in training).
digits_accuracy = digits_regression_model_8.test(
    input_test=digits_test / 16.0,
    label_test=digits_label_test,
)
print(f'Accuracy of prediciting a EIGHT digit in test set: {digits_accuracy}')