In [70]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd

In [71]:
def min_max_normalize(lst):
    """
        Helper function for movielens dataset, not useful for discrete multi class clasification.

        Return:
        Normalized list x, in range [0, 1]
    """
    maximum = max(lst)
    minimum = min(lst)
    toreturn = []
    for i in range(len(lst)):
        toreturn.append((lst[i]- minimum)/ (maximum - minimum))
    return toreturn

In [193]:
def z_standardize(X_inp):
    """
        Z-score Standardization.
        Standardize the feature matrix, and store the standarize rule.

        Parameter:
        X_inp: Input feature matrix.

        Return:
        Standardized feature matrix.
    """
    std_list = []
    mean_list = []
    toreturn = X_inp.copy()
    for i in range(X_inp.shape[1]):
        std = np.std(X_inp[:, i])               # ------ Find the standard deviation of the feature
        mean = np.mean(X_inp[:, i])             # ------ Find the mean value of the feature
        std_list.append(std)
        mean_list.append(mean)
        temp = []
        for j in np.array(X_inp[:, i]):
            
            """    
                #TODO: 1. implement the standardize function
            """
        
            temp += [(j - mean) / std]
        toreturn[:, i] = temp
    return toreturn, mean_list, std_list

In [342]:
def sigmoid(x):
    """ 
        Sigmoid Function

        Return:
        transformed x.
    """
    """    
        #TODO: 2. implement the sigmoid function
    """
    return (1 / (1 + np.exp(-x)) - 0.5) * 2

In [356]:
class Logistic_Regression():
    
    def __init__(self):
        """
            Some initializations, if neccesary
        """
        
        self.model_name = 'Logistic Regression'
        self.mean = None
        self.std = None
    
    def fit(self, X_train, y_train):
        """
            Save the datasets in our model, and do normalization to y_train
            
            Parameter:
                X_train: Matrix or 2-D array. Input feature matrix.
                Y_train: Matrix or 2-D array. Input target value.
        """
        
        self.X = X_train
        self.y = y_train
        
        count = 0
        uni = np.unique(y_train)
        for y in y_train:
            if y == min(uni):
                self.y[count] = -1
            else:
                self.y[count] = 1
            count += 1        
        
        n,m = X_train.shape
        self.theta = np.zeros(m)
        self.b = 0

    
    def gradient(self, X_inp, y_inp, theta, b):
        """
            Calculate the grandient of Weight and Bias, given sigmoid_yhat, true label, and data

            Parameter:
                X_inp: Matrix or 2-D array. Input feature matrix.
                y_inp: Matrix or 2-D array. Input target value.
                theta: Matrix or 1-D array. Weight matrix.
                b: int. Bias.

            Return:
                grad_theta: gradient with respect to theta
                grad_b: gradient with respect to b

        NOTE: There are several ways of implementing the gradient. We are merely providing you one way
        of doing it. Feel free to change the code and implement the way you want.
        """
        
        """
            TODO: 3. Update grad_b and grad_theta using the Sigmoid function
        """
        n = len(y_inp)
        y_hat = sigmoid(np.dot(X_inp, theta) + b)
        error = y_hat - y_inp

        grad_theta = np.dot(X_inp.T, error) / n
        grad_b = np.sum(error) / n
        return grad_theta, grad_b

    def gradient_descent_logistic(self, alpha, num_pass, early_stop=0, standardized = True):
        """
            Logistic Regression with gradient descent method

            Parameter:
                alpha: (Hyper Parameter) Learning rate.
                num_pass: Number of iteration
                early_stop: (Hyper Parameter) Least improvement error allowed before stop. 
                            If improvement is less than the given value, then terminate the function and store the coefficents.
                            default = 0.
                standardized: bool, determine if we standardize the feature matrix.
                
            Return:
                self.theta: theta after training
                self.b: b after training
        """
        
        if standardized:
            self.X, self.mean, self.std = z_standardize(self.X)
            self.mean, self.std = np.array(self.mean), np.array(self.std)        
        
        n, m = self.X.shape

        previous_error = float('inf')

        for i in range(num_pass):    
            
            """
                TODO: 4. Modify the following code to implement gradient descent algorithm
            """
            grad_theta, grad_b = self.gradient(self.X, self.y, self.theta, self.b)
            temp_theta = self.theta - alpha * grad_theta
            temp_b = self.b - alpha * grad_b
            
            previous_y_hat = sigmoid(np.dot(self.X, self.theta) + self.b)
            pre_error = -np.mean(self.y * np.log(previous_y_hat) + (1 - self.y) * np.log(1 - previous_y_hat)) / n
            temp_y_hat = sigmoid(np.dot(self.X, temp_theta) + temp_b)
            temp_error = -np.mean(self.y * np.log(temp_y_hat) + (1 - self.y) * np.log(1 - temp_y_hat)) / n

            if (abs(pre_error - temp_error) < early_stop) | (abs(abs(pre_error - temp_error) / pre_error) < early_stop):
                return temp_theta, temp_b

            self.theta = temp_theta
            self.b = temp_b
            previous_error = temp_error

            """
                TODO: 5. Modify the following code to implement early Stop Mechanism (use Logistic Loss when calculating error)
            """ 
            

        return self.theta, self.b
    
    def predict_ind(self, x: list):
        """
            Predict the most likely class label of one test instance based on its feature vector x.

            Parameter:
            x: Matrix, array or list. Input feature point.
            
            Return:
                p: prediction of given data point
        """
        
        """
            TODO: 7. Implement the prediction function
        """

        if self.std is not None and self.mean is not None:
            x = (np.array(x) - self.mean) / self.std
        z = np.dot(x, self.theta) + self.b
        p = sigmoid(z)
        return 1 if p >= 0 else -1
    
    def predict(self, X):
        """
            X is a matrix or 2-D numpy array, represnting testing instances. 
            Each testing instance is a feature vector. 
            
            Parameter:
            x: Matrix, array or list. Input feature point.
            
            Return:
                p: prediction of given data matrix
        """
        
        """
            TODO: 8. Revise the following for-loop to call predict_ind to get predictions.
        """
        
        X = np.array(X)
        if X.ndim == 1:
            return self.predict_ind(X)
        else:
            return np.array([self.predict_ind(xi) for xi in X])

In [357]:
url_Wine = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
#names = ['f_acid', 'v_acid', 'c_acid', 'sugar', 'chlorides', 'f_SO2', 't_SO2', 'density', 'ph', 'sulphates', 'alcohol', 'quality']
wine = pd.read_csv(url_Wine, delimiter=';')

In [358]:
wine5 = wine.loc[wine.quality == 5]
wine6 = wine.loc[wine.quality == 6]
wineall = pd.concat([wine5,wine6])
wineall

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
5,7.4,0.66,0.00,1.8,0.075,13.0,40.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1592,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1593,6.8,0.62,0.08,1.9,0.068,28.0,38.0,0.99651,3.42,0.82,9.5,6
1595,5.9,0.55,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.51,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6


In [359]:
X = np.array(wineall.iloc[:,:10])
Y = np.array(wineall.quality)

In [360]:
count = 0
for y in Y:
    if y == 5:
        Y[count] = -1
    else:
        Y[count] = 1
    count += 1

In [361]:
logit = Logistic_Regression()
logit.fit(X, Y)

In [362]:
g = logit.gradient_descent_logistic(0.001, 1000)

  pre_error = -np.mean(self.y * np.log(previous_y_hat) + (1 - self.y) * np.log(1 - previous_y_hat)) / n
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  temp_error = -np.mean(self.y * np.log(temp_y_hat) + (1 - self.y) * np.log(1 - temp_y_hat)) / n
  pre_error = -np.mean(self.y * np.log(previous_y_hat) + (1 - self.y) * np.log(1 - previous_y_hat)) / n


In [363]:
w, b = g

In [364]:
g

(array([ 0.04046454, -0.17128566,  0.04783394,  0.00464299, -0.06872242,
        -0.02170143, -0.17925502, -0.11326331,  0.04345622,  0.1304435 ]),
 -0.02586344266287643)

In [365]:
hat = np.array(w.dot(z_standardize(X)[0].T) + b)

In [366]:
hat

array([-0.2368987 , -0.58302816, -0.36934459, ...,  0.10199436,
        0.14616467,  0.39585802])

In [367]:
hat1 = sigmoid(hat)
hat1

array([-0.11789848, -0.28352778, -0.1826012 , ...,  0.05095301,
        0.0729525 ,  0.1953842 ])

In [368]:
count = 0
for i in range(len(hat)):
    if hat1[i] < 0:
        if Y[i] == -1:
            count += 1
    else:
        if Y[i] == 1:
            count += 1
count

884

In [369]:
884/1319

0.6702047005307051

In [370]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter = 1000)

lr.fit(X, Y)
sum(lr.predict(X) == Y)

895

In [371]:
lr.coef_

array([[ 0.04088638, -2.29872007, -0.08386073,  0.0431647 , -1.84948297,
         0.03298363, -0.02446246, -0.21921549,  0.97802779,  2.06919349]])

In [372]:
895/1319

0.6785443517816527