# Ekhator Uwailas Notebook

# Question 1 (15 pts)

Implement the fit and predict procedures for the logistic regression (scikit is not allowed) with norm 2 regularization function (and Lambda parameter).
<br>
Use as the input parameters of the gradient ascent the maximum number of iterations (just a constant e.g 100) and the learning factor (e.g. 0.01).



In [31]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np

The sigmoid function is given by:
$ P(y=1|x,\theta)=\frac{1}{1+e^{-z}}$<br>
where $z = \theta^Tx$
<br>
The log-likelihood function is given by:
$ \ln L(\theta)=\sum_{i=1}^m y^{(i)}\ln l(\theta^Tx^{(i)}) + \sum_{i=1}^m (1-y^{(i)})\ln l(-\theta^Tx^{(i)})$<br>
where $l(z)=\frac{1}{1+e^{-z}}$ is the sigmoid (so $l(-z)=1-l(z)$),<br>
y_true: $y^{(i)}$<br>
y_pred: $l(\theta^Tx^{(i)})$




In [1]:
class LogisticRegressionWithL2:
    '''Binary logistic regression trained by gradient ascent with an L2 penalty.

    The linear score is z = X . weights with no separate intercept term, so
    `weights` holds exactly one entry per feature column of the training matrix.
    After fit, `likelihoods` holds the mean (unpenalized) log-likelihood
    recorded at every iteration.
    '''

    def __init__(self, learning_rate=0.01, max_iterations=1000, lambda_param=0.1):
        '''Initialize variables
        Args:
            learning_rate  : Step size of each gradient-ascent update
            max_iterations : Number of gradient-ascent updates performed by fit
            lambda_param   : L2 regularization strength (Lambda); 0 disables the penalty
        '''
        self.learning_rate  = learning_rate
        self.max_iterations = max_iterations
        self.lambda_param   = lambda_param

        # None marks the model as "not fitted yet"; predict_proba relies on this.
        self.weights     = None
        self.likelihoods = []

        # Define epsilon because log(0) is undefined
        self.eps = 1e-7

    def sigmoid(self, z):
        '''Sigmoid function: f:R->(0,1)
        Args:
            z : A numpy array (num_samples,) or anything convertible to one
        Returns:
            A numpy array where sigmoid function applied to every element
        '''
        z = np.asarray(z, dtype=float)
        sig_z = 1.0 / (1.0 + np.exp(-z))

        # The element-wise transform must never change the shape of the data.
        assert (z.shape==sig_z.shape), 'Error in sigmoid implementation, data shape altered. Check!'
        return sig_z

    def log_likelihood(self, y_true, y_pred):
        '''Calculates the mean Bernoulli log-likelihood of the predictions
        Args:
            y_true : Numpy array of actual truth values (num_samples,)
            y_pred : Numpy array of predicted probabilities (num_samples,)
        Returns:
            Mean log-likelihood, scalar value (the L2 penalty is not included)
        '''
        y_true = np.asarray(y_true, dtype=float)

        # Keep probabilities inside [eps, 1 - eps] so neither log() sees 0.
        y_pred = np.clip(y_pred, self.eps, 1 - self.eps)

        likelihood = y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
        return np.mean(likelihood)

    def fit(self, X, y):
        '''Trains logistic regression model using gradient ascent
        to gain maximum likelihood on the training data
        Args:
            X : Numpy array (num_examples, num_features)
            y : Numpy array (num_examples, ) of 0/1 labels
        Returns: VOID
        Raises:
            ValueError : if X is not 2-D, is empty, or X and y disagree on
                         the number of examples
        '''
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array (num_examples, num_features)")
        num_examples, num_features = X.shape
        if num_examples == 0:
            raise ValueError("X must contain at least one example")
        if y.shape[0] != num_examples:
            raise ValueError("X and y must contain the same number of examples")

        # Start every fit from scratch so that re-fitting neither reuses old
        # weights nor appends to the likelihood history of a previous run.
        self.weights = np.zeros(num_features)
        self.likelihoods = []

        # Perform gradient ascent
        for _ in range(self.max_iterations):
            # Linear hypothesis z = X . theta, squashed into probabilities.
            z = np.dot(X, self.weights)
            y_pred = self.sigmoid(z)

            # Gradient of the mean log-likelihood w.r.t. the weights, minus the
            # derivative of the L2 penalty lambda * ||theta||^2.
            gradient = np.dot(X.T, y - y_pred) / num_examples - 2 * self.lambda_param * self.weights

            # Ascent step: move *along* the gradient to increase the likelihood.
            self.weights += self.learning_rate * gradient

            # Track the likelihood of the predictions made before this update.
            self.likelihoods.append(self.log_likelihood(y, y_pred))

    def predict_proba(self,X):
        '''Predict probabilities for given X.
        sigmoid returns value between 0 and 1.
        Args:
            X : Numpy array (num_samples, num_features)
        Returns:
            probabilities: Numpy array (num_samples,)
        Raises:
            RuntimeError : if called before fit
        '''
        if self.weights is None:
            raise RuntimeError("Fit the model before prediction")

        z = np.dot(X, self.weights)
        probabilities = self.sigmoid(z)

        return probabilities

    def predict(self, X, threshold=0.5):
        '''Predict/Classify X in classes
        Args:
            X         : Numpy array (num_samples, num_features)
            threshold : scalar value above which prediction is 1 else 0
        Returns:
            binary_predictions : Numpy array (num_samples,)
        '''
        # Strictly-greater comparison: a probability equal to the threshold maps to 0.
        binary_predictions = (self.predict_proba(X) > threshold).astype(int)

        return binary_predictions

Loading the Iris dataset to test our logistic regression:

In [25]:
# Load Iris and drop class 2 so that only the two classes 0 and 1 remain.
iris = datasets.load_iris()
X, y = iris.data[iris.target != 2], iris.target[iris.target != 2]

In [29]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [30]:
# Learn the per-feature mean and standard deviation from the training split only.
sc = StandardScaler().fit(X_train)

# Standardize both splits with the statistics learned from the training data.
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [38]:
# Train the from-scratch L2-regularized model on the standardized training split.
log_reg = LogisticRegressionWithL2(max_iterations=1000, learning_rate=0.01)
log_reg.fit(X_train_std, y_train)

# Classify the held-out samples and report the fraction predicted correctly.
y_pred_test = log_reg.predict(X_test_std)
accuracy = accuracy_score(y_test, y_pred_test)
print("Accuracy:", accuracy)

Accuracy: 1.0


# Question 2 (20 pts)

Use the iris dataset (just the binary class Iris Setosa vs others), the K-fold cross-validation, metrics = accuracy and the logistic regression with L2 regularization.
<br>
You can use scikit.
<br>
Please estimate the best parameter C (the inverse of lambda) used for the regularization term

In [40]:
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [41]:
# Load the Iris data through seaborn and preview it.
# NOTE(review): this rebinds `iris`, which held the scikit-learn dataset object
# in Question 1; from here on `iris` is the seaborn table with a `species` column.
# NOTE(review): seaborn is already imported in the previous cell, so this import
# is redundant; load_dataset presumably fetches or reads a cached copy of the
# data — confirm that works in the execution environment.
import seaborn as sns
iris = sns.load_dataset('iris')
# Last expression of the cell: display the first rows.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [54]:
def val(s):
    """Map a species label to the binary target: 1 for 'setosa', 0 for anything else."""
    return 1 if s=='setosa' else 0

# Features are the first four columns; the fifth column holds the species label.
X = iris.iloc[:, :4]
# Binary target: Iris Setosa (1) versus every other species (0).
y = np.array([val(label) for label in iris.iloc[:, 4]])

In [57]:
# Hold out 30% of the setosa-vs-rest data for the final test; fixed seed for reproducibility.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [58]:
# Standardize the features: statistics come from the training split only and
# are then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train2)
X_train2 = scaler.transform(X_train2)
X_test2 = scaler.transform(X_test2)

In [60]:
# Candidate values of C, the inverse of the regularization strength lambda
# (smaller C = stronger L2 penalty).
C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000, 10000000]

cv = 10

# One splitter with a fixed seed: every candidate C is scored on the same
# stratified folds, so the averaged accuracies are directly comparable.
skf = StratifiedKFold(n_splits=cv, random_state=10, shuffle=True)

# Best candidate found so far; -inf guarantees the first candidate is accepted.
best_C = None
best_accuracy = -np.inf

# Perform cross-validation and parameter tuning
for c in C:
    fold_accuracies = []
    # The folds must be drawn from the Question 2 training data, because the
    # returned indices are used to slice X_train2 / y_train2 (splitting the
    # Question 1 arrays X_train / y_train would index the wrong dataset).
    for train_index, val_index in skf.split(X_train2, y_train2):
        X_train_cv = X_train2[train_index]
        X_val = X_train2[val_index]
        y_train_cv = y_train2[train_index]
        y_val = y_train2[val_index]
        clf = LogisticRegression(random_state=0, class_weight='balanced', C=c, penalty='l2')
        clf.fit(X_train_cv, y_train_cv)
        y_pred_val = clf.predict(X_val)
        fold_accuracies.append(accuracy_score(y_val, y_pred_val))
    avg_accuracy = np.mean(fold_accuracies)
    # Strict '>' keeps the earliest (smallest) C among candidates that tie.
    if avg_accuracy > best_accuracy:
        best_accuracy = avg_accuracy
        best_C = c

# Refit on the complete training split with the selected C, so the final model
# is not merely the classifier left over from the last fold (trained on 9/10 of the data).
best_classifier = LogisticRegression(random_state=0, class_weight='balanced', C=best_C, penalty='l2')
best_classifier.fit(X_train2, y_train2)

# Print best C value and accuracy
print('Best C:', best_C)
print('Best Accuracy:', best_accuracy)

# Testing on the held-out split, which was never used during model selection
y_pred = best_classifier.predict(X_test2)
print(classification_report(y_test2, y_pred))


Best C: 0.001
Best Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        16

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

