# Lab 3 - Logistic Regression 

This example is based on the Breast cancer wisconsin (diagnostic) dataset.

It has 30 numeric, predictive attributes and the class

Ref: [LINK](https://scikit-learn.org/stable/datasets/toy_dataset.html#breast-cancer-dataset)

In [31]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt
import warnings 

#####################
# Implementation

#####################
# The fun and cool sigmoid function - Look at scrible for a vizualization of this Logistic Function

# suppress warnings - This is needed to not get the exp overflow
warnings.filterwarnings('ignore') 

def sigmoid(x):
    return 1/(1+np.exp(-x))

class LogisticRegression():

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            linear_pred = np.dot(X, self.weights) + self.bias
            predictions = sigmoid(linear_pred)

            dw = (1/n_samples) * np.dot(X.T, (predictions - y))
            db = (1/n_samples) * np.sum(predictions-y)

            self.weights = self.weights - self.lr*dw
            self.bias = self.bias - self.lr*db


    def predict(self, X):
        linear_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(linear_pred)
        class_pred = [0 if y<=0.5 else 1 for y in y_pred]
        return class_pred

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

print("-------- DATA -------")
print(f"X_train: {X_train.shape} - {X_train.dtype}")
print(f"y_train: {y_train.shape} - {X_train.dtype}")
print(f"X_test: {X_test.shape} - {X_train.dtype}")
print(f"y_test: {y_test.shape} - {X_train.dtype}")
print("----------------------")
np.set_printoptions(precision=4,suppress=True)
print(f"X_train: {X_train[:4]}")
print(f"y_train: {y_train[:4]}")
print(X_train[2:3] - X_train[3:4])
print(f"Number of positives in the dataset {np.count_nonzero(y_train==1)}")
print("----------------------")

clf = LogisticRegression(lr=0.01)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

def accuracy(y_pred, y_test):
    return np.sum(y_pred==y_test)/len(y_test)

acc = accuracy(y_pred, y_test)
print(acc)





-------- DATA -------
X_train: (455, 30) - float64
y_train: (455,) - float64
X_test: (114, 30) - float64
y_test: (114,) - float64
----------------------
X_train: [[ 12.88    18.22    84.45   493.1      0.1218   0.1661   0.0483   0.053
    0.1709   0.0725   0.4426   1.169    3.176   34.37     0.0053   0.0233
    0.014    0.0124   0.0182   0.0033  15.05    24.37    99.31   674.7
    0.1456   0.2961   0.1246   0.1096   0.2582   0.0889]
 [ 11.13    22.44    71.49   378.4      0.0957   0.0819   0.0482   0.0226
    0.203    0.0655   0.28     1.467    1.994   17.85     0.0035   0.0305
    0.0345   0.0102   0.0291   0.0047  12.02    28.26    77.8    436.6
    0.1087   0.1782   0.1564   0.0641   0.3169   0.0803]
 [ 12.63    20.76    82.15   480.4      0.0993   0.1209   0.1065   0.0602
    0.1735   0.0707   0.3424   1.803    2.711   20.48     0.0129   0.0404
    0.051    0.023    0.0214   0.0059  13.33    25.47    89.     527.4
    0.1287   0.225    0.2216   0.1105   0.2226   0.0849]
 [ 12.68   