In [4]:
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [5]:
def sigmoid(z):
  """Numerically stable logistic function ``1 / (1 + exp(-z))``.

  The textbook form overflows inside ``exp(-z)`` when ``z`` is a large
  negative number, which triggers a NumPy RuntimeWarning. This version only
  ever exponentiates non-positive values, so no overflow can occur.

  Parameters
  ----------
  z : scalar or array-like
      Input value(s).

  Returns
  -------
  NumPy scalar or ndarray
      Element-wise sigmoid of ``z``; a scalar for scalar input, otherwise an
      array with the same shape as ``z``.
  """
  z = np.asarray(z)
  # -|z| <= 0, therefore e lies in [0, 1] and exp never overflows.
  e = np.exp(-np.abs(z))
  # z >= 0: 1/(1+exp(-z));  z < 0: exp(z)/(1+exp(z)) -- algebraically the same value.
  out = np.where(z >= 0, 1.0/(1.0+e), e/(1.0+e))
  # Indexing with an empty tuple unwraps 0-d arrays into a scalar and leaves
  # n-d arrays untouched.
  return out[()]

In [41]:
def calculate_gradient(theta,X,y):
  """Return ``X.T @ (sigmoid(X @ theta) - y) / m`` with ``m = y.size``.

  This is the expression for the gradient of the average log-loss of a
  logistic model with respect to ``theta``.

  Parameters
  ----------
  theta : parameter vector multiplied by ``X``.
  X     : design matrix (callers in this file pass it with a leading column of ones).
  y     : targets; its ``size`` is used as the number of instances.
  """
  n_instances = y.size
  # Difference between the predicted probabilities and the targets.
  residual = sigmoid(X @ theta) - y
  return (X.T @ residual) / n_instances

In [24]:
def gradient_descent(X,y,lr=0.1,num_iter=100,tol=1e-7):
  """Estimate logistic-regression parameters by batch gradient descent.

  A column of ones is prepended to ``X``, so the returned vector holds the
  intercept at index 0 followed by one coefficient per column of ``X``.

  Parameters
  ----------
  X        : feature matrix, one row per instance.
  y        : targets, one per row of ``X``.
  lr       : step size applied to the gradient.
  num_iter : maximum number of updates.
  tol      : stop early once the gradient's norm falls below this value.

  Returns
  -------
  theta : ndarray of length ``X.shape[1] + 1``, starting from all zeros.
  """
  bias_column = np.ones((X.shape[0],1))
  design = np.c_[bias_column,X]
  theta = np.zeros(design.shape[1])

  for _ in range(num_iter):
    grad = calculate_gradient(theta,design,y)
    step = lr * grad
    theta -= step

    # The update above is applied before the check, so the final (tiny)
    # step is still included when stopping early.
    converged = np.linalg.norm(grad) < tol
    if converged:
      print("Stopping")
      break

  return theta

In [8]:
def predict_proba(X,theta):
  """Return ``sigmoid`` of the linear score for every row of ``X``.

  A column of ones is prepended to ``X`` before multiplying by ``theta``,
  so ``theta[0]`` acts as the intercept.
  """
  n_rows = X.shape[0]
  design = np.c_[np.ones((n_rows,1)),X]
  scores = design @ theta
  return sigmoid(scores)

In [10]:
def predict(X,theta,threshold=0.5):
  """Return integer 0/1 labels: 1 where ``predict_proba(X, theta) >= threshold``."""
  proba = predict_proba(X,theta)
  above_threshold = proba >= threshold
  return above_threshold.astype(int)

In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Scaler instance; created here, unfitted.
std = StandardScaler()

# Features and targets as plain arrays.
X, y = load_breast_cancer(return_X_y=True)

# Stratified 80/20 split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [16]:
# Fit the scaler on the training split only, then apply the same
# transformation to the test split.
X_train_s = std.fit_transform(X_train)
X_test_s = std.transform(X_test)

In [42]:
from sklearn.metrics import accuracy_score

# Fit on the standardized training split.
theta_hat = gradient_descent(X_train_s, y_train, lr=0.1)

# Hard 0/1 predictions for both splits using the fitted parameters.
y_pred_train = predict(X_train_s, theta_hat)
y_pred_test = predict(X_test_s, theta_hat)

In [43]:
# Fraction of correctly classified instances on each split.
train_acc = accuracy_score(y_true=y_train, y_pred=y_pred_train)
test_acc = accuracy_score(y_true=y_test, y_pred=y_pred_test)

In [44]:
(train_acc, test_acc)

(0.9802197802197802, 0.9736842105263158)