In [1]:
from sklearn import datasets
import numpy as np

from typing import *
import copy

In [2]:
iris = datasets.load_iris()

# Keep only the samples labelled 0 or 1 so the problem becomes binary.
binary_filter = iris.target < 2
X = iris.data[binary_filter]
y = iris.target[binary_filter]

# Standardise each feature column: subtract its mean, divide by its std.
# NOTE(review): these statistics are computed on every selected row, i.e.
# before the train/test split in the next cell, so held-out rows influence
# the scaling. Consider fitting mean/std on the training split only.
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (100, 4)
Shape of y: (100,)


In [3]:
#splitting into test and train
from sklearn.model_selection import train_test_split

# 80/20 split; the fixed random_state makes the partition reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, random_state=55
)

# One line per split; the generator keeps loop variables out of the
# notebook namespace.
print(
    *(
        f"{label} {split.shape}"
        for label, split in (
            ("Shape of X_train:", X_train),
            ("Shape of X_test:", X_test),
            ("Shape of Y_train:", Y_train),
            ("Shape of Y_test:", Y_test),
        )
    ),
    sep="\n",
)

Shape of X_train: (80, 4)
Shape of X_test: (20, 4)
Shape of Y_train: (80,)
Shape of Y_test: (20,)


In [4]:
# Switch to the layout used by the functions below:
# features -> (n_features, n_samples), labels -> (1, n_samples).
Y_train = Y_train.reshape(1, -1)
Y_test = Y_test.reshape(1, -1)

# Transpose only while the samples are still on axis 0. An unconditional
# `X = X.T` flips the matrix back every time this cell is re-run, which
# silently breaks the (n_features, n_samples) layout for later cells.
# (If n_samples == n_features the orientation cannot be told apart and the
# transpose is applied, exactly as before.)
if X_train.shape[0] == Y_train.shape[1]:
    X_train = X_train.T
if X_test.shape[0] == Y_test.shape[1]:
    X_test = X_test.T

In [5]:
def sigmoid(z: np.ndarray) -> np.ndarray:
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula evaluates ``exp(-z)``, which overflows (emitting a
    RuntimeWarning) for large negative ``z``. Here only ``exp(-|z|)`` is
    evaluated, which lies in (0, 1] and therefore cannot overflow.

    Args:
        z: Real-valued scalar or array. Non-floating input is converted to
            float; floating dtypes are kept as they are.

    Returns:
        Element-wise sigmoid with the same shape as ``z``; a NumPy scalar
        when ``z`` is a scalar.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(float)

    e = np.exp(-np.abs(z))  # may underflow to 0, never overflows

    # z >= 0: 1 / (1 + exp(-z))
    # z <  0: exp(z) / (1 + exp(z))  -- algebraically the same value
    out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # `[()]` unwraps a 0-d result to a scalar and is a no-op for n-d arrays.
    return out[()]

In [9]:
sigmoid(0)  # sanity check: 1 / (1 + exp(0)) should display 0.5

0.5

In [10]:
def initialize(dim: int) -> Tuple[np.ndarray, float]:
    """Create all-zero starting parameters.

    Args:
        dim: Number of rows of the weight vector.

    Returns:
        ``(w, b)`` where ``w`` is a ``(dim, 1)`` array of zeros and ``b``
        is the float ``0.0``.
    """
    return np.zeros(shape=(dim, 1)), 0.0

In [11]:
def propagate(w: np.ndarray, b: float, X: np.ndarray, Y: np.ndarray) -> Tuple[Dict[str, np.ndarray], float]:
    """Run one forward and backward pass of logistic regression.

    Args:
        w: Weights, shape ``(X.shape[0], 1)`` (asserted).
        b: Bias added to every pre-activation.
        X: Inputs with one example per column.
        Y: Labels, shape ``(1, X.shape[1])`` (asserted).

    Returns:
        ``(grads, cost)``: ``grads`` holds ``"dw"`` and ``"db"``, the
        gradients of the cross-entropy cost; ``cost`` is that cost as a
        squeezed 0-d array.
    """
    num_examples = X.shape[1]

    # Shape contract: one weight per feature row, one label per column.
    assert w.shape == (X.shape[0], 1)
    assert Y.shape == (1, num_examples)

    inv_m = 1 / num_examples

    # Forward pass. Activations are clipped away from 0 and 1 so both
    # logarithms below stay finite.
    activations = np.clip(sigmoid(np.dot(w.T, X) + b), 1e-10, 1 - 1e-10)
    log_likelihood = Y * np.log(activations) + (1 - Y) * np.log(1 - activations)
    cost = -inv_m * np.sum(log_likelihood)

    # Backward pass; the residual uses the clipped activations.
    residual = activations - Y
    grads = {"dw": inv_m * np.dot(X, residual.T),
             "db": inv_m * np.sum(residual)}

    return grads, np.squeeze(np.array(cost))


In [12]:
def optimize(w: np.ndarray, b: float, X: np.ndarray, Y: np.ndarray, num_iterations: int = 100, learning_rate: float = 0.009, print_cost: bool = False) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray], List[float]]:
    """Fit ``w`` and ``b`` with plain batch gradient descent.

    Args:
        w: Initial weights; the caller's array is not modified.
        b: Initial bias.
        X: Inputs, forwarded unchanged to ``propagate``.
        Y: Labels, forwarded unchanged to ``propagate``.
        num_iterations: Number of update steps. With ``<= 0`` no step is
            taken and the initial parameters are returned.
        learning_rate: Step size applied to both gradients.
        print_cost: If True, print each recorded cost.

    Returns:
        ``(params, grads, costs)``:
        ``params`` -- ``{"w": ..., "b": ...}`` after the last update;
        ``grads`` -- ``{"dw": ..., "db": ...}`` from the last call to
        ``propagate`` (evaluated at the initial parameters if no step ran);
        ``costs`` -- the cost at iterations 0, 100, 200, ... exactly as
        returned by ``propagate``.
    """
    w = copy.deepcopy(w)
    b = copy.deepcopy(b)

    costs = []
    grads = None

    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)

        w = w - learning_rate * grads["dw"]
        b = b - learning_rate * grads["db"]

        # Record the cost every 100 iterations; print it only on request.
        if i % 100 == 0:
            costs.append(cost)

            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    if grads is None:
        # The loop body never ran. Previously this path raised
        # UnboundLocalError on `dw`/`db`; report the gradients at the
        # starting point instead.
        grads, _ = propagate(w, b, X, Y)

    params = {"w": w,
              "b": b}

    grads = {"dw": grads["dw"],
             "db": grads["db"]}

    return params, grads, costs


In [13]:
# Short trial run of the optimiser. The weight dimension is taken from the
# data (rows of X_train are features) instead of a hard-coded 4.
w, b = initialize(X_train.shape[0])

params, grads, costs = optimize(w, b, X_train, Y_train, num_iterations=200, learning_rate=0.005, print_cost=False)

In [14]:
def predict(w: np.ndarray, b: float, X: np.ndarray, threshold: float = 0.5) -> np.ndarray:
    """Predict hard 0/1 labels from learned logistic-regression parameters.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: Scalar bias.
        X: Inputs with one example per column.
        threshold: Probability strictly above which an example is labelled
            1. Defaults to 0.5, the cut-off that was previously hard-coded.

    Returns:
        Float array of shape ``(1, X.shape[1])`` containing 0.0 or 1.0.
    """
    w = w.reshape(X.shape[0], 1)

    A = sigmoid(np.dot(w.T, X) + b)  # activation, one probability per column

    # Vectorised thresholding replaces the per-element Python loop; the
    # comparison is strict, so a probability equal to the threshold maps to 0.
    return (A > threshold).astype(float)


In [15]:
def model(X_train: np.ndarray, Y_train: np.ndarray, X_test: np.ndarray, Y_test: np.ndarray, num_iterations: int = 2000, learning_rate: float = 0.0001, print_cost: bool = False) -> Dict[str, Any]:
    """Train logistic regression on the training set and predict both splits.

    Args:
        X_train: Training inputs; ``X_train.shape[0]`` sets the weight size.
        Y_train: Training labels.
        X_test: Test inputs.
        Y_test: Test labels; only read when ``print_cost`` is True.
        num_iterations: Passed through to ``optimize``.
        learning_rate: Passed through to ``optimize``.
        print_cost: If True, ``optimize`` prints its recorded costs and the
            train/test accuracy is printed afterwards.

    Returns:
        Dict with keys ``"costs"``, ``"Y_prediction_test"``,
        ``"Y_prediction_train"``, ``"w"``, ``"b"``, ``"learning_rate"`` and
        ``"num_iterations"``.
    """
    w_init, b_init = initialize(X_train.shape[0])
    params, _, costs = optimize(w_init, b_init, X_train, Y_train, num_iterations, learning_rate, print_cost)
    w, b = params["w"], params["b"]

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    if print_cost:
        # Predictions and labels are 0/1, so the mean absolute difference
        # is the fraction of misclassified examples.
        train_error = np.mean(np.abs(Y_prediction_train - Y_train))
        print("train accuracy: {} %".format(100 - train_error * 100))
        test_error = np.mean(np.abs(Y_prediction_test - Y_test))
        print("test accuracy: {} %".format(100 - test_error * 100))

    return {"costs": costs,
            "Y_prediction_test": Y_prediction_test,
            "Y_prediction_train": Y_prediction_train,
            "w": w,
            "b": b,
            "learning_rate": learning_rate,
            "num_iterations": num_iterations}

In [16]:
logistic_model = model(X_train, Y_train, X_test, Y_test, num_iterations=10000, learning_rate=0.005, print_cost=True)
# The result was originally bound to the misspelled name `ogistic_model`;
# keep it as an alias in case other cells still refer to it.
ogistic_model = logistic_model

Cost after iteration 0: 0.693147
Cost after iteration 100: 0.437985
Cost after iteration 200: 0.309498
Cost after iteration 300: 0.236510
Cost after iteration 400: 0.190553
Cost after iteration 500: 0.159295
Cost after iteration 600: 0.136778
Cost after iteration 700: 0.119835
Cost after iteration 800: 0.106645
Cost after iteration 900: 0.096096
Cost after iteration 1000: 0.087472
Cost after iteration 1100: 0.080291
Cost after iteration 1200: 0.074221
Cost after iteration 1300: 0.069023
Cost after iteration 1400: 0.064521
Cost after iteration 1500: 0.060584
Cost after iteration 1600: 0.057113
Cost after iteration 1700: 0.054029
Cost after iteration 1800: 0.051270
Cost after iteration 1900: 0.048787
Cost after iteration 2000: 0.046542
Cost after iteration 2100: 0.044500
Cost after iteration 2200: 0.042636
Cost after iteration 2300: 0.040927
Cost after iteration 2400: 0.039355
Cost after iteration 2500: 0.037903
Cost after iteration 2600: 0.036558
Cost after iteration 2700: 0.035309
Cost