In [0]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split 

In [0]:
def sigmoid(X, coeff):
#   sigmoid function
    dot = np.dot(X, coeff)
    sig = 1.0/(1 + np.exp(-dot)) 
    return sig

In [0]:
# Feature scaling
def mean(a):
  mean = float(np.mean(a))
  func = np.vectorize(lambda t: t - mean)
  return func(a)

def std_var(a):
  std = float(np.std(a))
  func = np.vectorize(lambda t: t / std)
  return func(a)

def feature_scaling(X):
  X = np.apply_along_axis(mean, 0, X)
  X = np.apply_along_axis(std_var, 0, X)
  return X

In [0]:
def normalizeX(X):
#   Change as per notation to get: w0x0 + w1x1 + w2x2 + ...
# i.e. add an extra feature vector x0 for bias
# Also do feature scaling
  shape = (X.shape[0], 1)
  
  newX = feature_scaling(X)
  newX = np.hstack((np.ones(shape), newX))
  
  return newX

In [0]:
# To find theta: Gradient descent
def gradient(X, y, theta):
  m = y.shape[0]
  sig = sigmoid(X, theta) 
  loss = sig - y
  grad =  1/m * np.dot(X.T, loss)
  return grad 

def cost_function(X, y, theta):
  sig = sigmoid(X, theta) 
  c1 = y * np.log(sig) 
  c2 = (1 - y) * np.log(1 - sig) 
  final = -c1 - c2 
  me = np.mean(final)
  return me

def gradient_descent(X, y, coeff, learning_rate = 0.01, min_cost_change = 0.0001, max_epochs = 50000):
  epoch = 1 
  current_cost = cost_function(X, y, coeff) 
  cost_change = learning_rate

  while cost_change > min_cost_change and epoch < max_epochs:
    prev_cost = current_cost
    grad = gradient(X, y, coeff)
    coeff = coeff - (learning_rate * grad) 
    current_cost = cost_function(X, y, coeff) 
    cost_change = prev_cost - current_cost
    epoch += 1
  
  return coeff, epoch 

In [0]:
def sig_under_threshold(X, coeff):
  sig = sigmoid(X, coeff)
  return np.where(sig >= 0.5, sig, float("-inf")) 
  
def predict_y(X, coeff_list): 
    prob_mat = sig_under_threshold(X, coeff_list[0])
    prob_mat = np.hstack((prob_mat, sig_under_threshold(X, coeff_list[1])))
    prob_mat = np.hstack((prob_mat, sig_under_threshold(X, coeff_list[2])))

    max_prob = np.argmax(prob_mat, 1)
    return max_prob

In [0]:
if __name__ == "__main__":
  dataset = datasets.load_iris()
  X = dataset.data 
  y = dataset.target
  
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify = y) 
  X_train = normalizeX(X_train)

  y0_train = np.array(y_train == 0, dtype= int).reshape((-1,1))
  y1_train = np.array(y_train == 1, dtype= int).reshape((-1,1))
  y2_train = np.array(y_train == 2, dtype= int).reshape((-1,1))
  
  y_train_all = [y0_train, y1_train, y2_train]
  
#   Initial values
  coeff_list = []
  classes = [0, 1, 2]
  for cl in classes:
    coeff = np.zeros((X_train.shape[1], 1))
    coeff, epoch = gradient_descent(X_train, y_train_all[cl], coeff)
    coeff_list.append(coeff)


In [0]:
X_test = normalizeX(X_test)
y_estimate = predict_y(X_test, coeff_list)

In [0]:
y_true = y_test
accuracy = np.sum(y_true == y_estimate)/len(y_true)
print("Accuracy:", accuracy)

Accuracy: 0.7777777777777778


In [0]:
y_true = y_test
def true_pos(y, y_pred):
    return np.sum((y == 1) & (y_pred == 1))

def true_neg(y, y_pred):
    return np.sum((y == 0) & (y_pred == 0))

In [0]:
from sklearn.linear_model import LogisticRegression

X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.4) 
model = LogisticRegression(solver = 'lbfgs', max_iter=50000, multi_class='multinomial')
model.fit(X, y)
y2_pred = model.predict(X2_train)
accuracy2 = (true_pos(y2_train, y2_pred) + true_neg(y2_train, y2_pred))/len(y2_train)
print(accuracy2)

0.7
