In [1]:
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import LabelEncoder
import numpy as np
import matplotlib.pyplot as plt
import copy
import math

%matplotlib inline

# Preparing the data

In [2]:
# fetch dataset 
iris = fetch_ucirepo(id=53)

In [3]:
iris

{'data': {'ids': None,
  'features':      sepal length  sepal width  petal length  petal width
  0             5.1          3.5           1.4          0.2
  1             4.9          3.0           1.4          0.2
  2             4.7          3.2           1.3          0.2
  3             4.6          3.1           1.5          0.2
  4             5.0          3.6           1.4          0.2
  ..            ...          ...           ...          ...
  145           6.7          3.0           5.2          2.3
  146           6.3          2.5           5.0          1.9
  147           6.5          3.0           5.2          2.0
  148           6.2          3.4           5.4          2.3
  149           5.9          3.0           5.1          1.8
  
  [150 rows x 4 columns],
  'targets':               class
  0       Iris-setosa
  1       Iris-setosa
  2       Iris-setosa
  3       Iris-setosa
  4       Iris-setosa
  ..              ...
  145  Iris-virginica
  146  Iris-virginica
  147  

In [3]:
# data (as pandas dataframes) 
X = iris.data.features
y = iris.data.targets

In [4]:
print(X[:5])
print(X.shape)
print(type(X))

   sepal length  sepal width  petal length  petal width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2
(150, 4)
<class 'pandas.core.frame.DataFrame'>


In [5]:
print(y[:5])
print(y.shape)
print(type(y))
print(len(y))

         class
0  Iris-setosa
1  Iris-setosa
2  Iris-setosa
3  Iris-setosa
4  Iris-setosa
(150, 1)
<class 'pandas.core.frame.DataFrame'>
150


In [6]:
le = LabelEncoder()
label = le.fit_transform(y['class'])

In [7]:
y.drop('class', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [8]:
y['class'] = label

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y['class'] = label


In [9]:
y[:5]

Unnamed: 0,class
0,0
1,0
2,0
3,0
4,0


In [10]:
X_np = X.to_numpy()
y_np = y.to_numpy()

In [11]:
X_np[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [12]:
y_np[:5]

array([[0],
       [0],
       [0],
       [0],
       [0]])

In [13]:
y_np = y_np.reshape(-1)

In [14]:
y_np.shape

(150,)

# Logistic Regression

## Basic Approach

In [44]:
def sigmoid(z): # Sigmoid function
    return 1/(1+np.exp(-z))

In [45]:
sigmoid(0)

0.5

In [46]:
def compute_cost(X, y, w, b, lambda_=1): # Cost function 
    m, n = X.shape
    cost = 0

    for i in range(m):
        z = np.dot(X[i], w) + b
        f_wb = sigmoid(z)
        cost += -y[i]*np.log(f_wb) - (1-y[i])*np.log(1-f_wb)
    total_cost = cost / m
    reg_cost = sum(np.square(w))
    
    return total_cost + (lambda_/(2 * m)) * reg_cost

In [48]:
m, n = X_np.shape

# Compute and display cost with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.
lambda_ = 0.5
cost = compute_cost(X_np, y_np, initial_w, initial_b, lambda_)
print(f'Cost at initial w (zeros): {cost:.3f}')

Cost at initial w (zeros): 0.693


In [49]:
test_w = np.array([0.2, 0.2, 0.2, 0.2])
test_b = -24.
lambda_ = 0.5
cost = compute_cost(X_np, y_np, test_w, test_b, lambda_ = 0.5)

print(f'Cost at test w,b: {cost:.3f}')

Cost at test w,b: 20.762


In [50]:
def compute_gradient(X, y, w, b, lambda_=1):
    m, n = X.shape
    dj_dw = np.zeros(w.shape)
    dj_db = 0.

    for i in range(m):
        f_wb_i = sigmoid(np.dot(X[i], w) + b)
        err_i = f_wb_i - y[i]
        for j in range(n):
            dj_dw[j] += err_i * X[i, j] + ((lambda_/m)) * w[j]
        dj_db += err_i
    dj_dw /= m
    dj_db /= m

    return dj_db, dj_dw

In [51]:
# Compute and display gradient with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.
lambda_ = 0.5

dj_db, dj_dw = compute_gradient(X_np, y_np, initial_w, initial_b, lambda_ = 0.5)
print(f'dj_db at initial w (zeros):{dj_db}' )
print(f'dj_dw at initial w (zeros):{dj_dw.tolist()}')

dj_db at initial w (zeros):-0.5
dj_dw at initial w (zeros):[-3.4490000000000007, -1.3790000000000002, -3.241999999999999, -1.1933333333333325]


In [52]:
test_w = np.array([0.2, -0.5, 0.3, -0.4])
test_b = -24
lambda_ = 0.5
dj_db, dj_dw  = compute_gradient(X_np, y_np, test_w, test_b)

print('dj_db at test_w:', dj_db)
print('dj_dw at test_w:', dj_dw.tolist())

dj_db at test_w: -0.999999999943004
dj_dw at test_w: [-6.369333332982542, -2.9093333331662774, -5.119333333078041, -1.7953333332499226]


In [53]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_):
    # number of training examples
    m = len(X)
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w_history = []
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in, lambda_)   

        # Update Parameters using w, b, alpha and gradient
        w_in = w_in - alpha * dj_dw               
        b_in = b_in - alpha * dj_db              
       
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            cost =  cost_function(X, y, w_in, b_in, lambda_)
            J_history.append(cost)

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters/10) == 0 or i == (num_iters-1):
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")
        
    return w_in, b_in, J_history, w_history #return w and J,w history for graphing

In [54]:
np.random.seed(42)
intial_w = 0.01 * (np.random.rand(4).reshape(-1,1) - 0.5)
initial_b = -8


# Some gradient descent settings
iterations = 3000
alpha = 0.001

w,b, J_history,_ = gradient_descent(X_np ,y_np, initial_w, initial_b, 
                                   compute_cost, compute_gradient, alpha, iterations, 0)

Iteration    0: Cost     7.92   
Iteration  300: Cost    -1.75   
Iteration  600: Cost    -2.75   
Iteration  900: Cost    -3.55   
Iteration 1200: Cost    -4.31   
Iteration 1500: Cost    -5.07   
Iteration 1800: Cost    -5.82   
Iteration 2100: Cost    -6.58   


  cost += -y[i]*np.log(f_wb) - (1-y[i])*np.log(1-f_wb)


Iteration 2400: Cost     -inf   
Iteration 2700: Cost     -inf   
Iteration 2999: Cost     -inf   


In [68]:
def predict(X, w, b):
    m = X.shape[0]
    p = np.zeros(m)

    for i in range(m):
        z = np.dot(w, X[i]) + b
        f_wb = sigmoid(z)
        if f_wb >= 0.5:
            p[i] = 1
        elif f_wb < 0.5:
            p[i] = 0
        else:
            p[i] = 2

    return p
        

In [69]:
np.random.seed(42)
tmp_w = np.random.randn(4)
tmp_b = 0.3    
tmp_X = np.random.randn(4, 4) - 0.5

tmp_p = predict(tmp_X, tmp_w, tmp_b)
print(f'Output of predict: shape {tmp_p.shape}, value {tmp_p}')

Output of predict: shape (4,), value [1. 0. 0. 0.]


In [70]:
p = predict(X_np, w,b)
print('Train Accuracy: %f'%(np.mean(p == y_np) * 100))

Train Accuracy: 33.333333


## Scikit approach

In [58]:
from sklearn.linear_model import LogisticRegression

In [59]:
clf = LogisticRegression(random_state=0).fit(X_np, y_np)

In [60]:
clf.predict(X_np)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [65]:
print(f'Train Accuracy: {clf.score(X_np, y_np)}')

Train Accuracy: 0.9733333333333334
