In [1]:
# import libraries

import numpy as np
import matplotlib.pyplot as plt
import copy
import math
import pandas as pd

%matplotlib inline

In [2]:
# read the dataset

df = pd.read_csv("dataset/scores.txt", header = None)
df.columns = ["score1","score2","admitted"]
df.head()

Unnamed: 0,score1,score2,admitted
0,34.62366,78.024693,0
1,30.286711,43.894998,0
2,35.847409,72.902198,0
3,60.182599,86.308552,1
4,79.032736,75.344376,1


In [3]:
#convert the pandas df to numpy arrange

X_train = df[["score1","score2"]].to_numpy()
y_train = df["admitted"].astype(float).to_numpy()

#### dataset
- `X_train` contains exam scores on two exams for a student
- `y_train` is the admission decision 
       `y_train = 1` if the student was admitted 
       `y_train = 0` if the student was not admitted

In [4]:
print("First five elements in X_train are:\n", X_train[:5])
print("Type of X_train:",type(X_train))

First five elements in X_train are:
 [[34.62365962 78.02469282]
 [30.28671077 43.89499752]
 [35.84740877 72.90219803]
 [60.18259939 86.3085521 ]
 [79.03273605 75.34437644]]
Type of X_train: <class 'numpy.ndarray'>


In [5]:
print("First five elements in y_train are:\n", y_train[:5])
print("Type of y_train:",type(y_train))

First five elements in y_train are:
 [0. 0. 0. 1. 1.]
Type of y_train: <class 'numpy.ndarray'>


In [6]:
# Check the dimensions of your variables

print ('The shape of X_train is: ' + str(X_train.shape))
print ('The shape of y_train is: ' + str(y_train.shape))
print ('We have m = %d training examples' % (len(y_train)))

The shape of X_train is: (100, 2)
The shape of y_train is: (100,)
We have m = 100 training examples


####  Sigmoid function, cost, gradient and gradient descent

In [7]:
# sigmoid function

def sigmoid(z):
    g = 1 / (1 + (np.exp(-z)))  
    return g

In [8]:
# compute_cost

def compute_cost(X, y, w, b, lambda_= 1):
    m, n = X.shape
    loss_sum = 0.0
    
    for i in range(m):
        z = np.dot(X[i],w) + b
        f_wb = sigmoid(z)
        loss = (-y[i] * np.log(f_wb)) - (1-y[i])* np.log(1-f_wb)
        loss_sum += loss
    
    total_cost = 1/m * loss_sum
   
    return total_cost

In [13]:
m, n = X_train.shape

# Compute and display cost with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.
cost = compute_cost(X_train, y_train, initial_w, initial_b)
print('Cost at initial w (zeros): {:.3f}'.format(cost))

Cost at initial w (zeros): 0.693


In [14]:
# Compute and display cost with non-zero w
test_w = np.array([0.2, 0.2])
test_b = -24.
cost = compute_cost(X_train, y_train, test_w, test_b)

print('Cost at test w,b: {:.3f}'.format(cost))


Cost at test w,b: 0.218


In [15]:
# compute_gradient

def compute_gradient(X, y, w, b, lambda_=None): 
    m, n = X.shape
    dj_dw = np.zeros(w.shape)
    dj_db = 0.0

    for i in range(m):
        z_wb = np.dot(X[i],w) + b
        f_wb = sigmoid(z_wb)
        
        dj_dw_i = (f_wb - y[i]) * X[i]
        dj_dw += dj_dw_i
        
        dj_db_i = f_wb -y[i]
        dj_db += dj_db_i
            
    dj_dw = 1/m * dj_dw
    dj_db = 1/m * dj_db
        
    return dj_db, dj_dw

In [16]:
# Compute and display gradient with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.

dj_db, dj_dw = compute_gradient(X_train, y_train, initial_w, initial_b)
print(f'dj_db at initial w (zeros):{dj_db}' )
print(f'dj_dw at initial w (zeros):{dj_dw.tolist()}' )

dj_db at initial w (zeros):-0.1
dj_dw at initial w (zeros):[-12.00921658929115, -11.262842205513591]


In [17]:
# Compute and display cost and gradient with non-zero w
test_w = np.array([ 0.2, -0.5])
test_b = -24
dj_db, dj_dw  = compute_gradient(X_train, y_train, test_w, test_b)

print('dj_db at test_w:', dj_db)
print('dj_dw at test_w:', dj_dw.tolist())

dj_db at test_w: -0.5999999999991071
dj_dw at test_w: [-44.831353617873795, -44.37384124953979]


#### gradient descent 

find the optimal parameters of a logistic regression model by using gradient descent. 

In [18]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_): 
    
    # number of training examples
    m = len(X)
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w_history = []
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in, lambda_)   

        # Update Parameters using w, b, alpha and gradient
        w_in = w_in - alpha * dj_dw               
        b_in = b_in - alpha * dj_db              
       
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            cost =  cost_function(X, y, w_in, b_in, lambda_)
            J_history.append(cost)

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters/10) == 0 or i == (num_iters-1):
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")
        
    return w_in, b_in, J_history, w_history #return w and J,w history for graphing

run the gradient descent algorithm above to learn the parameters for our dataset.

In [19]:
np.random.seed(1)
intial_w = 0.01 * (np.random.rand(2).reshape(-1,1) - 0.5)
initial_b = -8


# Some gradient descent settings
iterations = 10000
alpha = 0.001

w,b, J_history,_ = gradient_descent(X_train ,y_train, initial_w, initial_b, 
                                   compute_cost, compute_gradient, alpha, iterations, 0)

print(w,b)

Iteration    0: Cost     1.01   
Iteration 1000: Cost     0.31   
Iteration 2000: Cost     0.30   
Iteration 3000: Cost     0.30   
Iteration 4000: Cost     0.30   
Iteration 5000: Cost     0.30   
Iteration 6000: Cost     0.30   
Iteration 7000: Cost     0.30   
Iteration 8000: Cost     0.30   
Iteration 9000: Cost     0.30   
Iteration 9999: Cost     0.30   
[0.07125349 0.06482881] -8.188614567810179


In [20]:
# predict

def predict(X, w, b): 

    # number of training examples
    m, n = X.shape   
    p = np.zeros(m)
   
    # Loop over each example
    for i in range(m):   
        z_wb = 0.0
        
        
        # Loop over each feature
        for j in range(n): 
            # Add the corresponding term to z_wb
            z_wb_ij = X[i,j] * w[j]
            z_wb += z_wb_ij
        
        # Add bias term 
        z_wb += b
        
        # Calculate the prediction for this example
        f_wb = sigmoid(z_wb)

        # Apply the threshold
        if f_wb >= 0.5:
            p[i] = 1
        else:
            p[i] = 0
        
    return p

#### Predict
- Predict if students with the following scores with be admitted
<table>
    <tr>
        <td>score1</td>
        <td>score2</td>
    </tr>
    <tr>
        <td>70.34</td>
        <td>82.30</td>
    </tr>
    <tr>
        <td>34.89</td>
        <td>90</td>
    </tr>
    <tr>
        <td>81</td>
        <td>56.45</td>
    </tr>
    <tr>
        <td>51.19</td>
        <td>23.98</td>
    </tr>
</table>

In [22]:
# predict

scores_X = np.array([[70.34, 82.30],[34.89, 90], [81,56.45], [51.19, 23.98]])

admission = predict(scores_X, w, b)
print(admission)

[1. 1. 1. 0.]


#### Prediction
<table>
    <tr>
        <td>score1</td>
        <td>score2</td>
        <td>Prediction</td>
    </tr>
    <tr>
        <td>70.34</td>
        <td>82.30</td>
        <td>1</td>
    </tr>
    <tr>
        <td>34.89</td>
        <td>90</td>
        <td>1</td>
    </tr>
    <tr>
        <td>81</td>
        <td>56.45</td>
        <td>1</td>
    </tr>
    <tr>
        <td>51.19</td>
        <td>23.98</td>
        <td>0</td>
    </tr>
</table>

Now let's use this to compute the accuracy on the training set

In [23]:
#Compute accuracy on our training set

p = predict(X_train, w,b)
print('Train Accuracy: %f'%(np.mean(p == y_train) * 100))

Train Accuracy: 92.000000
