In [34]:
import numpy as np 
import pickle
import math
from featureSelection import features_selection

with open('data_train.pkl', 'rb') as f:
    data = pickle.load(f)
    
features_list=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]
X_train, X_val, y_train, y_val = features_selection(features_list)

def sigmoid(Z):
    return 1/(1+np.exp(-Z))


def initialize_parameters(dim):
    W = np.random.rand(dim, 1)
    b = 0
    return W, b

W,b = initialize_parameters(X_train.shape[1])



def compute_gradient(X, Y, W, b):
    

    m,n = X.shape
    dj_dw = np.zeros((n,))                           #(n,)
    dj_db = 0.
    loss=0.
    
    
    for i in range(m):
        A=(np.dot(X[i],W)+b)
       
        loss+=Y[i]*np.log(sigmoid(A[0]))+(1-Y[i])*np.log(1-sigmoid(A[0]))
        Z=sigmoid(A[0])
        for j in range(n):
            dj_dw[j]=dj_dw[j]+(Z-Y[i])*X[i,j]
        dj_db+=(Z-Y[i])
        dj_dw = dj_dw/m                                   #(n,)
        dj_db = dj_db/m  
        
    cost=-loss/m
        
    
    return cost,dj_dw, dj_db




def predict(X_i, W,  b,threshold=0.5):
    A=(np.dot(X_i,W)+b)
    Z=  sigmoid(A[0])
    if Z >threshold:
        return 1
    else:
        return 0
    

def accuracy(X, y, W, b):
    
    m = X.shape[0]
    correct = 0
    for i in range(m):
        if predict(X[i], W, b) == y[i]:
            correct += 1
        
            
    return (correct/m)*100



def gradient_descent(X, Y, W, b, learning_rate, num_iterations):
    J_history = []
    for i in range(num_iterations):
        
        cost,dW,db=compute_gradient(X, Y, W, b)
        W=W-learning_rate*dW
        b=b-learning_rate*db
        
        if i% math.ceil(num_iterations / 10) == 0:
                print(f"Iteration {i:4d}: Cost {cost}   ")
                print(f"Accuracy on training set: {accuracy(X, Y, W, b)}%")
    return W,b

W,b=gradient_descent(X_train, y_train, W, b, 0.5, 2000)


np.savez('model.npz', W=W, b=b, features_list=features_list)



Iteration    0: Cost 7.260476429358543   
Accuracy on training set: 14.6218487394958%
Iteration  200: Cost 7.231812213021109   
Accuracy on training set: 14.6218487394958%


In [2]:
import numpy as np 
import pickle
import math
from featureSelection import features_selection

with open('data_train.pkl', 'rb') as f:
    data = pickle.load(f)
    
features_list=[0,1,2]
X_train, X_val, y_train, y_val = features_selection(features_list)

def sigmoid(Z):
    return 1/(1+np.exp(-Z))


def initialize_parameters(dim):
    W = np.random.rand(dim, 1)
    b = 0
    return W, b

W,b = initialize_parameters(X_train.shape[1])



def compute_gradient(X, Y, W, b):
    

    m,n = X.shape
    dj_dw = np.zeros((n,))                           #(n,)
    dj_db = 0.
    loss=0.
    
    
    for i in range(m):
        A=(np.dot(X[i],W)+b)
       
        loss+=Y[i]*np.log(sigmoid(A[0]))+(1-Y[i])*np.log(1-sigmoid(A[0]))
        Z=sigmoid(A[0])
        for j in range(n):
            dj_dw[j]=dj_dw[j]+(Z-Y[i])*X[i,j]
        dj_db+=(Z-Y[i])
        dj_dw = dj_dw/m                                   #(n,)
        dj_db = dj_db/m  
        
    cost=-loss/m
        
    
    return cost,dj_dw, dj_db


def predict(X_i, W,  b,threshold=0.5):
    A=(np.dot(X_i,W)+b)
    Z=  sigmoid(A[0])
    if Z >threshold:
        return 1
    else:
        return 0
    

def accuracy(X, y, W, b):
    
    m = X.shape[0]
    correct = 0
    for i in range(m):
        if predict(X[i], W, b) == y[i]:
            correct += 1
        
            
    return (correct/m)*100



def gradient_descent(X, Y, W, b, learning_rate, num_iterations):
    J_history = []
    for i in range(num_iterations):
        
        cost,dW,db=compute_gradient(X, Y, W, b)
        W=W-learning_rate*dW
        b=b-learning_rate*db
        
        if i% math.ceil(num_iterations / 10) == 0:
                print(f"Iteration {i:4d}: Cost {cost}   ")
                print(f"Accuracy on training set: {accuracy(X, Y, W, b)}%")
    return W,b

W,b=gradient_descent(X_train, y_train, W, b, 0.5, 2000)


np.savez('model.npz', W=W, b=b, features_list=features_list)



Iteration    0: Cost 1.2326203413764087   
Accuracy on training set: 14.6218487394958%


KeyboardInterrupt: 

In [28]:
#TEST CELL, DONT TOUCH UNTIL DONE
import pickle 
import numpy as np
from featureSelection import features_selection_test_set


model=np.load('model.npz')

W=model['W']
b=model['b']
features_list=model['features_list']


X_test, y_test = features_selection_test_set(features_list)

#RANDOMIZED FOR TESTING PURPOSE, WILL BE IMPORTED FROM TRAINING SOON


def sigmoid(X):
    return 1/(1+np.exp(-X))


def predict(X_i, W,  b,threshold=0.5):
    A=(np.dot(X_i,W)+b)
    Z=  sigmoid(A[0])
    if Z >threshold:
        return 1
    else:
        return 0
    

def accuracy(X, y, W, b):
    
    m = X.shape[0]
    correct = 0
    for i in range(m):
        if predict(X[i], W, b) == y[i]:
            correct += 1
        
            
    return (correct/m)*100


acc=accuracy(X_test, y_test, W, b)
print(acc)


12.705882352941176
