In [20]:
import pandas as pd
import numpy as np


def evaluate_cost(x,y,weights,lambda_):
    """Return the L2-regularised logistic-regression cost.

    The cost is the mean cross-entropy between the labels ``y`` and the
    sigmoid of ``x @ weights``, plus ``lambda_ / (2 * m)`` times the sum of
    the squared weights, where ``m = len(y)``.  The weight at index 0 is
    excluded from the penalty.

    Args:
        x: Design matrix with one row per sample.
        y: Labels, one per sample, shaped like ``x @ weights``.
        weights: Model weights (column vector or 1-D array).
        lambda_: Regularisation strength.

    Returns:
        The scalar cost.
    """
    z = x @ weights
    m = len(y)
    # -log(sigmoid(z)) == log(1 + exp(-z)) and
    # -log(1 - sigmoid(z)) == log(1 + exp(z)).  np.logaddexp evaluates both
    # without overflow, so a saturated sigmoid no longer produces log(0)
    # (which turned the cost into inf, or nan through 0 * inf).
    loss_if_positive = np.logaddexp(0, -z)
    loss_if_negative = np.logaddexp(0, z)
    cost = (np.transpose(y) @ loss_if_positive
            + np.transpose(1 - y) @ loss_if_negative)
    cost = np.sum(cost) / m
    # np.sum reduces over every axis, so 1-D and column-vector weights both work.
    cost += lambda_ * np.sum(np.power(weights[1:], 2)) / (2 * m)
    return cost

def evaluate_gradient(x,y,weights,lambda_):
    """Return the gradient of the regularised logistic cost.

    The result is ``x.T @ (sigmoid(x @ weights) - y)``, with
    ``lambda_ * weights[i]`` added to every component except index 0, and
    the whole vector divided by ``len(y)``.

    Args:
        x: Design matrix with one row per sample.
        y: Labels, one per sample, shaped like ``x @ weights``.
        weights: Current model weights.
        lambda_: Regularisation strength.

    Returns:
        An array with the gradient for each weight.
    """
    sigmoid = np.power(1 + np.exp(-1 * (x @ weights)), -1)
    residual = sigmoid - y
    grad = np.transpose(x) @ residual
    # Penalise every weight except the one at index 0.
    n_features = len(x[0])
    grad[1:n_features] += lambda_ * weights[1:n_features]
    return grad / len(y)

def update_weights(old_weights, grad, alpha):
    """Take one gradient-descent step and return the new weights.

    Args:
        old_weights: Weights before the step.
        grad: Gradient of the cost at ``old_weights``.
        alpha: Learning rate that scales the step.

    Returns:
        ``old_weights - alpha * grad``.
    """
    step = alpha * grad
    return old_weights - step

# Columns whose string categories are replaced by sorted integer codes.
# 'contact' is also listed in `elim`, so it is dropped before it would be
# encoded; the target 'y' is encoded separately further down.
str_to_int  = ['marital','default','housing','loan','contact','month','y']
# Columns left out of the feature matrix.
elim = ['education','job','contact','poutcome']
data1=pd.read_csv("bank-full.csv",delimiter=";")
data2=pd.read_csv("bank.csv",delimiter=";")
# data1's rows come first, so the train/test split below separates the files.
data = pd.concat([data1, data2])

columns = list(data)

# Build one scaled column per retained attribute.  The last column is
# skipped here because it is handled as the target below.
attr = []
for name in columns[:-1]:
    if name in elim:
        continue
    if name in str_to_int:
        values, un = pd.factorize(data[name], sort=True)
    else:
        values = data[name].to_numpy()

    # Scaling uses the range of the absolute values, as before.
    magnitude = np.abs(values)
    lo = magnitude.min()
    hi = magnitude.max()
    if values.max() != values.min() and hi != lo:
        values = (values - lo) / (hi - lo)
    elif hi != 0:
        # Constant column, or every magnitude equal: scale by the magnitude.
        values = values / hi
    else:
        # All-zero column: keep zeros rather than dividing 0 by 0.
        values = np.zeros(len(values))
    attr.append(values)

# Target as a float column vector of sorted category codes.
y1, un = pd.factorize(data['y'], sort=True)
y = y1.astype(float).reshape(-1, 1)

# Column 0 stays at 1 (the bias term); the scaled attributes fill the rest.
x = np.ones([len(y), len(columns) - len(elim)])
x[:, 1:] = np.column_stack(attr)

# Train on the rows of bank-full.csv and evaluate on the rows of bank.csv.
# NOTE(review): bank.csv is presumably a sample drawn from bank-full.csv, in
# which case the test rows also appear in the training set - confirm.
n_train = len(data1)
x_train = x[:n_train]
y_train = y[:n_train]

x_test = x[n_train:]
y_test = y[n_train:]

# Hyper-parameters for batch gradient descent.  The weight vector has one
# entry per column of the design matrix.
parameters = {'init_weights':np.ones((x_train.shape[1],1)) ,
             'max_iterations':10000,
             'alpha': 10,
            'lambda':1}
# gradient descent
weights = parameters['init_weights']
max_iterations = parameters['max_iterations']

for i in range(max_iterations):
    grad = evaluate_gradient(x_train,y_train,weights,parameters['lambda'])
    report = (i % 10 == 0)
    # The cost only feeds the progress display, so the extra pass over the
    # training set is skipped on iterations that print nothing.  The last
    # iteration is still evaluated so `cost` ends with the same value as
    # when it was computed on every iteration.
    if report or i == max_iterations - 1:
        cost = evaluate_cost(x_train,y_train,weights,parameters['lambda'])
    # display
    if report:
        print('iteration: {} cost: {} '.format(i, cost))
    # update parameters
    weights = update_weights(weights,grad,parameters['alpha'])

# Predicted probabilities (sigmoid of the linear score) for the test rows.
h = np.power(1 + np.exp(-1 * (x_test @ weights)), -1)

# Confusion-matrix counts at a 0.5 decision threshold.  A predicted positive
# counts as a true positive only when the label is 1, and a predicted
# negative counts as a true negative only when the label is 0.
predicted_positive = h >= 0.5
tp = int(np.count_nonzero(predicted_positive & (y_test == 1)))
fp = int(np.count_nonzero(predicted_positive & (y_test != 1)))
tn = int(np.count_nonzero(~predicted_positive & (y_test == 0)))
fn = int(np.count_nonzero(~predicted_positive & (y_test != 0)))

print('Accuracy (testing set) : {}'.format(((tp+tn)/(tp+tn+fp+fn))))


# Predicted probabilities (sigmoid of the linear score) for the training rows.
h = np.power(1 + np.exp(-1 * (x_train @ weights)), -1)

# Confusion-matrix counts at a 0.5 decision threshold.  A predicted positive
# counts as a true positive only when the label is 1, and a predicted
# negative counts as a true negative only when the label is 0.
predicted_positive = h >= 0.5
tp = int(np.count_nonzero(predicted_positive & (y_train == 1)))
fp = int(np.count_nonzero(predicted_positive & (y_train != 1)))
tn = int(np.count_nonzero(~predicted_positive & (y_train == 0)))
fn = int(np.count_nonzero(~predicted_positive & (y_train != 0)))

print('Accuracy (training set) : {}'.format(((tp+tn)/(tp+tn+fp+fn))))



iteration: 0 cost: 3.366129782897639 
iteration: 10 cost: 0.3488755241307094 
iteration: 20 cost: 0.3471681892753232 
iteration: 30 cost: 0.3445373599954622 
iteration: 40 cost: 0.3403819983448064 
iteration: 50 cost: 0.3363968194579946 
iteration: 60 cost: 0.33262315173150225 
iteration: 70 cost: 0.3290445452703111 
iteration: 80 cost: 0.32565058357926907 
iteration: 90 cost: 0.32243311839016814 
iteration: 100 cost: 0.31938472156086645 
iteration: 110 cost: 0.3164984298111988 
iteration: 120 cost: 0.3137677599064749 
iteration: 130 cost: 0.31118674583952244 
iteration: 140 cost: 0.30874997865453346 
iteration: 150 cost: 0.30645266781578245 
iteration: 160 cost: 0.30429073565660747 
iteration: 170 cost: 0.30226094551303556 
iteration: 180 cost: 0.3003610566631604 
iteration: 190 cost: 0.29858999099361977 
iteration: 200 cost: 0.2969479797325038 
iteration: 210 cost: 0.29543662529915427 
iteration: 220 cost: 0.2940587581740989 
iteration: 230 cost: 0.29281790074669695 
iteration: 240 c

iteration: 1960 cost: 0.28159166057791046 
iteration: 1970 cost: 0.2815907646668105 
iteration: 1980 cost: 0.2815898834898702 
iteration: 1990 cost: 0.28158901674919007 
iteration: 2000 cost: 0.28158816415418636 
iteration: 2010 cost: 0.28158732542133696 
iteration: 2020 cost: 0.2815865002739987 
iteration: 2030 cost: 0.281585688442162 
iteration: 2040 cost: 0.2815848896622693 
iteration: 2050 cost: 0.28158410367701797 
iteration: 2060 cost: 0.28158333023515364 
iteration: 2070 cost: 0.28158256909132856 
iteration: 2080 cost: 0.28158182000589665 
iteration: 2090 cost: 0.28158108274475757 
iteration: 2100 cost: 0.28158035707919415 
iteration: 2110 cost: 0.2815796427857328 
iteration: 2120 cost: 0.28157893964597225 
iteration: 2130 cost: 0.2815782474464617 
iteration: 2140 cost: 0.28157756597854555 
iteration: 2150 cost: 0.2815768950382556 
iteration: 2160 cost: 0.28157623442614327 
iteration: 2170 cost: 0.2815755839472023 
iteration: 2180 cost: 0.2815749434107179 
iteration: 2190 cost: 

iteration: 3920 cost: 0.28153126756035207 
iteration: 3930 cost: 0.2815311793519134 
iteration: 3940 cost: 0.28153109185601494 
iteration: 3950 cost: 0.28153100506475814 
iteration: 3960 cost: 0.2815309189703886 
iteration: 3970 cost: 0.28153083356525277 
iteration: 3980 cost: 0.2815307488418188 
iteration: 3990 cost: 0.28153066479265054 
iteration: 4000 cost: 0.2815305814104293 
iteration: 4010 cost: 0.2815304986879426 
iteration: 4020 cost: 0.2815304166180886 
iteration: 4030 cost: 0.28153033519384785 
iteration: 4040 cost: 0.28153025440832086 
iteration: 4050 cost: 0.2815301742547016 
iteration: 4060 cost: 0.2815300947262811 
iteration: 4070 cost: 0.28153001581644044 
iteration: 4080 cost: 0.2815299375186685 
iteration: 4090 cost: 0.2815298598265388 
iteration: 4100 cost: 0.28152978273370954 
iteration: 4110 cost: 0.28152970623393264 
iteration: 4120 cost: 0.2815296303210602 
iteration: 4130 cost: 0.28152955498901355 
iteration: 4140 cost: 0.2815294802318086 
iteration: 4150 cost: 0

iteration: 5870 cost: 0.28152196098096305 
iteration: 5880 cost: 0.281521937198122 
iteration: 5890 cost: 0.28152191355753015 
iteration: 5900 cost: 0.28152189005826156 
iteration: 5910 cost: 0.28152186669939055 
iteration: 5920 cost: 0.2815218434800124 
iteration: 5930 cost: 0.28152182039922574 
iteration: 5940 cost: 0.28152179745613115 
iteration: 5950 cost: 0.2815217746498412 
iteration: 5960 cost: 0.2815217519794752 
iteration: 5970 cost: 0.2815217294441545 
iteration: 5980 cost: 0.28152170704300966 
iteration: 5990 cost: 0.2815216847751856 
iteration: 6000 cost: 0.2815216626398215 
iteration: 6010 cost: 0.28152164063607077 
iteration: 6020 cost: 0.28152161876308807 
iteration: 6030 cost: 0.28152159702003443 
iteration: 6040 cost: 0.28152157540608774 
iteration: 6050 cost: 0.2815215539204223 
iteration: 6060 cost: 0.2815215325622183 
iteration: 6070 cost: 0.28152151133066206 
iteration: 6080 cost: 0.28152149022495426 
iteration: 6090 cost: 0.2815214692442883 
iteration: 6100 cost: 

iteration: 7810 cost: 0.28151920070998365 
iteration: 7820 cost: 0.2815191930316854 
iteration: 7830 cost: 0.28151918539728843 
iteration: 7840 cost: 0.28151917780653546 
iteration: 7850 cost: 0.28151917025917134 
iteration: 7860 cost: 0.2815191627549523 
iteration: 7870 cost: 0.2815191552936207 
iteration: 7880 cost: 0.28151914787493104 
iteration: 7890 cost: 0.28151914049863863 
iteration: 7900 cost: 0.28151913316449123 
iteration: 7910 cost: 0.2815191258722467 
iteration: 7920 cost: 0.28151911862166096 
iteration: 7930 cost: 0.28151911141250013 
iteration: 7940 cost: 0.28151910424451754 
iteration: 7950 cost: 0.2815190971174729 
iteration: 7960 cost: 0.281519090031137 
iteration: 7970 cost: 0.28151908298526696 
iteration: 7980 cost: 0.28151907597963216 
iteration: 7990 cost: 0.2815190690139973 
iteration: 8000 cost: 0.2815190620881295 
iteration: 8010 cost: 0.28151905520180676 
iteration: 8020 cost: 0.28151904835479313 
iteration: 8030 cost: 0.28151904154685997 
iteration: 8040 cost

iteration: 9740 cost: 0.28151830027387215 
iteration: 9750 cost: 0.2815182977185576 
iteration: 9760 cost: 0.2815182951777138 
iteration: 9770 cost: 0.28151829265125333 
iteration: 9780 cost: 0.28151829013909424 
iteration: 9790 cost: 0.2815182876411544 
iteration: 9800 cost: 0.28151828515735294 
iteration: 9810 cost: 0.28151828268760887 
iteration: 9820 cost: 0.2815182802318471 
iteration: 9830 cost: 0.2815182777899828 
iteration: 9840 cost: 0.28151827536194146 
iteration: 9850 cost: 0.281518272947639 
iteration: 9860 cost: 0.28151827054700257 
iteration: 9870 cost: 0.28151826815994885 
iteration: 9880 cost: 0.2815182657864051 
iteration: 9890 cost: 0.28151826342629427 
iteration: 9900 cost: 0.28151826107954 
iteration: 9910 cost: 0.28151825874606096 
iteration: 9920 cost: 0.28151825642578626 
iteration: 9930 cost: 0.28151825411864034 
iteration: 9940 cost: 0.28151825182454876 
iteration: 9950 cost: 0.28151824954344185 
iteration: 9960 cost: 0.2815182472752356 
iteration: 9970 cost: 0

Accuracy (training set) : 0.8901152374422154
Precision (training set) : 0.900830237145085
Recall (training set) : 0.9838685436601373
F1-Score (training set) : 0.940520090034002
39922
5289
4000
521
