In [19]:
import math 
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [20]:
def predict_row(row, coef):
    linear_sum = coef[-1]  # a_0
    for i in range(len(row)):
        linear_sum = linear_sum + row[i]*coef[i]
    
    prob = 1/(1 + math.exp(linear_sum))
    
    ## if prob = 0.7 --> prob row to be in class '1' is 0.7 
    ##               --> class of row should be '1'
    
    ## if prob = 0.2 --> prob row to be in class '1' is 0.2 
    ##               --> class of row should be '0'
    
    if prob > 0.5:
        class_row = 1
    else:
        class_row = 0
    
    return class_row
        

def predict_dataset(X_train, coef):
    yhat = []
    for row in X_train:
        y_app = predict_row(row, coef)
        yhat.append(y_app)
    return yhat


def evaluation(X_train,y_train, coef):
    yhat = predict_dataset(X_train, coef)
    #print(len(y_train)
    ls_y = y_train.tolist()
    acc = accuracy_score(ls_y, yhat)
    return acc


def train(X_train, y_train, n_iter,step_size, n_coef, init_coef):
    best_coef = init_coef
    best_accuracy = evaluation(X_train,y_train, init_coef)
    
    for i in range(n_iter):
        new_coef = best_coef + step_size*np.random.randn(n_coef)
        new_accuracy = evaluation(X_train,y_train, new_coef)
        
        if  new_accuracy > best_accuracy:
            best_coef = new_coef
            best_accuracy = new_accuracy
        
        print(i, best_accuracy)
    
    return best_coef, best_accuracy
            

In [21]:
X, y = make_classification(n_samples = 1000, n_features = 5, n_informative=2, random_state=2)

In [22]:
X.shape

(1000, 5)

In [23]:
y.shape

(1000,)

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((800, 5), (200, 5), (800,), (200,))

In [25]:
X_train.shape[1]

5

In [26]:
#initial hyperparameter
n_coef = X_train.shape[1] + 1 
n_iter = 100
step_size = 0.2 

In [27]:
#random coeff 
init_coef = np.random.rand(n_coef)
init_coef

array([0.18096264, 0.53629165, 0.566158  , 0.22598384, 0.76720752,
       0.45167645])

In [28]:
#training
best_coef, accuracy_score = train(X_train, y_train, n_iter, step_size, n_coef, init_coef)

0 0.60125
1 0.66625
2 0.795
3 0.795
4 0.795
5 0.795
6 0.82125
7 0.82125
8 0.82125
9 0.82125
10 0.8275
11 0.8275
12 0.85375
13 0.86125
14 0.89125
15 0.89125
16 0.89125
17 0.89125
18 0.89125
19 0.89125
20 0.89125
21 0.89125
22 0.89125
23 0.8925
24 0.8925
25 0.8925
26 0.8925
27 0.8925
28 0.8925
29 0.8925
30 0.8925
31 0.8925
32 0.8925
33 0.8925
34 0.89375
35 0.89375
36 0.89375
37 0.89375
38 0.89375
39 0.89375
40 0.89375
41 0.89375
42 0.895
43 0.895
44 0.895
45 0.895
46 0.895
47 0.895
48 0.895
49 0.8975
50 0.8975
51 0.8975
52 0.8975
53 0.8975
54 0.8975
55 0.8975
56 0.8975
57 0.8975
58 0.8975
59 0.8975
60 0.8975
61 0.8975
62 0.8975
63 0.8975
64 0.8975
65 0.8975
66 0.8975
67 0.8975
68 0.8975
69 0.8975
70 0.8975
71 0.8975
72 0.8975
73 0.8975
74 0.8975
75 0.8975
76 0.8975
77 0.8975
78 0.8975
79 0.8975
80 0.8975
81 0.8975
82 0.8975
83 0.8975
84 0.8975
85 0.8975
86 0.8975
87 0.8975
88 0.8975
89 0.8975
90 0.89875
91 0.89875
92 0.9
93 0.9
94 0.9
95 0.9
96 0.9
97 0.9
98 0.9
99 0.9


In [29]:
best_coef

array([-0.56672137, -0.40243285,  0.06187608,  0.02285569,  0.61519492,
       -0.09571669])

In [30]:
y_test_hat = predict_dataset(X_test, best_coef)

In [31]:
y_test_hat[0]

0

In [32]:
count_correct = 0
for i in range(len(y_test_hat)):
    if y_test_hat[i] == y_test[i]:
        count_correct = count_correct + 1 

In [33]:
count_correct/len(y_test_hat)

0.895