In [1]:
import numpy as np
import pandas as pd

In [2]:
def train(bias=None, weights=None, data=None, 
            assignment=None, lr=None, itr=None):
    
    for idx, each in enumerate(range(itr)):
        probability = np.exp(bias + (weights*data)) / (1 + (np.exp(bias + (weights*data))))
        loss = np.sum(assignment*np.log(probability) + (1-assignment)*np.log(1-probability))
        
        dll_dbias = np.sum(assignment - probability)
        dll_dweight = np.sum((assignment - probability)*data)

        bias = bias + lr*dll_dbias
        weights = bias + lr*dll_dweight
        
        if idx % 100 == 0:
            print('IDX: {}'.format(idx))
            print('Loss: {:.6f}'.format(loss))
            print('Bias: {:.6f}'.format(bias))
            print('Weight: {:.6f}'.format(weights))

            print('---')
        
    return bias, weights

def predict(bias=None, weights=None, data=None, cut=None):
    probability = np.exp(bias + (weights*data)) / (1 + (np.exp(bias + (weights*data))))
    y_pred_class = [0 if each<=cut else 1 for each in probability]
    return y_pred_class

In [3]:
class_1 = np.random.random(20)*2 + 1
class_2 = np.random.random(20)*2 - 0.5
df = pd.DataFrame()
df['x'] = np.concatenate([class_1, class_2])
df['y'] = [0]*20 + [1]*20
data_x = np.array(df['x'].tolist())
data_y = np.array(df['y'].tolist())

bias, weights = train(bias=0.5, weights=0.5, data=data_x, assignment=data_y, lr=0.01, itr=1000)
y_pred_class = predict(bias=bias, weights=weights, data=data_x, cut=0.5)

IDX: 0
Loss: -41.595513
Bias: 0.407742
Weight: 0.118218
---
IDX: 100
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 200
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 300
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 400
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 500
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 600
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 700
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 800
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---
IDX: 900
Loss: -26.626210
Bias: 0.078405
Weight: -0.071768
---


In [4]:
print('Bias: {:.5f}, Weight: {:.5f}'.format(bias, weights))

Bias: 0.07841, Weight: -0.07177


In [5]:
df_results = pd.DataFrame({'x':data_x, 'y_gt': data_y, 'y_pred': y_pred_class})
df_results['diff'] = df_results['y_gt'] - df_results['y_pred']

In [6]:
df_results.groupby(['y_gt', 'y_pred'], as_index=False).count()

Unnamed: 0,y_gt,y_pred,x,diff
0,0,0,19,19
1,0,1,1,1
2,1,0,1,1
3,1,1,19,19


In [7]:
df_results

Unnamed: 0,x,y_gt,y_pred,diff
0,1.8504,0,0,0
1,2.026037,0,0,0
2,1.864626,0,0,0
3,1.899476,0,0,0
4,1.521775,0,0,0
5,1.581035,0,0,0
6,2.004685,0,0,0
7,1.620333,0,0,0
8,1.699489,0,0,0
9,2.715576,0,0,0
