## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import time
np.random.seed(97)

# Base Network

In [2]:
class Model:
    """Sequential container that drives a list of layers.

    Each layer is expected to expose ``forward()`` and ``backward()`` methods
    that take no arguments.

    Parameters
    ----------
    layers : sequence
        Layers in forward order. Must support ``reversed()`` (e.g. a list).
    """

    def __init__(self, layers):
        self.layers = layers

    def forward(self):
        """Call ``forward()`` on every layer, first to last."""
        # Use the instance attribute; a bare `layers` would look up a global.
        for layer in self.layers:
            layer.forward()

    def backward(self):
        """Call ``backward()`` on every layer, last to first."""
        # Lists have no `.reversed()` method; the built-in `reversed()` yields
        # the layers in reverse order without mutating `self.layers`.
        for layer in reversed(self.layers):
            layer.backward()

### BinaryClassifier with vector calculation

In [3]:
    
class BinaryClassifier:
    """Single-unit logistic-regression classifier using vectorized NumPy ops.

    The weights ``w`` have shape ``DIM`` and start at zero; the bias ``b``
    starts at 0. ``forward`` caches the pre-activation in ``self.z`` and the
    clamped sigmoid output in ``self.a`` for use by ``backward``.
    """

    def __init__(self, DIM=(1,2)):
        self.DIM = DIM
        self.b = 0
        self.w = np.zeros(self.DIM, dtype=np.float64)

    def forward(self, X):
        """Return sigmoid(w . X^T + b), clamped away from exactly 0 and 1."""
        # Keeps the activations inside [2**-53, 1 - 2**-53] so that the logs
        # and divisions in `loss` / `backward` never see 0.
        eps = 2 ** -53

        logits = np.dot(self.w, X.T) + self.b
        self.z = logits
        sigmoid = 1 / (1 + np.exp(-logits))
        self.a = np.clip(sigmoid, eps, 1 - eps)
        return self.a

    def backward(self, X, y):
        """Return ``(dw, db)``, the cross-entropy gradients averaged over samples.

        Uses the activations cached by the most recent ``forward`` call.
        """
        act = self.a
        grad_a = -y / act + (1 - y) / (1 - act)   # dL/da
        grad_z = act * (1 - act) * grad_a         # dL/dz via sigmoid'
        grad_w = (X * grad_z.T).mean(axis=0)      # one entry per feature
        grad_b = grad_z.mean()
        return grad_w, grad_b

    def train(self, X, y, learning_rate):
        """Run one full-batch gradient-descent step on ``(X, y)``."""
        self.forward(X)
        grad_w, grad_b = self.backward(X, y)
        self.w -= learning_rate * grad_w
        self.b -= learning_rate * grad_b

    def predict(self, X):
        """Return the activations rounded to the nearest integer (0. or 1.)."""
        probabilities = self.forward(X)
        return probabilities.round()

    def loss(self, X, y):
        """Return the mean binary cross-entropy of the model on ``(X, y)``."""
        p = self.forward(X)
        log_likelihood = y * np.log(p) + (1 - y) * np.log(1 - p)
        return -np.mean(log_likelihood)


### BinaryClassifier with elementwise calculation

In [4]:
    
class BinaryClassifierElementwise:
    """Logistic-regression classifier computed one scalar at a time.

    Deliberately avoids vectorized NumPy operations: every sample and every
    feature is processed in explicit Python loops. The number of features is
    taken from ``w.shape[1]``, so any ``DIM=(1, n_features)`` is supported.
    Only the first row of ``w`` is used.
    """

    def __init__(self, DIM=(1,2)):
        self.DIM = DIM
        self.w = np.zeros(self.DIM, dtype=np.float64)
        self.b = 0

    def forward(self, X):
        """Return the clamped sigmoid activations with shape ``(1, n_samples)``.

        Also caches the pre-activations in ``self.z`` and the activations in
        ``self.a`` (both shaped ``(1, n_samples)``).
        """
        # Keeps activations inside [2**-53, 1 - 2**-53] so that the logs and
        # divisions in `loss` / `backward` never see 0.
        MIN_MARGIN = 2 ** -53

        n_samples = X.shape[0]
        n_features = self.w.shape[1]

        tmp_z = []
        tmp_a = []

        for i in range(n_samples):
            # Accumulate the weighted sum feature by feature, then add the
            # bias last (same evaluation order as w0*x0 + w1*x1 + b).
            z_i = 0.0
            for j in range(n_features):
                z_i += self.w[0][j] * X[i][j]
            z_i += self.b
            tmp_z.append(z_i)

            a_i = 1 / (1 + np.exp(-z_i))
            tmp_min = min(1 - MIN_MARGIN, a_i)
            tmp_a.append(tmp_min if tmp_min > MIN_MARGIN else MIN_MARGIN)

        self.z = np.array(tmp_z).reshape((1, n_samples))
        self.a = np.array(tmp_a).reshape((1, n_samples))
        return self.a

    def backward(self, X, y):
        """Return ``(dw, db)``, the cross-entropy gradients averaged over samples.

        ``dw`` is a 1-D array with one entry per feature; ``db`` is a scalar.
        Uses the activations cached by the most recent ``forward`` call.
        """
        X_nums = X.shape[0]
        n_features = self.w.shape[1]

        dw = [0.0] * n_features
        db = 0.0
        for i in range(X_nums):
            a_i = self.a[0][i]
            da_i = -y[i] / a_i + (1 - y[i]) / (1 - a_i)   # dL/da
            dz_i = a_i * (1 - a_i) * da_i                 # dL/dz via sigmoid'
            for j in range(n_features):
                dw[j] += X[i][j] * dz_i
            db += 1 * dz_i

        for j in range(n_features):
            dw[j] /= X_nums
        db /= X_nums
        return np.array(dw), db

    def train(self, X, y, learning_rate):
        """Run one full-batch gradient-descent step on ``(X, y)``."""
        self.forward(X)
        dw, db = self.backward(X, y)
        for j in range(len(dw)):
            self.w[0][j] -= learning_rate * dw[j]
        self.b -= learning_rate * db

    def predict(self, X):
        """Return the activations rounded to the nearest integer (0. or 1.)."""
        return np.round(self.forward(X))

    def loss(self, X, y):
        """Return the mean binary cross-entropy of the model on ``(X, y)``."""
        pred_y = self.forward(X)
        return -np.mean(y * np.log(pred_y) + (1 - y) * np.log(1 - pred_y))


# Train Model

In [5]:
def create_samples(sample_num):
    """Draw a random two-feature dataset with a linearly separable label.

    Returns ``(X, y)`` where ``X`` is a ``(sample_num, 2)`` array of integers
    drawn from [-10, 10) and ``y[i]`` is 1 when the two features of row ``i``
    sum to a positive value, else 0.
    """
    features = np.random.randint(low=-10, high=10, size=(sample_num, 2))
    row_totals = features.sum(axis=1)
    labels = (row_totals > 0).astype(int)
    return features, labels


In [6]:
def train(train_samples, test_samples, learning_rate, epochs, option='NULL'):
    """Train a vectorized BinaryClassifier on fresh random data and report metrics.

    A new training set and test set are generated on every call. When
    ``option == 'print'`` the weights and bias are printed after each epoch.

    Returns a dict with the final ``w`` and ``b``, the train/test loss, and
    the train/test accuracy in percent.
    """
    classifier = BinaryClassifier((1,2))
    train_X, train_y = create_samples(train_samples)
    test_X, test_y = create_samples(test_samples)

    verbose = option == 'print'
    for epoch_number in range(1, epochs + 1):
        classifier.train(train_X, train_y, learning_rate)
        if not verbose:
            continue
        print('\nepoch #' + str(epoch_number))
        print('w :' + str(classifier.w.reshape((1,2))))
        print('b : ' + str(classifier.b))

    weights = classifier.w.reshape((1,2))
    bias = classifier.b
    train_loss = classifier.loss(train_X, train_y)
    test_loss = classifier.loss(test_X, test_y)
    train_acc = 100 * np.mean(classifier.predict(train_X) == train_y)
    test_acc = 100 * np.mean(classifier.predict(test_X) == test_y)

    return {'w': weights,
            'b': bias,
            'train_loss': train_loss,
            'test_loss': test_loss,
            'train_acc': train_acc,
            'test_acc': test_acc}

In [7]:
def train_elementwise(train_samples, test_samples, learning_rate, epochs, option='NULL'):
    """Train a BinaryClassifierElementwise on fresh random data and report metrics.

    A new training set and test set are generated on every call. When
    ``option == 'print'`` the weights and bias are printed after each epoch.

    Returns a dict with the final ``w`` and ``b``, the train/test loss, and
    the train/test accuracy in percent.
    """
    classifier = BinaryClassifierElementwise((1,2))
    train_X, train_y = create_samples(train_samples)
    test_X, test_y = create_samples(test_samples)

    verbose = option == 'print'
    for epoch_number in range(1, epochs + 1):
        classifier.train(train_X, train_y, learning_rate)
        if not verbose:
            continue
        print('\nepoch #' + str(epoch_number))
        print('w :' + str(classifier.w.reshape((1,2))))
        print('b : ' + str(classifier.b))

    weights = classifier.w.reshape((1,2))
    bias = classifier.b
    train_loss = classifier.loss(train_X, train_y)
    test_loss = classifier.loss(test_X, test_y)
    train_acc = 100 * np.mean(classifier.predict(train_X) == train_y)
    test_acc = 100 * np.mean(classifier.predict(test_X) == test_y)

    return {'w': weights,
            'b': bias,
            'train_loss': train_loss,
            'test_loss': test_loss,
            'train_acc': train_acc,
            'test_acc': test_acc}

In [8]:
# Baseline hyperparameters; later cells reuse these module-level names.
train_samples = 100 # m: number of training samples
test_samples = 100 # n: number of test samples
learning_rate = 1e-2
epochs = 100 # K: number of full-batch training steps

# Single baseline run with the vectorized classifier.
train(train_samples, test_samples, learning_rate, epochs)


{'w': array([[0.37411396, 0.45177562]]),
 'b': -0.018992888908487472,
 'train_loss': 0.17186290637380514,
 'test_loss': 0.1494035394228638,
 'train_acc': 96.0,
 'test_acc': 97.0}

# Print (w, b) each iteration

In [9]:
### Setting epochs to 100 makes the output too long, so run with 10 epochs instead.
train(train_samples, test_samples, learning_rate, 10, option='print')



epoch #1
w :[[0.0149  0.01935]]
b : -0.0008

epoch #2
w :[[0.02851465 0.03683198]]
b : -0.001522395733802674

epoch #3
w :[[0.0409868  0.05266626]]
b : -0.002177053725053515

epoch #4
w :[[0.05246057 0.06706863]]
b : -0.0027739373166111815

epoch #5
w :[[0.06306875 0.08023544]]
b : -0.0033221518822761807

epoch #6
w :[[0.07292775 0.09233793]]
b : -0.0038295645305330295

epoch #7
w :[[0.08213682 0.10352202]]
b : -0.004302764188099197

epoch #8
w :[[0.09077951 0.11391059]]
b : -0.004747181237354083

epoch #9
w :[[0.09892576 0.12360667]]
b : -0.0051672605426926616

epoch #10
w :[[0.10663414 0.13269667]]
b : -0.005566635779194124


{'w': array([[0.10663414, 0.13269667]]),
 'b': -0.005566635779194124,
 'train_loss': 0.3994439835611875,
 'test_loss': 0.4005176221978117,
 'train_acc': 98.0,
 'test_acc': 100.0}

# Time Comparison

### element-wise version

In [10]:
# Time one full element-wise training run. perf_counter() is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, whereas
# time.time() is a wall clock that can be coarse or adjusted mid-run.
start = time.perf_counter()
result_elementwise = train_elementwise(train_samples, test_samples, learning_rate, epochs)
print(time.perf_counter() - start)
print(result_elementwise)

0.5849499702453613
{'w': array([[0.41454156, 0.40819955]]), 'b': -0.04905599544328502, 'train_loss': 0.1466250264640928, 'test_loss': 0.15289553856197846, 'train_acc': 98.0, 'test_acc': 100.0}


### vector version

In [11]:
# Time one full vectorized training run. perf_counter() is a monotonic,
# high-resolution clock intended for measuring elapsed intervals; a run this
# short can be close to the resolution of the time.time() wall clock.
start = time.perf_counter()
result_vector = train(train_samples, test_samples, learning_rate, epochs)
print(time.perf_counter() - start)
print(result_vector)

0.012872934341430664
{'w': array([[0.42484946, 0.41395356]]), 'b': -0.08788218729060701, 'train_loss': 0.13828093278404657, 'test_loss': 0.15607500607482863, 'train_acc': 100.0, 'test_acc': 100.0}


# Test Parameter

In [12]:
def test_parameter(train_samples, test_samples, learning_rate,  epochs, runs):
    """Average the train/test loss and accuracy over ``runs`` independent trainings.

    Each run calls ``train`` with the given hyperparameters. The result is a
    pandas Series indexed by ``train_loss``, ``test_loss``, ``train_acc`` and
    ``test_acc``, holding the per-metric sum divided by ``runs``.
    """
    metric_columns = ['train_loss', 'test_loss', 'train_acc', 'test_acc']

    per_run = [
        train(train_samples, test_samples, learning_rate,  epochs)
        for _ in range(runs)
    ]

    metric_totals = pd.DataFrame(per_run)[metric_columns].sum()
    return metric_totals / runs

In [13]:
# Number of independent training runs that test_parameter averages over.
runs = 100

test_parameter(train_samples, test_samples, learning_rate, epochs, runs)


train_loss     0.165952
test_loss      0.173744
train_acc     98.280000
test_acc      97.640000
dtype: float64

### Test Parameter m (=train_samples)

In [14]:
# Sweep the training-set size m; the other hyperparameters keep their baseline values.
for m in [10,100,1000]:
    print("\nTrain_samples : ", m)
    print(test_parameter(m, test_samples, learning_rate, epochs, runs))



Train_samples :  10
train_loss     0.14185
test_loss      0.22862
train_acc     98.40000
test_acc      92.31000
dtype: float64

Train_samples :  100
train_loss     0.171549
test_loss      0.174781
train_acc     98.270000
test_acc      97.610000
dtype: float64

Train_samples :  1000
train_loss     0.172270
test_loss      0.168548
train_acc     98.918000
test_acc      98.970000
dtype: float64


### Test Parameter K (=epochs)

In [15]:
# Sweep the number of epochs K; the other hyperparameters keep their baseline values.
for K in [10,100,1000]:
    print("\nEpochs : ", K)
    print(test_parameter(train_samples, test_samples, learning_rate, K, runs))


Epochs :  10
train_loss     0.397752
test_loss      0.405442
train_acc     96.630000
test_acc      95.760000
dtype: float64

Epochs :  100
train_loss     0.172736
test_loss      0.173047
train_acc     98.280000
test_acc      97.800000
dtype: float64

Epochs :  1000
train_loss     0.074933
test_loss      0.078105
train_acc     99.560000
test_acc      98.960000
dtype: float64


### Test Parameter lr (=learning_rate)

In [16]:
# Sweep the learning rate; the other hyperparameters keep their baseline values.
for lr in [1e-8, 1e-4, 1e-2, 1e-1, 1e0, 1e1]:
    print("\nLearning_rate : ", lr)
    print(test_parameter(train_samples, test_samples, lr, epochs, runs))


Learning_rate :  1e-08
train_loss     0.693142
test_loss      0.693142
train_acc     96.280000
test_acc      95.070000
dtype: float64

Learning_rate :  0.0001
train_loss     0.641518
test_loss      0.641716
train_acc     96.390000
test_acc      95.800000
dtype: float64

Learning_rate :  0.01
train_loss     0.167929
test_loss      0.175276
train_acc     98.290000
test_acc      97.390000
dtype: float64

Learning_rate :  0.1
train_loss     0.075399
test_loss      0.080807
train_acc     99.470000
test_acc      98.800000
dtype: float64

Learning_rate :  1.0
train_loss      0.024045
test_loss       0.030065
train_acc     100.000000
test_acc       99.610000
dtype: float64

Learning_rate :  10.0
train_loss     0.004852
test_loss      0.031214
train_acc     99.930000
test_acc      99.500000
dtype: float64


# Best parameters

In [17]:
# Single run with m=1000 and learning_rate=1.0. train() takes no `runs`
# argument; its fifth positional parameter is `option`, so nothing extra is
# passed here.
train(1000, test_samples, 1.0, epochs)


{'w': array([[2.41652767, 2.3998954 ]]),
 'b': -1.0520468981772695,
 'train_loss': 0.02426320864479219,
 'test_loss': 0.03131281814486276,
 'train_acc': 100.0,
 'test_acc': 100.0}