# ML from Scratch

## Data Collection

In [None]:
# Credit Card Application Example

import numpy as np
import matplotlib.pyplot as plt
import random

### ------- How INPUT FEATURES X are generated --------

def len_gen(mean, var, n=1):
    """Sample ``n`` credit-history lengths (in months).

    Values are drawn from a normal distribution, truncated to integers and
    clamped to the interval [0, 98].  ``var`` is forwarded to
    ``np.random.normal`` as its scale argument, so it acts as the standard
    deviation of the distribution.

    The resulting array is printed and returned.
    """
    raw = np.random.normal(loc=mean, scale=var, size=n)
    months = np.clip(raw.astype(int), 0, 98)
    print('Length of credit history (month): ', months)
    return months

def debt_gen(mean, var, n=1):
    """Sample ``n`` debt amounts (in USD).

    Values are drawn from a normal distribution and truncated to integers.
    No clamping is applied, so negative amounts are possible.  ``var`` is
    forwarded to ``np.random.normal`` as its scale argument, so it acts as
    the standard deviation of the distribution.

    The resulting array is printed and returned.
    """
    debts = np.random.normal(loc=mean, scale=var, size=n).astype(int)
    print('Debt owed (USD): ', debts)
    return debts

def num_gen(mean, var, n=1):
    """Sample ``n`` credit-card counts.

    Values are drawn from a normal distribution, truncated to integers and
    clamped to the interval [0, 50].  ``var`` is forwarded to
    ``np.random.normal`` as its scale argument, so it acts as the standard
    deviation of the distribution.

    The resulting array is printed and returned.
    """
    raw = np.random.normal(loc=mean, scale=var, size=n)
    cards = np.clip(raw.astype(int), 0, 50)
    print('Number of credit cards: ', cards)
    return cards

### ------- How OUTPUT LABELS Y are generated --------

def score_func_1(leng):
    """Score the length of the credit history (in months).

    Below 18 months the score equals the number of months.  From 18 months
    on it is ``50 + 0.625 * (leng - 18)``, which reaches 100 at 98 months.
    The score therefore jumps from 17 to 50 between 17 and 18 months.
    """
    return leng if leng < 18 else 50 + 0.625 * (leng - 18)

def score_func_2(debt):
    """Score the amount of debt owed (in USD).

    A decreasing logistic curve between 100 and 0, centred at 30000 (where
    the score is exactly 50) with a scale of 10000.
    """
    z = (debt - 30000) / 10000
    return 100 / (1 + np.exp(z))

def score_func_3(num):
    """Score the number of credit cards held.

    * fewer than 4 cards: 25 points per card;
    * 4 to 10 cards (inclusive): the full 100 points;
    * more than 10 cards: 100 minus 10 points per card above 10 (this goes
      negative beyond 20 cards).
    """
    if num < 4:
        s = 25 * num
    elif num > 10:
        s = 100 - 10 * (num - 10)
    elif 4 <= num <= 10:
        s = 100
    return s

def decision(factor_list, score_func_list=(score_func_1, score_func_2, score_func_3),
             score_weight_list=(0.2,0.6,0.4), threshold=80):
    """Generate approval labels from the input factors.

    Each factor array is scored element-wise with the scoring function at
    the same position in ``score_func_list``, scaled by the weight at the
    same position in ``score_weight_list``, and the weighted scores are
    summed per sample.  The label is the sign of
    ``score - threshold + noise``.

    Parameters
    ----------
    factor_list : sequence of 1-D sequences
        One sequence of values per factor, all of the same length.
    score_func_list : sequence of callables
        Scalar scoring functions, one per factor.  The default is a tuple
        so that no mutable object is shared between calls.
    score_weight_list : sequence of float
        Weight applied to each factor's score.
    threshold : float
        Score level that separates the two decisions.

    Returns
    -------
    numpy.ndarray
        Per-sample values of ``np.sign`` (1 or -1, and 0 exactly on the
        boundary).  The decisions and the approval rate are also printed.

    Raises
    ------
    IndexError
        If ``factor_list`` has more entries than ``score_func_list`` or
        ``score_weight_list``.
    """
    score = 0
    for i, factor in enumerate(factor_list):
        factor_scores = np.array([score_func_list[i](value) for value in factor])
        score = score + factor_scores * score_weight_list[i]
    # y = np.sign(score-threshold)
    # NOTE(review): np.random.normal() without a size draws ONE scalar, so the
    # same noise value shifts every sample in this call.  Confirm whether
    # independent per-sample noise was intended.
    y = np.sign(score-threshold + np.random.normal()*50)
    print('Decisions: ',y)
    print('Approval rate: ', sum(y==1)/len(y))
    return y


In [None]:
### Observed some data (training set)
n = 50 # number of samples
x1 = len_gen(24, 12, n)
x2 = debt_gen(30000, 40000, n)
x3 = num_gen(5, 10, n)
y = decision([x1,x2,x3], threshold=60)

### Some test data
# Labels come from the test features and are stored under their own name,
# so the training labels `y` are not overwritten.
n = 100 # number of samples
x1_test = len_gen(24, 12, n)
x2_test = debt_gen(30000, 40000, n)
x3_test = num_gen(5, 10, n)
y_test = decision([x1_test,x2_test,x3_test], threshold=60)

### Some test data (Out-of-distribution)
# The features use the same generator settings as above; only the decision
# threshold differs (80 instead of 60).
n = 100 # number of samples
x1_test_ood = len_gen(24, 12, n)
x2_test_ood = debt_gen(30000, 40000, n)
x3_test_ood = num_gen(5, 10, n)
y_test_ood = decision([x1_test_ood,x2_test_ood,x3_test_ood], threshold=80)

## Data Visualization

## Dataset Preprocessing

## The Perceptron Algorithm

### MODEL

In [None]:
class perceptron:
    """Perceptron over three input features with a scalar threshold.

    The prediction rule is ``sign(w1*x1 + w2*x2 + w3*x3 - t)``.
    """

    def __init__(self):
        """Randomly initialise the weights and threshold and print them."""
        # Weights are standard-normal draws; the threshold is a random
        # integer in [0, 40).
        self.w1, self.w2, self.w3 = np.random.randn(3)
        self.t = np.random.randint(40)
        print('INITIALIZATION:')
        print('weights & threshold initialized: ', self.w1, self.w2, self.w3, self.t)
        print(''.center(100,'-'))

    def _predict(self, x1, x2, x3):
        """Return the sign of the weighted sum minus the threshold."""
        return np.sign(self.w1*x1 + self.w2*x2 + self.w3*x3 - self.t)

    def inference(self, x1, x2, x3):
        """Predict labels for the given feature arrays and print a report.

        Returns the array of predictions (values of ``np.sign``).
        """
        y_pred = self._predict(x1, x2, x3)
        print('INFERENCE:')
        print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
        # Show the predictions just computed, not any ground-truth labels.
        print('predicted results', y_pred)
        print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
        print(''.center(100,'-'))
        return y_pred

    def evaluate(self, x1, x2, x3, y):
        """Print predictions, ground truth, approval rates and accuracy.

        ``y`` holds the ground-truth labels for the given feature arrays.
        """
        y_pred = self._predict(x1, x2, x3)
        acc = sum(y==y_pred)/len(y)
        print('EVALUATION:')
        print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
        print('Predicted results', y_pred)
        print('Ground truth results', y)
        print('Approval rate (ground truth): ', sum(y==1)/len(y))
        print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
        print('Accuracy: ', acc)
        print(''.center(100,'-'))
        

In [None]:
# Create a perceptron; its constructor draws random weights and a random
# threshold and prints them.
model = perceptron()
# Predict labels for the observed samples (inference also prints a report).
y_pred = model.inference(x1,x2,x3)
# Compare the predictions of this untrained model with the labels y and
# print the accuracy.
model.evaluate(x1,x2,x3,y)

### Learning

In [None]:
class perceptron:
    """Perceptron over three input features with a scalar threshold.

    The prediction rule is ``sign(w1*x1 + w2*x2 + w3*x3 - t)``.  The
    ``learn`` method is a placeholder that does nothing yet.
    """

    def __init__(self):
        """Randomly initialise the weights and threshold and print them."""
        # Weights are standard-normal draws; the threshold is a random
        # integer in [0, 40).
        self.w1, self.w2, self.w3 = np.random.randn(3)
        self.t = np.random.randint(40)
        print('INITIALIZATION:')
        print('weights & threshold initialized: ', self.w1, self.w2, self.w3, self.t)
        print(''.center(100,'-'))

    def _predict(self, x1, x2, x3):
        """Return the sign of the weighted sum minus the threshold."""
        return np.sign(self.w1*x1 + self.w2*x2 + self.w3*x3 - self.t)

    def inference(self, x1, x2, x3):
        """Predict labels for the given feature arrays and print a report.

        Returns the array of predictions (values of ``np.sign``).
        """
        y_pred = self._predict(x1, x2, x3)
        print('INFERENCE:')
        print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
        # Show the predictions just computed, not any ground-truth labels.
        print('predicted results', y_pred)
        print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
        print(''.center(100,'-'))
        return y_pred

    def evaluate(self, x1, x2, x3, y):
        """Print predictions, ground truth, approval rates and accuracy.

        ``y`` holds the ground-truth labels for the given feature arrays.
        """
        y_pred = self._predict(x1, x2, x3)
        acc = sum(y==y_pred)/len(y)
        print('EVALUATION:')
        print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
        print('Predicted results', y_pred)
        print('Ground truth results', y)
        print('Approval rate (ground truth): ', sum(y==1)/len(y))
        print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
        print('Accuracy: ', acc)
        print(''.center(100,'-'))

    def learn(self, x1, x2, x3, y):
        """Placeholder for the learning step; currently does nothing."""
        pass
        

### Perceptron Learning Algorithm


### PLA
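For $t = 0, 1, \dots$

Find a mistake of $W_t$, i.e. a sample $(X_n, y_n)$ for which $g_t(X_n) = \mathrm{sign}(W_t^T X_n) \neq y_n$.

Try to correct the mistake by:
$W_{t+1} = W_t + y_n X_n$

… until no more mistakes.
Return the last $W$ as $g$ (written as $W_{PLA}$).

With $W = (w_1, w_2, w_3, t)$ and $X_n = (x_1, x_2, x_3, -1)$, the same rule updates the threshold as $t \leftarrow t - y_n$.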













Try to work on this!

In [None]:
class perceptron:
    """Perceptron over three input features with a scalar threshold.

    The prediction rule is ``sign(w1*x1 + w2*x2 + w3*x3 - t)``.  ``learn``
    performs one pass of perceptron updates over the given samples.
    """

    def __init__(self):
        """Randomly initialise the weights and threshold and print them."""
        # Weights are standard-normal draws; the threshold is a random
        # integer in [0, 40).
        self.w1, self.w2, self.w3 = np.random.randn(3)
        self.t = np.random.randint(40)
        print('INITIALIZATION:')
        print('weights & threshold initialized: ', self.w1, self.w2, self.w3, self.t)
        print(''.center(100,'-'))

    def _predict(self, x1, x2, x3):
        """Return the sign of the weighted sum minus the threshold."""
        return np.sign(self.w1*x1 + self.w2*x2 + self.w3*x3 - self.t)

    def inference(self, x1, x2, x3, print_results=False):
        """Predict labels for the given features (scalars or arrays).

        When ``print_results`` is truthy, the current parameters and the
        predictions are printed.  Returns the predictions (values of
        ``np.sign``).
        """
        y_pred = self._predict(x1, x2, x3)
        if print_results:
            print('INFERENCE:')
            print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
            # Show the predictions just computed, not any ground-truth labels.
            print('predicted results', y_pred)
            # print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
            print(''.center(100,'-'))
        return y_pred

    def evaluate(self, x1, x2, x3, y):
        """Print predictions, ground truth, approval rates and accuracy.

        ``y`` holds the ground-truth labels for the given feature arrays.
        """
        y_pred = self._predict(x1, x2, x3)
        acc = sum(y==y_pred)/len(y)
        print('EVALUATION:')
        print('current model weights & threshold: ', self.w1, self.w2, self.w3, self.t)
        print('Predicted results', y_pred)
        print('Ground truth results', y)
        print('Approval rate (ground truth): ', sum(y==1)/len(y))
        print('Approval rate (predicted): ', sum(y_pred==1)/len(y_pred))
        print('Accuracy: ', acc)
        print(''.center(100,'-'))

    def learn(self, x1, x2, x3, y, print_acc=False):
        """Run one pass of perceptron updates over the samples, in order.

        For every sample whose current prediction differs from its label,
        each weight is increased by ``label * feature`` and the threshold
        is decreased by ``label``.  The parameters are printed before and
        after each update; ``'skip'`` is printed for samples that are
        already classified correctly.

        Parameters
        ----------
        x1, x2, x3 : indexable sequences of equal length
            Feature values, one entry per sample.
        y : indexable sequence
            Labels, one entry per sample.
        print_acc : bool
            If true, print the accuracy on all given samples after every
            update.

        Raises
        ------
        ValueError
            If the inputs do not all have the same length.
        """
        N = len(x1)
        if not (len(x2) == len(x3) == len(y) == N):
            raise ValueError('x1, x2, x3 and y must all have the same length')
        for i in range(N):
            if self.inference(x1[i], x2[i], x3[i]) != y[i]:
                print('before update:', self.w1, self.w2, self.w3, self.t)
                self.w1 = self.w1 + y[i]*x1[i]
                self.w2 = self.w2 + y[i]*x2[i]
                self.w3 = self.w3 + y[i]*x3[i]
                # The threshold enters the score with a minus sign, so it
                # moves in the opposite direction to the label.
                self.t = self.t   + y[i]*(-1)
                print('after update:', self.w1, self.w2, self.w3, self.t)
                if print_acc:
                    y_pred = self._predict(x1, x2, x3)
                    acc = sum(y==y_pred)/len(y)
                    print('--->Accuracy: ', acc)

            else:
                print('skip')
                

In [None]:
# Create a perceptron; its constructor draws random weights and a random
# threshold and prints them.
model = perceptron()
# Predict labels for the observed samples and print the inference report.
y_pred = model.inference(x1,x2,x3,True)
# Accuracy of the untrained model on the observed samples.
model.evaluate(x1,x2,x3,y)
# One learning pass over the observed samples, printing the accuracy after
# every update.
model.learn(x1,x2,x3,y, True)
# Accuracy on the same samples after the learning pass.
model.evaluate(x1,x2,x3,y)

In [None]:
### DONE






















In [None]:
#### Cyclic PLA

In [None]:
#### Pocket PLA

### Learning with visualization

### Learning with visualization & loss curves

### Testing

## Logistic Regression

### MODEL

### Learning