# Logistic Regression with SGD

---

In [1]:
import numpy as np
import pandas as pd

In [None]:
from tqdm import tqdm

In [2]:
# Read the bank-note test and train splits; header=None keeps the first row as data.
test_data = pd.read_csv('bank-note/test.csv', header=None)
train_data = pd.read_csv('bank-note/train.csv', header=None)

In [3]:
# first 4 columns are features, last column (label) is the output
columns = ['var', 'skew', 'curt', 'ent', 'label']
features = columns[:-1]
output = columns[-1]

# give both splits the same column names
test_data.columns = columns
train_data.columns = columns

In [4]:
train_data.head()

Unnamed: 0,var,skew,curt,ent,label
0,3.8481,10.1539,-3.8561,-4.2228,0
1,4.0047,0.45937,1.3621,1.6181,0
2,-0.048008,-1.6037,8.4756,0.75558,0
3,-1.2667,2.8183,-2.426,-1.8862,1
4,2.2034,5.9947,0.53009,0.84998,0


In [5]:
# Feature matrices: every column except the trailing label column.
train_X, test_X = (frame.iloc[:, :-1].values for frame in (train_data, test_data))

In [6]:
# Append a constant-one column so the last weight acts as the bias term.
train_X = np.hstack((train_X, np.ones((train_X.shape[0], 1))))

In [7]:
# Append a constant-one column so the last weight acts as the bias term.
test_X = np.hstack((test_X, np.ones((test_X.shape[0], 1))))

In [8]:
# Label vectors: the last column of each split.
train_y, test_y = (frame.iloc[:, -1].values for frame in (train_data, test_data))

In [9]:
# Map labels to {-1, 1} (falsy -> -1, anything else -> 1) and
# store each result as a 2D column array.
train_y = np.array([-1 if not label else 1 for label in train_y]).reshape(-1, 1)
test_y = np.array([-1 if not label else 1 for label in test_y]).reshape(-1, 1)

In [61]:
def sgd_method(df, attrs, target_attr, epsilon, t=0.01):
    """Fit linear weights by stochastic gradient descent on a DataFrame.

    The last column of ``df`` is used as the target and all preceding
    columns as features. A column of ones is inserted in front of the
    features, so the first weight acts as the bias.

    Args:
        df (pandas.DataFrame): m rows; features in every column but the
            last, target in the last column.
        attrs: not used by this function.
        target_attr: not used by this function.
        epsilon (float): the loop stops once the objective changes by no
            more than this amount between two consecutive epochs.
        t (float): learning rate (step size).

    Returns:
        tuple: ``(x, history)``. ``x`` is the weight vector of length
        ``df.shape[1]`` (bias first). ``history`` stacks the placeholder
        start value 100 followed by the objective after each epoch.

    NOTE(review): the objective is evaluated as ``f(A, b, x)`` using a
    module-level ``f``. The ``f`` defined later in this file takes four
    arguments ``(X, y, w, var)``, so this call presumably relies on a
    different definition -- confirm before reusing this function.
    """
    
    m,n = df.shape
    
    # convert to numpy array
    data = df.to_numpy()
    
    # Separate to Ax=b where A is input matrix, x is vector of weights, and b the vector of outputs
    A = data[:,:-1]
    # prepend the constant-one (bias) column
    A = np.insert(A,0,np.ones(m), axis=1)
    b = data[:,-1] 
    # n columns in df = (n-1) features + 1 bias weight
    x = np.zeros(n)

    iter=0
    max_iter = 100000
    # placeholder start values so the first tolerance check passes
    cur_val=100
    prev_val = np.inf
    history = cur_val
    
    while np.linalg.norm(prev_val-cur_val) > epsilon and iter < max_iter:
        iter = iter + 1
        prev_val = cur_val
        
        # draw m row indexes uniformly at random (with replacement)
        indexes = np.random.randint(m, size=m)
        
        for i in indexes:
            # define new point x = x + t d, d = - grad
            x = x + t*(b[i] - np.dot(x,A[i]))*A[i]

        # objective after this epoch (see NOTE(review) in the docstring)
        cur_val= f(A,b,x)
        history = np.vstack((history,cur_val))
            
        
        #print('i = {}, tol = {:.4e}, fun_val = {:.4e}'.format(iter, np.linalg.norm(prev_val-cur_val), cur_val))
        
    # one-line summary of the final epoch
    print('i = {}, tol = {:.4e}, fun_val = {:.4e}'.format(iter, np.linalg.norm(prev_val-cur_val), cur_val))
    return x, history

In [16]:
def log_reg(X, y, f, grad, var, epsilon, rate_scheduler, max_iter=100, verbose=True):
    """Minimise an objective with stochastic gradient descent.

    Each epoch draws ``m`` row indexes at random (with replacement). For
    every drawn row, the row is tiled ``m`` times before being handed to
    ``grad``, so a ``grad`` that sums over rows (as the one in this file
    does) weights the single-sample term by ``m``.

    Args:
        X (numpy array, m x n): input samples, one per row.
        y (numpy array, m x 1): labels, indexed in step with ``X``.
        f (callable): objective, called as ``f(X, y, w, var)``.
        grad (callable): gradient, called as ``grad(X_, y_, w, var)``
            and expected to return an array compatible with ``w``.
        var (float): passed through unchanged to ``f`` and ``grad``.
        epsilon (float): stop once the objective changes by no more than
            this amount between two consecutive epochs.
        rate_scheduler (callable): maps the 1-based epoch number to the
            learning rate used for that whole epoch.
        max_iter (int): maximum number of epochs (default 100).
        verbose (bool): when True (default) print a progress line after
            every epoch; the final summary line is always printed.

    Returns:
        tuple: ``(w, history)``. ``w`` is the final weight vector of
        length ``n``. ``history`` stacks the placeholder start value 100
        followed by the objective after each epoch.
    """
    m, n = X.shape
    w = np.zeros(n)

    report = 'i = {}, tol = {:.4e}, fun_val = {:.4e}'

    epoch = 0
    # Placeholder start values so the first tolerance check passes.
    cur_val = 100
    prev_val = np.inf
    history = cur_val

    while np.linalg.norm(prev_val - cur_val) > epsilon and epoch < max_iter:
        epoch += 1
        prev_val = cur_val

        # The rate depends only on the epoch, so compute it once per epoch.
        rate = rate_scheduler(epoch)

        # Sample m row indexes uniformly at random, with replacement.
        for i in np.random.randint(m, size=m):
            X_ = np.repeat(X[i].reshape(1, -1), m, axis=0)
            y_ = np.repeat(y[i].reshape(1, -1), m, axis=0)
            w = w - rate * grad(X_, y_, w, var)

        cur_val = f(X, y, w, var)
        history = np.vstack((history, cur_val))

        if verbose:
            print(report.format(epoch, np.linalg.norm(prev_val - cur_val), cur_val))

    print(report.format(epoch, np.linalg.norm(prev_val - cur_val), cur_val))
    return w, history

In [70]:
# Scratch check: view the label at index 1 as a (1, 1) array, as log_reg does for y[i].
a = y[1].reshape(1,-1)
a

array([[-1]])

In [73]:
# Scratch check: repeating that row 3 times along axis 0 yields a (3, 1) column.
b = np.repeat(a,3, axis=0)
b

array([[-1],
       [-1],
       [-1]])

$$ p(w_i) = \frac{1}{\sqrt{2 \pi v}} \exp\left(-\frac{1}{2 v} w_{i}^2\right) $$

In [11]:
def param(var):
    """Return the zero-mean Gaussian density function with variance ``var``.

    The returned callable evaluates
    ``1/sqrt(2*pi*var) * exp(-x**2 / (2*var))`` at ``x``.
    """
    def density(x):
        normaliser = 1/(np.sqrt(2*np.pi*var))
        return normaliser*np.exp(-1.0/(2*var)*x**2)

    return density

In [12]:
def schedule(g, d):
    """Return a learning-rate schedule ``t -> g / (1 + (g/d) * t)``.

    ``g`` is the rate at ``t = 0``; the ratio ``g/d`` controls how quickly
    the rate decays as ``t`` grows.
    """
    def rate_at(t):
        return g / (1 + (g/d)*t)

    return rate_at

In [13]:
def f(X, y, w, var):
    """Evaluate the regularised logistic-regression objective.

    Computes ``sum_i log(1 + exp(-y_i * w.x_i)) + w.w / (2 * var)``.

    Args:
        X (numpy array, m x n): input samples, one per row.
        y (numpy array, m x 1 or length m): labels, one per row of ``X``.
        w (numpy array, length n): weight vector.
        var (float): variance scaling the ``w.w`` penalty.

    Returns:
        float: the objective value.
    """
    X = np.asarray(X, dtype=float)
    w = np.asarray(w, dtype=float).ravel()
    labels = np.asarray(y, dtype=float).ravel()

    # z_i = -y_i * (w . x_i)
    neg_margins = -labels * np.dot(X, w)

    # log(1 + exp(z)) written as logaddexp(0, z) so that a large z does not
    # overflow exp() and turn the whole objective into inf.
    data_loss = np.logaddexp(0.0, neg_margins).sum()
    penalty = 1/(2*var) * np.dot(w, w)

    return float(data_loss + penalty)

In [14]:
def grad(X, y, w, var):
    """Gradient of the regularised logistic-regression objective w.r.t. ``w``.

    Computes ``sum_i -y_i * x_i * sigmoid(-y_i * w.x_i) + w / var``.

    Args:
        X (numpy array, m x n): input samples, one per row.
        y (numpy array, m x 1 or length m): labels, one per row of ``X``.
        w (numpy array, length n): weight vector.
        var (float): variance scaling the penalty term.

    Returns:
        numpy array of length n: the gradient.
    """
    X = np.asarray(X, dtype=float)
    w = np.asarray(w, dtype=float).ravel()
    labels = np.asarray(y, dtype=float).ravel()

    # z_i = -y_i * (w . x_i)
    neg_margins = -labels * np.dot(X, w)

    # sigmoid(z) = exp(z) / (exp(z) + 1), evaluated as exp(-log(1 + exp(-z)))
    # so that a large |z| cannot produce inf/inf = nan.
    sigma = np.exp(-np.logaddexp(0.0, -neg_margins))

    data_grad = np.dot(X.T, -labels * sigma)
    penalty_grad = 1/(var) * w

    return data_grad + penalty_grad

In [15]:
# Candidate values for the `var` argument of log_reg / f / grad.
var_list = [0.01, 0.1, 0.5, 1, 3, 5, 10, 100]

In [83]:
# Short aliases for the training inputs and labels, used by the scratch cells.
X = train_X
y = train_y

In [86]:
# All-ones weight vector (one entry per column of X) for a quick check of f and grad.
m,n = X.shape
    
w = np.ones(n)

In [87]:
# Objective value at the all-ones weights with var = 0.01.
a = f(X,y,w,0.01)

In [88]:
a

4331.31223819926

In [90]:
# Gradient at the all-ones weights with var = 0.01.
b = grad(X,y,w,0.01)
b

array([1670.25593661, 2335.02534609,   36.68461509,    7.58321194,
        328.6255436 ])

In [18]:
# Learning-rate schedule parameters: rate(t) = g / (1 + (g/d) * t).
g = 0.001
d = 0.01

In [19]:
# Train on the training split with the first candidate variance (var_list[0]).
log_reg(train_X,train_y, f, grad,var = var_list[0], epsilon=1e-6, rate_scheduler=schedule(g,d))

i = 1, tol = 4.0635e+03, fun_val = 4.1635e+03
i = 2, tol = 3.8173e+03, fun_val = 3.4621e+02
i = 3, tol = 8.9645e+02, fun_val = 1.2427e+03
i = 4, tol = 4.3073e+00, fun_val = 1.2470e+03
i = 5, tol = 3.3158e+03, fun_val = 4.5628e+03
i = 6, tol = 2.9309e+03, fun_val = 1.6319e+03
i = 7, tol = 8.8834e+02, fun_val = 2.5202e+03
i = 8, tol = 2.4658e+02, fun_val = 2.7668e+03
i = 9, tol = 1.4893e+03, fun_val = 1.2775e+03
i = 10, tol = 9.0426e+02, fun_val = 3.7325e+02
i = 11, tol = 3.0321e+02, fun_val = 6.7646e+02
i = 12, tol = 3.8134e+03, fun_val = 4.4899e+03
i = 13, tol = 3.0746e+03, fun_val = 1.4153e+03
i = 14, tol = 1.1307e+03, fun_val = 2.8463e+02
i = 15, tol = 5.6167e+00, fun_val = 2.9025e+02
i = 16, tol = 9.7733e+02, fun_val = 1.2676e+03
i = 17, tol = 8.2524e+02, fun_val = 4.4234e+02
i = 18, tol = 3.0103e+03, fun_val = 3.4526e+03
i = 19, tol = 1.9331e+03, fun_val = 1.5195e+03
i = 20, tol = 1.0863e+03, fun_val = 4.3319e+02
i = 21, tol = 1.4607e+02, fun_val = 5.7926e+02
i = 22, tol = 3.3163e+

(array([-0.80619482, -0.95976298, -0.28919857, -0.10859547,  0.50211707]),
 array([[ 100.        ],
        [4163.48352715],
        [ 346.2134459 ],
        [1242.66841521],
        [1246.97572805],
        [4562.79245219],
        [1631.87759342],
        [2520.21972124],
        [2766.7983853 ],
        [1277.50859165],
        [ 373.24747494],
        [ 676.45920362],
        [4489.8774527 ],
        [1415.2937419 ],
        [ 284.62887397],
        [ 290.24553524],
        [1267.57365489],
        [ 442.33580694],
        [3452.6471676 ],
        [1519.51562692],
        [ 433.19144178],
        [ 579.2647527 ],
        [ 247.63567725],
        [ 620.56432388],
        [2580.655213  ],
        [1468.10696959],
        [1340.48779536],
        [2519.48576853],
        [ 501.6064078 ],
        [ 329.20730606],
        [ 574.8219743 ],
        [3749.45780842],
        [ 239.33556347],
        [ 647.05338283],
        [ 336.67854058],
        [ 246.9071211 ],
        [ 385.700253  ],
