In [1]:
import numpy as np

# Functions

In [2]:
def readFile(filename):
    """Load a numeric text table and split it into features and labels.

    Parameters
    ----------
    filename : str, path-like or file-like
        Anything accepted by ``np.loadtxt``. The last column of every row
        is taken as the label, all preceding columns as features.

    Returns
    -------
    X : ndarray, shape (m, n - 1)
        Copy of every column except the last.
    y : ndarray, shape (m,)
        Copy of the last column.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(filename, ndmin=2)
    X = data[:, :-1].copy()
    y = data[:, -1].copy()
    return X, y

In [52]:
def standardizex(X):
    """Standardize each column of ``X`` to zero mean and unit variance.

    The population standard deviation (divisor ``m``) is used. Columns whose
    standard deviation is exactly zero are only centred (they come out as
    all zeros) instead of being divided by zero and turning into NaN.

    Parameters
    ----------
    X : array-like, shape (m, n)
        One sample per row, one feature per column.

    Returns
    -------
    ndarray, shape (m, n)
        The standardized copy of ``X``; the input is not modified.
    """
    X = np.asarray(X, dtype=float)
    m, n = np.shape(X)
    mean = np.sum(X, axis=0) / m
    std = np.sqrt(np.sum(np.power(X - mean, 2), axis=0) / m)
    # A constant column has std == 0; dividing by 1 instead leaves it at 0.
    safe_std = np.where(std == 0, 1.0, std)
    # (n,) statistics broadcast against (m, n) directly, so no extra axis is
    # needed and the result is already two-dimensional.
    return (X - mean) / safe_std

def standardize(X, verbose=True):
    """Standardize each column of ``X`` to zero mean and unit variance.

    The population standard deviation (divisor ``m``) is used. Columns whose
    standard deviation is exactly zero are only centred (they come out as
    all zeros) instead of being divided by zero and turning into NaN.

    Parameters
    ----------
    X : array-like, shape (m, n)
        One sample per row, one feature per column.
    verbose : bool, optional
        When true (the default) print the shapes involved and the
        per-column mean and standard deviation of the result as a sanity
        check.

    Returns
    -------
    ndarray, shape (m, n)
        The standardized copy of ``X``; the input is not modified.
    """
    X = np.asarray(X)
    m, n = np.shape(X)
    mean = np.sum(X, axis=0) / m
    std = np.sqrt(np.sum(np.power(np.subtract(X, mean[None]), 2), axis=0) / float(m))
    # A constant column has std == 0; dividing by 1 instead leaves it at 0.
    safe_std = np.where(std == 0, 1.0, std)

    if verbose:
        print (X.shape,mean.shape,std.shape)
    X_standardized = (X - mean) / safe_std
    if verbose:
        print (X_standardized.shape)
        print ("mena std",np.mean (X_standardized,axis=0),np.std (X_standardized,axis=0))
    return X_standardized

In [4]:
def predict(w, b, X):
    """Return boolean class predictions of the logistic model.

    The linear score ``X @ w + b`` is passed through the logistic function
    and a sample is labelled True when the resulting probability is
    strictly greater than 0.5.
    """
    scores = np.matmul(X, w) + b
    denominator = 1 + np.exp(np.negative(scores))
    return np.greater(1 / denominator, 0.5)

In [5]:
def sigmoid(w, b, X):
    """Return the logistic function applied to the linear score ``X @ w + b``."""
    linear_score = np.matmul(X, w) + b
    return 1 / (1 + np.exp(-linear_score))

In [6]:
def costFunction(y, y_pred, w, m, lambda_reg):
    """Return the L2-regularised cross-entropy cost.

    Parameters
    ----------
    y : ndarray
        Target labels.
    y_pred : ndarray
        Predicted probabilities, same shape as ``y``.
    w : ndarray
        Weights entering the L2 penalty.
    m : int
        Number of samples used to average the cost.
    lambda_reg : float
        Regularisation strength.
    """
    # Small offset added inside both logarithms so that log(0) never occurs.
    epsilon = 1e-5
    log_likelihood = np.sum(
        y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon)
    )
    data_term = (-1 / m) * log_likelihood
    penalty = lambda_reg / (2 * m) * np.sum(np.power(w, 2))
    return data_term + penalty

In [7]:
def accuracy(w, b, X, y):
    """Return the fraction of samples in ``X`` that the model classifies as ``y``.

    Parameters
    ----------
    w, b :
        Model weights and bias, forwarded to ``predict``.
    X : ndarray, shape (m, n)
        One sample per row.
    y : array-like with m entries
        Reference labels; a column vector is flattened before comparing.

    Returns
    -------
    float
        Number of matching predictions divided by ``m``.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of predictions.
    """
    m = np.shape(X)[0]
    y_pred = np.ravel(predict(w, b, X))
    y_true = np.ravel(y)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "expected %d labels, got %d" % (y_pred.shape[0], y_true.shape[0])
        )
    # One vectorised comparison instead of a Python-level loop over samples.
    correct = np.count_nonzero(y_true == y_pred)
    return correct / float(m)

In [8]:
def train(X, y, iterations, learning_rate, lambda_reg):
    """Fit an L2-regularised logistic regression by batch gradient descent.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        One sample per row.
    y : ndarray, shape (m,)
        Target labels.
    iterations : int
        Number of gradient steps to take.
    learning_rate : float
        Step size applied to both the weight and the bias gradient.
    lambda_reg : float
        L2 regularisation strength; applied to the weights, not to the bias.

    Returns
    -------
    w : ndarray, shape (n,)
        Learned weights.
    b : float
        Learned bias.

    Notes
    -----
    The weights are initialised from ``np.random.random`` after calling
    ``np.random.seed(6)``, which resets numpy's global random state on every
    call. The cost is printed once per iteration.
    """
    m, n = np.shape(X)
    np.random.seed(6)
    w = np.random.random([n,])
    b = 0
    for i in range(iterations):
        y_pred = sigmoid(w, b, X)
        # Evaluate the cost before the update so that the predictions and
        # the L2 penalty both refer to the same weights.
        cost = costFunction(y, y_pred, w, m, lambda_reg)

        residual = y_pred - y
        # All n partial derivatives at once: (n, m) @ (m,) -> (n,).
        dw = np.matmul(X.T, residual) / m + (lambda_reg / m) * w
        db = np.sum(residual) / m

        w = w - learning_rate * dw
        b = b - learning_rate * db
        print(str(i) + ". cost function value:"+ str(cost))
    return w, b

# Test

In [53]:
# Load the dataset; readFile uses the last column of the file as the labels y
# and the remaining columns as the feature matrix X.
X, y = readFile("./spam.data")

In [54]:
# Shapes before standardization.
print (X.shape,y.shape)
# NOTE: this rebinds X to its standardized version, so the raw features are
# no longer available afterwards; re-run the loading cell to get them back.
X = standardize(X)
# Shapes after standardization.
print (X.shape,y.shape)

(4601, 57) (4601,)
(4601, 57) (57,) (57,)
(4601, 57)
mena std [ 3.35123993e-15  1.99413043e-15 -2.33262659e-15 -1.03939835e-15
  1.00006940e-15  6.33107032e-15  6.49918430e-15  4.43992690e-15
 -7.51204262e-15 -2.47694835e-16  2.85610363e-15 -1.26711653e-15
 -8.35961019e-16  2.42637179e-15  8.47731651e-15 -4.24464250e-15
  1.23998713e-14  6.34458315e-15 -9.50482176e-16 -6.20862245e-15
 -4.14405041e-15  1.22613769e-15 -1.46428789e-14 -5.78574354e-15
 -4.20468316e-15 -5.58475240e-15  3.90641177e-15 -9.57829773e-16
  2.51996014e-15  3.86619906e-15 -3.40462764e-16  2.50136795e-15
 -1.54061844e-15  4.28554292e-15  3.86863619e-15 -1.43500126e-15
  2.36956968e-17  1.56783410e-15  7.21258885e-16  3.47436345e-16
 -4.08533600e-16  2.46110698e-15 -5.40693815e-16 -4.45249865e-15
  3.60246982e-15 -9.28113632e-16  1.44041846e-15  3.10416644e-15
  4.64096630e-15  1.87500043e-15 -1.37155133e-15 -4.64296306e-15
  7.55248456e-15  1.65320316e-15  5.85183572e-17  6.06737728e-16
  8.89191881e-17] [1. 1. 1. 

In [55]:
# Positional arguments: iterations=40, learning_rate=0.5, lambda_reg=0.000002.
w, b = train(X, y, 40, 0.5, 0.000002)
# NOTE: accuracy is measured on the same X, y that were used for training.
acc = accuracy(w, b, X, y)
print("acc:", acc)

0. cost function value:1.532144294487638
1. cost function value:1.3822992402553012
2. cost function value:1.241662882542011
3. cost function value:1.1113736616762107
4. cost function value:0.9914610141712917
5. cost function value:0.8811560132937748
6. cost function value:0.7832867294934834
7. cost function value:0.7006146175658234
8. cost function value:0.6257586216190414
9. cost function value:0.5650580228094488
10. cost function value:0.522857155865962
11. cost function value:0.4971706679944604
12. cost function value:0.4773183228883263
13. cost function value:0.4599434115092757
14. cost function value:0.44438792838326663
15. cost function value:0.43032413716492124
16. cost function value:0.4175236421820673
17. cost function value:0.40580683325735123
18. cost function value:0.39503017893501674
19. cost function value:0.3850826220768245
20. cost function value:0.375880327673129
21. cost function value:0.36735851103727135
22. cost function value:0.3594637092734597
23. cost function va