# Exercise: Logistic discrimination

Implement the logistic discrimination (LD) algorithm for 2 class problems (see the lecture notes).

Test out your implementation over a synthetic data set. You can use the dataset generated below.

In [4]:
import math 
import numpy as np
np.random.seed(42)

In [12]:
def generate_binary_synthetic_data(nFeatures=4, nSamples=40, test_ratio=0.2):

    K = 2 # number of classes

    # prepare sample data
    X = np.round(np.random.rand(nSamples, nFeatures), 2)

    # Adding some pattern into X
    half_N=int(nSamples/2)
    X[:half_N,:]=X[:half_N,:]*2
    X[half_N:nSamples,:]=X[half_N:nSamples,:]*5

    # number of instances to pick as test set per class
    test_numel = int(nSamples * test_ratio/2)
    X_test_class1 = X[0:test_numel,:]
    X_test_class2 = X[half_N:half_N+test_numel,:]

    # remove test instances from X
    X = np.delete(X, np.s_[0:test_numel], 0)
    X = np.delete(X, np.s_[half_N-test_numel:half_N], 0)

    X_new = np.concatenate([X_test_class1, X_test_class2])

    N = X.shape[0]
    N_new = X_new.shape[0]

    # R is the output
    R = np.repeat([1,0], N/K, axis=0)
    R_new = np.repeat([1,0], N_new/K, axis=0)

    # print(np.c_[X,R])
    return X, R, X_new, R_new

In [15]:
X_train, y_train, X_test, y_test = generate_binary_synthetic_data(nFeatures=4, nSamples=40, test_ratio=0.2)
# print(X_train.shape, y_train.shape)
# print(X_test.shape, y_test.shape)

# print(np.c_[X_train,y_train])
print(np.c_[X_test,y_test])

[[1.42 0.3  1.16 1.22 1.  ]
 [0.84 1.48 1.86 1.86 1.  ]
 [0.9  0.22 1.96 1.68 1.  ]
 [0.24 1.84 1.74 1.04 1.  ]
 [1.15 3.35 0.1  0.5  0.  ]
 [4.   0.9  3.25 1.2  0.  ]
 [0.5  1.2  3.6  4.3  0.  ]
 [4.15 2.   3.35 1.   0.  ]]


In [18]:
def log_regression(x,r,d,step_size=0.1,iterations=10):
    x0=np.repeat(1,len(x))
    new_x = np.c_[x0,x] # last column is for the bias term, w_0
    w= []
    for j in range(0,d+1):
        w.append(np.random.uniform(-0.01,0.01))
        
    for test in range(0,iterations):
        deriv_w = []
        for j in range(0,d+1):
            deriv_w.append(0)
        for i in range(0,len(new_x)):
            o = 0
            for j in range(0,d+1):
                o = o+w[j]*new_x[i,j]
            y = 1/(1 + math.exp(-o))
            for j in range(0,d):
                deriv_w[j] = deriv_w[j] + (r[i]-y)*new_x[i,j]
                
        for j in range(0,d+1):
            w[j] = w[j] + step_size*deriv_w[j]
    
    return np.round(w,3)

In [32]:
n_features = 4
X_train, y_train, X_test, y_test = generate_binary_synthetic_data(nFeatures=n_features, nSamples=40, test_ratio=0.2)
w_array = log_regression(X_train, y_train, n_features, step_size=0.01,iterations=5000)
print(w_array)

[ 8.162e+00 -1.330e+00 -2.573e+00 -8.840e-01  3.000e-03]


In [33]:
# test on the test data (only prediction probabilities are calculated, you can infer the class prediction based on y > 0.5)

y_pred = []
for xt in X_test:
    y_val = w_array[-1]
    for d in range(n_features):
        y_val += w_array[d] * xt[d]
    y_pred.append(1/(1 + math.exp(-y_val)))

y_pred = np.round(y_pred,3)

print(np.c_[y_test, y_pred])


[[1.    0.029]
 [1.    1.   ]
 [1.    0.999]
 [1.    1.   ]
 [0.    0.003]
 [0.    1.   ]
 [0.    0.003]
 [0.    0.998]]
