Import Libraries 

In [None]:
import numpy as np
import pandas as pd 
import math 
import random

set dataset path

In [None]:
# select dataset

# PENDIGITS
data_train_path = 'pendigits_training.txt'
data_test_path = 'pendigits_test.txt'

# SATELLITE
# data_train_path = 'satellite_training.txt'
# data_test_path = 'satellite_test.txt'

# YEAST
# data_train_path = 'yeast_training.txt'
# data_test_path = 'yeast_test.txt'


create column names based on dataset 

In [None]:
x = pd.read_csv(data_train_path, delim_whitespace=True) #
no_columns = len(x.iloc[0])
nms = []
for cc in range(1,no_columns):
    cl = 'x'+str(cc)
    nms.append(cl)

Load and split data

In [None]:
# import training data
data_train = pd.read_csv(data_train_path, delim_whitespace=True, names=nms)

# remove rows with missing values
data_train.dropna()


# split data features vs label 
n_features = len(data_train.iloc[0,])-1
x_train = data_train.iloc[:,0:n_features]
y_train = data_train.iloc[:,-1]

# normalize data 
maxx = max(x_train.max())
minn = min(x_train.min())
x_train = (x_train-minn)/(maxx-minn)

# add x0=1 column
x_train.insert(0, 'x0', np.ones(len(data_train['x1'])))

# set important parameters
m = len(y_train)
alpha = 0.1
threshold = 0.03

#### Create Necessary Functions

In [None]:
def sigmoid(X, thetas):
    return (1/(1+np.exp(-X@thetas)))

In [None]:
def cost_function(x_train, y_train, thetas):
    m = len(x_train)
    total_loss = 0
    for i in range(m):
        # print('i is: ', i)
        row = x_train.iloc[i]
        y_hat = sigmoid(row, thetas)
        y_i = y_train[i]
        loss = (y_i * np.log(y_hat))+ ( (1-y_i) * np.log(1-y_hat) )
        total_loss += loss
    return (-1/m)*total_loss

In [None]:
# initialize thetas
def initial_thetas(x_train):   
    theta = np.array([])
    one_row = x_train.iloc[0]
    for l in range(len(one_row)):
        # select:
        # 1. initialize thetas to random numbers between 0 and 1
        theta = np.append(theta, [random.uniform(-1,1)])
        # or 
        # 2. initialize thetas to zero
        # theta = np.append(theta, [0])
    return theta


In [None]:
def gradient_descent(x_train, y_train, alpha, thetas):
    m = len(x_train)
    constant = alpha/m
    # empty array to hold the temp values of theta
    new_theta = np.array([])
    # cycle through thetas 
    for t in range(len(thetas)):
        # cycle through the rows
        value = 0
        for i in range(m):
            row = x_train.iloc[i]
            y_i = y_train[i]
            x_i = x_train.iloc[i,t]
            y_hat = sigmoid(row, thetas)
            value += ((y_hat-y_i) * x_i)
        # update theta and save it into temp value
        temp_th = thetas[t] - (constant*value)
        # update the temp thetas
        new_theta = np.append(new_theta, temp_th)
    #return the new thetas array
    return new_theta

class for training each for each class 

In [None]:
class Model:
    
    def __init__(self, x_train, y_train, lr, threshold, clas, max_iter):
        self.thetas = initial_thetas(x_train)
        self.loss = cost_function(x_train, y_train, self.thetas)
        self.alpha = lr
        self.x_train = x_train
        self.y_train = y_train
        self.threshold = threshold
        self.clas = clas
        self.max_iter = max_iter



    def fit(self):
        print('initial loss is: ', self.loss)
        if self.loss <= self.threshold:
            print('Training for class%d complete' % (self.clas))
            print('THETA VALUES ARE: ')
            for i in range(len(self.thetas)): #th
                print('theta %d = %f' % (i, self.thetas[i])) #th
            return self.thetas #th
        else:
            xc = self.loss
            iteration = 0
            # revisit comparison
            while (xc > self.threshold):
                if iteration == self.max_iter:
                  break
                self.thetas = gradient_descent(self.x_train, self.y_train, self.alpha, self.thetas) #th
                xc = cost_function(self.x_train, self.y_train, self.thetas)
                print(iteration, ' new loss =', xc)
                iteration += 1
            print(' final Training for class%d complete' % (self.clas))
            print('THETA VALUES ARE: ')
            for i in range(len(self.thetas)): #th
                print('theta %d = %f' % (i, self.thetas[i])) #th
            return self.thetas

### start training process

Extract y values as one hot vector

In [11]:
n = y_train.nunique()
labels = []
for i in y_train:
    y_example = np.zeros(n, dtype=int)
    y_example[i-1] = 1
    labels.append(y_example)
# labels is our final labels which will be used later for training
labels = np.array(labels)
print('********************************************************')
print('*  this dataset have %d unique classes                 *' % n)
print('*  available classes are: ', np.sort(y_train.unique()), '      *')
print('*                                                      *')
print('********************************************************')

********************************************************
*  this dataset have 10 unique classes                 *
*  available classes are:  [0 1 2 3 4 5 6 7 8 9]       *
*                                                      *
********************************************************


In [12]:
n = y_train.nunique()
y_train.unique()

array([8, 2, 1, 4, 6, 0, 5, 9, 7, 3])

train the model by creating a Model object for each data class and invoke the fit() function

In [13]:
# reduce size of training examples if needed
x_train = x_train.iloc[:100,]
y_train = y_train.iloc[:100,]
len(y_train)

100

In [14]:
no_classes = len(labels[1])
classes = []
# train for each class 
for c in range(no_classes):
    print('Training for class ', c+1)
    y1 = labels[:,c]
    # threshold is the loss value to reach after stopping iterations
    # max_iter is the maximum number of iterations before stopping if the threshold is not reached
    m = Model(x_train, y1, alpha, threshold, clas=c+1, max_iter=500)
    m.fit()
    # add final theta for each class for output classes
    classes.append(m.thetas)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
199  new loss = 0.2562559614294222
200  new loss = 0.2559949600693994
201  new loss = 0.2557352328799165
202  new loss = 0.25547676924741036
203  new loss = 0.25521955867465945
204  new loss = 0.25496359077932645
205  new loss = 0.2547088552925177
206  new loss = 0.25445534205736325
207  new loss = 0.25420304102761293
208  new loss = 0.25395194226625206
209  new loss = 0.2537020359441348
210  new loss = 0.25345331233863433
211  new loss = 0.25320576183231014
212  new loss = 0.25295937491159465
213  new loss = 0.25271414216549454
214  new loss = 0.25247005428431046
215  new loss = 0.2522271020583724
216  new loss = 0.2519852763767922
217  new loss = 0.25174456822623303
218  new loss = 0.2515049686896929
219  new loss = 0.2512664689453066
220  new loss = 0.2510290602651612
221  new loss = 0.25079273401412916
222  new loss = 0.2505574816487152
223  new loss = 0.25032329471591885
224  new loss = 0.250090164852113
225  new los

print final values of theta for each class

In [15]:
i_ = 1
print('**************** SUMMARY ******************** *')
print('* this dataset has %d unique classes          *' % n)
print('* accordingly we have %d different theta sets *' % n)
print('* which are summarized below                  *')
print('***********************************************')
print()
for cls in classes:
    print ('FINAL THETA VALUES FOR CLASS ', i_)
    for theta in range(len(cls)):
        print('theta %d = %.4f' % (theta, cls[theta])) 
    print()
    i_ += 1
    

**************** SUMMARY ******************** *
* this dataset has 10 unique classes          *
* accordingly we have 10 different theta sets *
* which are summarized below                  *
***********************************************

FINAL THETA VALUES FOR CLASS  1
theta 0 = -0.4576
theta 1 = -0.8222
theta 2 = -0.4332
theta 3 = -0.3261
theta 4 = 0.1212
theta 5 = -0.3117
theta 6 = 0.3038
theta 7 = 1.6083
theta 8 = -0.1433
theta 9 = 1.0780
theta 10 = -1.1713
theta 11 = 0.4824
theta 12 = -1.3938
theta 13 = -1.5614
theta 14 = -0.0002
theta 15 = -0.5319

FINAL THETA VALUES FOR CLASS  2
theta 0 = -0.8294
theta 1 = -1.5914
theta 2 = -0.2044
theta 3 = 0.8797
theta 4 = 0.6984
theta 5 = -0.0008
theta 6 = 0.7026
theta 7 = -0.8545
theta 8 = -1.9777
theta 9 = 0.0742
theta 10 = -0.8810
theta 11 = -0.7707
theta 12 = 0.5520
theta 13 = -1.0932
theta 14 = 0.8018
theta 15 = -0.8505

FINAL THETA VALUES FOR CLASS  3
theta 0 = -0.4488
theta 1 = -0.3731
theta 2 = 0.3612
theta 3 = -1.0593
theta 4 = 0.4

## Testing stage

create column names based on dataset 

In [None]:
x = pd.read_csv(data_test_path, delim_whitespace=True) #
no_columns = len(x.iloc[0])
nms = []
for cc in range(1,no_columns):
    cl = 'x'+str(cc)
    nms.append(cl)

load data

In [None]:
# create column name array
# nms = ['x1', 'x2', 'x3','x4','x5','x6','x7','x8','y',]
# import training data
data_test = pd.read_csv(data_test_path, delim_whitespace=True, names=nms)

# split data features vs label 
x_test = data_test.iloc[:,0:n_features]
y_test = data_test.iloc[:,-1]

# normalize data 
maxx = max(x_test.max())
minn = min(x_test.min())
x_test = (x_test-minn)/(maxx-minn)

# add x0=1 column
x_test.insert(0, 'x0', np.ones(len(data_test['x1'])))

# set important parameters
m = len(y_test)
# alpha = 0.1

In [18]:
x_test.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15
88,1.0,0.92,0.02,0.99,0.16,0.66,0.94,0.37,0.7,0.0,0.0,0.24,0.42,0.65,1.0,1.0
80,1.0,1.0,0.18,0.98,0.6,0.66,1.0,0.29,0.42,0.0,0.0,0.23,0.42,0.61,0.56,0.98
0,1.0,0.94,0.09,0.57,0.2,0.19,0.07,0.0,0.2,0.36,0.7,0.68,1.0,1.0,0.18,0.92
95,1.0,0.82,0.71,1.0,0.27,0.77,0.77,0.73,1.0,0.8,0.93,0.42,0.56,0.13,0.0,0.0
68,1.0,1.0,0.06,0.88,0.47,0.75,0.87,0.82,0.85,0.56,1.0,0.29,0.75,0.06,0.0,0.0


run testing data through the model 

In [19]:
m = len(x_test)
id_ = 1
# print('id     predicted class      true class      acuracy')
counter = 0
for example in range(m):
    x_i = x_test.iloc[example]
    output = []
    for xx in classes:
        # print(sigmoid(x_i, xx))
        output.append(sigmoid(x_i, xx))
    prediction = output.index(max(output))+1
    accuracy = 1 if prediction == y_test.iloc[example] else 0
    if accuracy ==1:
        counter += 1
    print('ID=%5d, output=%14.4f, target value=%14.4f, misclassification error=%4d' % (id_, prediction, y_test.iloc[example], accuracy))
    id_ += 1

print('\nOverall accuracy is {}'.format((counter/m)*100), '%')

    

ID=    1, output=        8.0000, target value=        8.0000, misclassification error=   1
ID=    2, output=        8.0000, target value=        8.0000, misclassification error=   1
ID=    3, output=       10.0000, target value=        8.0000, misclassification error=   0
ID=    4, output=        9.0000, target value=        9.0000, misclassification error=   1
ID=    5, output=        4.0000, target value=        9.0000, misclassification error=   0
ID=    6, output=        1.0000, target value=        1.0000, misclassification error=   1
ID=    7, output=        4.0000, target value=        4.0000, misclassification error=   1
ID=    8, output=        1.0000, target value=        7.0000, misclassification error=   0
ID=    9, output=        3.0000, target value=        9.0000, misclassification error=   0
ID=   10, output=        9.0000, target value=        9.0000, misclassification error=   1
ID=   11, output=        9.0000, target value=        9.0000, misclassification error=   1