## Neural Network - Keras

In [None]:
# Print the submitter's identification banner: UBID on the first line, person number on the second.
print('UBID:          hnitturk', 'Person Number: 50291411', sep='\n')

In [None]:
import keras
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential

from PIL import Image
import os
import numpy as np
import scipy.sparse

## Load USPS data from file system

In [None]:
# Load every PNG under USPSdata/Test as a flattened 28x28 vector with values in [0, 1].
USPSMat  = []
USPSTar  = []
curPath  = 'USPSdata/Test'
savedImg = []
curFolderPath = curPath
# Sorted listing: the positional labels built below rely on this order.
imgs = sorted(os.listdir(curFolderPath))
for imgName in imgs:
    # Skip anything that is not a PNG file.
    if not imgName.endswith('png'):
        continue
    curImg = curFolderPath + '/' + imgName
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(curImg, 'r') as img:
        # Force a single 8-bit channel so getdata() yields one scalar per pixel
        # (multi-band images would yield tuples), then resize to 28x28.
        img = img.convert('L').resize((28, 28))

        # Invert the intensities and rescale them to [0, 1].
        # Presumably this makes the strokes bright on a dark background like MNIST - TODO confirm.
        imgdata = (255 - np.array(img.getdata())) / 255
    USPSMat.append(imgdata)

# Labels are positional: the sorted listing is assumed to contain 150 images per
# digit, ordered from 9 down to 0 - TODO confirm against the dataset layout.
for digit in range(9, -1, -1):
    USPSTar.extend([digit] * 150)

# Fail early instead of carrying misaligned features/labels into training and evaluation.
if len(USPSMat) != len(USPSTar):
    raise ValueError(
        'Expected %d USPS images in %s but found %d'
        % (len(USPSTar), curFolderPath, len(USPSMat))
    )

#function for one-hot encoding of the target variables
def oneHotIt(Y, num_classes=None):
    """Return the one-hot encoding of a vector of integer class labels.

    Args:
        Y: 1-D array-like of non-negative integer class labels.
        num_classes: Width of the encoding. When omitted it is ``max(Y) + 1``;
            pass it explicitly whenever the highest class might be missing
            from ``Y`` so the width stays fixed.

    Returns:
        A dense float array of shape ``(len(Y), num_classes)`` holding a
        single 1.0 per row, in the column given by that row's label.

    Raises:
        ValueError: If ``Y`` is empty and ``num_classes`` is not given, or if
            a label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(Y).reshape(-1)
    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer num_classes from an empty label vector")
        num_classes = int(labels.max()) + 1
    labels = labels.astype(np.intp)
    # Reject bad labels explicitly: negative indices would otherwise wrap around silently.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError("labels must lie in [0, num_classes)")
    # Row i of the identity matrix is the one-hot vector of class i.
    return np.eye(num_classes)[labels]

# Pack the USPS samples into arrays: integer labels, feature matrix, and one-hot labels.
y = np.asarray(USPSTar)
usps_x_nn = np.asarray(USPSMat)
usps_y_nn = oneHotIt(y)

In [None]:
# Load the MNIST train/test split through keras.datasets.mnist and unpack it into
# image arrays (x_*) and label arrays (y_*).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Flatten every 28x28 image into a 784-vector and scale the pixel values to [0, 1].
# The USPS features evaluated with the same network are already scaled to [0, 1];
# leaving MNIST at its raw 0-255 range would train the network on a different
# input scale than the one it is later tested on.
num_classes=10
image_vector_size=28*28
x_train_nn = x_train.reshape(x_train.shape[0], image_vector_size).astype('float32') / 255
x_test_nn = x_test.reshape(x_test.shape[0], image_vector_size).astype('float32') / 255
# One-hot targets, as required by the categorical cross-entropy loss.
y_train_nn = keras.utils.to_categorical(y_train, num_classes)
y_test_nn = keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Width of the input layer (28x28 pixels flattened) and an empty Sequential model.
image_size = 28 * 28
model = Sequential()

In [None]:
# Network layout, added in order: a 32-unit ReLU hidden layer, then a softmax
# output layer with one unit per class.
for layer in (
    Dense(units=32, activation='relu', input_shape=(image_size,)),
    Dense(units=num_classes, activation='softmax'),
):
    model.add(layer)

# Optimizer is stochastic gradient descent; the loss is categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [None]:
# confusion_matrix is used below but was never imported.
from sklearn.metrics import confusion_matrix

# confusion_matrix compares label vectors, so recover the integer class of every
# test sample from its one-hot row instead of passing the one-hot matrix itself.
y_test_labels = np.argmax(y_test_nn, axis=1)

# NOTE(review): `model` is not re-created between runs, so every fit() below
# continues from the weights left by the previous run.

# Sweep over batch sizes at a fixed 150 epochs.
bsize=[64,128,150,200,250]
for i in bsize:
    print("batch_size:",i)
    print("epochs: 150" )

    #Training
    history = model.fit(x_train_nn, y_train_nn, batch_size=i, epochs=150, verbose=False,validation_split=0.1)

    #predicting: the class is the index of the largest softmax output
    #(predict_classes is not available in current Keras releases)
    y_pred = np.argmax(model.predict(x_test_nn), axis=1)

    #confusion_matrix
    print(confusion_matrix(y_test_labels, y_pred))

    #testing or evaluating the model on MNIST, then on USPS
    loss,accuracy = model.evaluate(x_test_nn, y_test_nn, verbose=False)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)
    loss,accuracy = model.evaluate(usps_x_nn, usps_y_nn, verbose=False)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)

# Sweep over epoch counts at a fixed batch size of 128.
epochsize=[150,100,200,50,250]
for i in epochsize:
    print("number of epochs:",i)

    #Training
    history = model.fit(x_train_nn, y_train_nn, batch_size=128, epochs=i, verbose=False,validation_split=0.1)

    #predicting: the class is the index of the largest softmax output
    y_pred = np.argmax(model.predict(x_test_nn), axis=1)

    #confusion_matrix
    print(confusion_matrix(y_test_labels, y_pred))

    #testing or evaluating the model on MNIST, then on USPS
    loss,accuracy = model.evaluate(x_test_nn, y_test_nn, verbose=False)
    print("Loss:", loss)
    print("Accuracy: ",accuracy)
    loss,accuracy = model.evaluate(usps_x_nn, usps_y_nn, verbose=False)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)

## SVM and Random Forest - scikit-learn - MNIST

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import fetch_mldata
from sklearn import metrics
from sklearn.multiclass import OneVsRestClassifier
import time
import random

In [None]:
#fetching MNIST data
# NOTE(review): this rebinds the name `mnist`, which was imported from keras.datasets
# at the top of the notebook; from here on `mnist` is the object returned by fetch_mldata.
# NOTE(review): fetch_mldata is not available in recent scikit-learn releases
# (fetch_openml is its replacement) - confirm the installed version before re-running.
mnist = fetch_mldata('MNIST original')

In [None]:
#creating the data

# The first n_train rows are used for training and the next n_test rows for testing.
n_train = 60000
n_test = 10000
indices = np.arange(len(mnist.data))
train_idx = np.arange(0,n_train)
test_idx = np.arange(n_train,n_train+n_test)
# Scale the pixel values to [0, 1]: the USPS features these classifiers are also
# scored on are already in [0, 1], so training on raw 0-255 values would evaluate
# them on a different input scale than they were fitted on.
X_train_svmrf, y_train_svmrf = mnist.data[train_idx] / 255.0, mnist.target[train_idx]
X_test_svmrf, y_test_svmrf = mnist.data[test_idx] / 255.0, mnist.target[test_idx]
# USPS labels and features, reused from the neural-network section.
usps_y_svmrf = y
usps_x = usps_x_nn

## SVM classifier

In [None]:
#SVM classifiers using various combinations of parameters such as kernel, degree, gamma and C

# One entry per run, in execution order: the banner to print and the SVC keyword arguments.
svm_runs = [
    ("Kernal: linear", dict(kernel='linear', C=1.0, degree=3, gamma=0.1)),
    ("Kernal: polynomial", dict(kernel='poly', C=0.2, degree=2, gamma=0.1)),
    ("Kernal: rbf", dict(kernel='rbf', C=1.0, degree=1, gamma=0.1)),
    ("Kernal: rbf", dict(kernel='rbf', C=1.0, degree=1, gamma=1)),
    ("Kernal: sigmoid", dict(kernel='sigmoid', C=1.0, degree=1, gamma=0.1)),
]

for banner, svc_kwargs in svm_runs:
    print(banner)
    classifier1 = SVC(**svc_kwargs)
    classifier1.fit(X_train_svmrf, y_train_svmrf)

    # First accuracy line: MNIST test split.
    y_pred_mnist_svm = classifier1.predict(X_test_svmrf)
    print("Accuracy:", metrics.accuracy_score(y_test_svmrf, y_pred_mnist_svm))

    # Second accuracy line: USPS.
    y_pred_usps_svm = classifier1.predict(usps_x)
    print("Accuracy:", metrics.accuracy_score(usps_y_svmrf, y_pred_usps_svm))

## RandomForest

In [None]:
#Random Forest classifiers using various combinations of parameters such as n_estimators, criterion, max depth,
#min_samples_split

# This run: 200 trees, Gini criterion, depth limit 2, at least 10 samples to split a node.
classifier2 = RandomForestClassifier(
    n_estimators=200,
    criterion="gini",
    max_depth=2,
    min_samples_split=10,
)
classifier2.fit(X_train_svmrf, y_train_svmrf)

# Score on the MNIST test split first, then on USPS.
y_pred_mnist_rf = classifier2.predict(X_test_svmrf)
print("Accuracy:", metrics.accuracy_score(y_true=y_test_svmrf, y_pred=y_pred_mnist_rf))

y_pred_usps_rf = classifier2.predict(usps_x)
print("Accuracy:", metrics.accuracy_score(y_true=usps_y_svmrf, y_pred=y_pred_usps_rf))

In [None]:
# Random forest run: 300 trees, entropy criterion, depth limit 2, at least 10 samples to split.
classifier2 = RandomForestClassifier(
    n_estimators=300,
    criterion="entropy",
    max_depth=2,
    min_samples_split=10,
)
classifier2.fit(X_train_svmrf, y_train_svmrf)

# Score on the MNIST test split first, then on USPS.
y_pred_mnist_rf = classifier2.predict(X_test_svmrf)
print("Accuracy:", metrics.accuracy_score(y_true=y_test_svmrf, y_pred=y_pred_mnist_rf))

y_pred_usps_rf = classifier2.predict(usps_x)
print("Accuracy:", metrics.accuracy_score(y_true=usps_y_svmrf, y_pred=y_pred_usps_rf))

In [None]:
# Random forest run: 400 trees, Gini criterion, depth limit 2, at least 10 samples to split.
classifier2 = RandomForestClassifier(
    n_estimators=400,
    criterion="gini",
    max_depth=2,
    min_samples_split=10,
)
classifier2.fit(X_train_svmrf, y_train_svmrf)

# Score on the MNIST test split first, then on USPS.
y_pred_mnist_rf = classifier2.predict(X_test_svmrf)
print("Accuracy:", metrics.accuracy_score(y_true=y_test_svmrf, y_pred=y_pred_mnist_rf))

y_pred_usps_rf = classifier2.predict(usps_x)
print("Accuracy:", metrics.accuracy_score(y_true=usps_y_svmrf, y_pred=y_pred_usps_rf))

In [None]:
# Random forest run: 500 trees, entropy criterion, depth limit 2, at least 10 samples to split.
classifier2 = RandomForestClassifier(
    n_estimators=500,
    criterion="entropy",
    max_depth=2,
    min_samples_split=10,
)
classifier2.fit(X_train_svmrf, y_train_svmrf)

# Score on the MNIST test split first, then on USPS.
y_pred_mnist_rf = classifier2.predict(X_test_svmrf)
print("Accuracy:", metrics.accuracy_score(y_true=y_test_svmrf, y_pred=y_pred_mnist_rf))

y_pred_usps_rf = classifier2.predict(usps_x)
print("Accuracy:", metrics.accuracy_score(y_true=usps_y_svmrf, y_pred=y_pred_usps_rf))

## Softmax Logistic Regression

## Load MNIST on Python 3.x

In [None]:
import pickle
import gzip
from mlxtend.data import loadlocal_mnist
import random
import scipy.sparse
import numpy as np

In [None]:
#Load MNIST data from the local IDX files
X_train, y_train = loadlocal_mnist(images_path='train-images.idx3-ubyte', labels_path='train-labels.idx1-ubyte')

X_test, y_test = loadlocal_mnist(images_path='t10k-images.idx3-ubyte', labels_path='t10k-labels.idx1-ubyte')

# Scale the unsigned-byte pixel values to [0, 1]. The USPS features the softmax
# model is also scored on are already in [0, 1], so the training data has to use
# the same range for that evaluation to be meaningful.
X_train = X_train / 255.0
X_test = X_test / 255.0


## Load USPS on Python 3.x

In [None]:
#one hot encoding for target variables
def oneHotIt(Y, num_classes=10):
    """Return the one-hot encoding of a collection of integer class labels.

    Args:
        Y: Array-like of class labels; it is flattened to 1-D. Values must be
            whole numbers (integer dtype, or floats such as ``3.0``).
        num_classes: Width of the encoding. Defaults to 10 (the ten digits).

    Returns:
        A float array of shape ``(n_labels, num_classes)`` with a single 1.0
        per row, in the column given by that row's label.

    Raises:
        ValueError: If a label is not a whole number.
        IndexError: If a label is ``>= num_classes``.
    """
    targets = np.asarray(Y).reshape(-1)
    if not np.issubdtype(targets.dtype, np.integer):
        # Float-typed labels (and empty lists, which default to float) cannot be
        # used as indices directly; cast them once they are known to be integral.
        as_int = targets.astype(np.int64)
        if not np.array_equal(as_int, targets):
            raise ValueError("class labels must be whole numbers")
        targets = as_int
    # Row i of the identity matrix is the one-hot vector of class i.
    return np.eye(num_classes)[targets]

#initialize weights
def init_params(dimension,y):
    """Create zero-initialised parameters for the softmax classifier.

    Args:
        dimension: Number of input features (rows of the weight matrix).
        y: Label collection; the number of distinct values in it sets the
            number of weight columns.

    Returns:
        ``(w, b)``: a ``(dimension, n_distinct_labels)`` array of zeros and
        the scalar bias 0.
    """
    n_classes = np.unique(y).size
    w = np.zeros(shape=(dimension, n_classes))
    b = 0
    return w, b

def softmax(z):
    """Return the row-wise softmax of a 2-D score matrix.

    Each row is shifted by its own maximum before exponentiating. Softmax is
    unchanged by a per-row shift, and shifting per row keeps at least one
    exponent at exp(0) = 1 in every row, so no row can underflow to all zeros
    and produce 0/0. The input array is left unmodified.

    Args:
        z: Array-like of shape ``(n_samples, n_classes)``.

    Returns:
        Float array of the same shape whose rows are non-negative and sum to 1.
    """
    scores = np.asarray(z, dtype=float)
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

def propagate(w, b, X, Y,epochs, lr):
    """Fit the softmax-regression parameters with full-batch gradient descent.

    Args:
        w: Initial weight matrix of shape ``(n_features, n_classes)``.
        b: Initial bias; a scalar or a length-``n_classes`` vector.
        X: Training features of shape ``(n_samples, n_features)``.
        Y: Integer class labels for ``X``; one-hot encoded via ``oneHotIt``.
        epochs: Number of gradient-descent steps over the whole training set.
        lr: Learning rate (step size).

    Returns:
        ``(w, b)`` after ``epochs`` updates. ``b`` is a per-class vector once
        at least one update has been applied.
    """
    # num of training samples
    m = X.shape[0]

    #one-hot encoding the data
    y_ohv = oneHotIt(Y)

    # L2 regularisation strength, applied to the weights only
    lam = 1

    for _ in range(epochs):
        # forward pass: class probabilities for every training sample
        A = softmax(np.dot(X, w) + b)

        # Gradient of the mean cross-entropy with respect to the scores.
        # It must be (probabilities - targets): with the opposite sign the
        # descent step below would increase the loss instead of reducing it.
        error = A - y_ohv

        # back propagation - gradients for the weights (plus L2 term) and the per-class bias
        dw = (1 / m) * np.dot(X.T, error) + lam * w
        db = (1 / m) * np.sum(error, axis=0)

        #update rule for weight and bias
        w = w - (lr * dw)
        b = b - (lr * db)

    return w, b

def predict(w, b, X):
    """Predict class probabilities and labels for ``X``.

    Args:
        w: Weight matrix of shape ``(n_features, n_classes)``.
        b: Bias; a scalar or a length-``n_classes`` vector.
        X: Features of shape ``(n_samples, n_features)``.

    Returns:
        ``(A, Y_predict)``: the ``(n_samples, n_classes)`` probability matrix
        and, per sample, the index of its most probable class.
    """
    #make predictions with the same scores the model was trained on (weights and bias)
    A = softmax(np.dot(X, w) + b)
    Y_predict = np.argmax(A, axis=1)
    return A, Y_predict

def getAccuracy(w,b,someX,someY):
    """Return the fraction of samples in ``someX`` whose predicted class equals ``someY``.

    Args:
        w: Weight matrix passed through to ``predict``.
        b: Bias passed through to ``predict``.
        someX: Feature matrix to classify.
        someY: Expected class label for every row of ``someX``.

    Returns:
        Number of matching predictions divided by ``len(someY)``.
    """
    _, predicted = predict(w, b, someX)
    n_correct = sum(predicted == someY)
    n_total = float(len(someY))
    return n_correct / n_total

def model(X_train, Y_train, X_test, Y_test, epochs, lr):
    """Train a softmax-regression classifier, print its test accuracy and return it.

    Args:
        X_train: Training features of shape ``(n_samples, n_features)``.
        Y_train: Training labels.
        X_test: Features to score the trained model on.
        Y_test: Labels for ``X_test``.
        epochs: Number of gradient-descent steps.
        lr: Learning rate.

    Returns:
        ``(w, b)``: the trained weights and bias, so callers that bind the
        result keep the fitted model instead of ``None``.
    """
    w, b = init_params(X_train.shape[1], Y_train)
    w, b = propagate(w, b, X_train, Y_train, epochs, lr)
    print(getAccuracy(w,b,X_test,Y_test))
    return w, b

In [None]:
# (epochs, learning rate) settings to compare, in the order their results are printed.
softmax_configs = [
    (1000, 1e-3),
    (1500, 1e-3),
    (2000, 1e-3),
    (1000, 1e-4),
    (1000, 1e-2),
    (1000, 1e-1),
]

# Training starts from all-zero parameters and uses the whole training set on every
# step, so it is deterministic. Each setting is therefore fitted once and scored on
# both test sets, instead of being fitted a second time just to score USPS.
usps_accuracies = []
for n_epochs, step_size in softmax_configs:
    trained_w, trained_b = init_params(X_train.shape[1], y_train)
    trained_w, trained_b = propagate(trained_w, trained_b, X_train, y_train, n_epochs, step_size)
    # MNIST accuracies are printed first ...
    print(getAccuracy(trained_w, trained_b, X_test, y_test))
    usps_accuracies.append(getAccuracy(trained_w, trained_b, usps_x, y))

# ... followed by the USPS accuracies, in the same configuration order.
for usps_accuracy in usps_accuracies:
    print(usps_accuracy)

# Keep the parameters of the last configuration available under the original name.
myModel = (trained_w, trained_b)

## Combining Models

In [None]:
import random
# Rebuild the MNIST train/test split with pixel values scaled to [0, 1], matching
# the range of the USPS features that the combined models are also applied to.
X_train_svmrf, y_train_svmrf = mnist.data[train_idx] / 255.0, mnist.target[train_idx]
X_test_svmrf, y_test_svmrf = mnist.data[test_idx] / 255.0, mnist.target[test_idx]

# Pair every training image with its label so both can be sub-sampled together.
a = list(zip(X_train_svmrf,y_train_svmrf))

In [None]:
# Draw 15000 (image, label) pairs for the neural network and split them back
# into a feature array, a label array and one-hot labels.
# NOTE(review): `y` is rebound here; it previously held the USPS label vector.
sample_nn = random.sample(a, 15000)
x, y = zip(*sample_nn)
x1, y1 = np.asarray(x), np.asarray(y)
y2 = keras.utils.to_categorical(y1, num_classes)

In [None]:
# Input width (28x28 pixels flattened) and a fresh, empty Sequential model.
image_size = 28 * 28
model = Sequential()

In [None]:
# 32-unit sigmoid hidden layer feeding a softmax output with one unit per class.
hidden_layer = Dense(units=32, activation='sigmoid', input_shape=(image_size,))
output_layer = Dense(units=num_classes, activation='softmax')
model.add(hidden_layer)
model.add(output_layer)

# Stochastic gradient descent on the categorical cross-entropy loss, tracking accuracy.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [None]:
# Train on the sampled subset, holding out 10% of it for validation.
history = model.fit(x1, y2, batch_size=128, epochs=150, verbose=False,validation_split=0.1)
# The predicted class is the index of the largest softmax output. predict() plus
# argmax is used because predict_classes is not available in current Keras releases.
ynew_mnist_nn_bg = np.argmax(model.predict(X_test_svmrf), axis=1)
ynew_usps_nn_bg = np.argmax(model.predict(usps_x_nn), axis=1)

In [None]:
# Draw a new 15000-pair sample and unpack it into features, labels and one-hot labels.
sample_nn = random.sample(a, 15000)
x, y = zip(*sample_nn)
x1, y1 = np.asarray(x), np.asarray(y)
y2 = keras.utils.to_categorical(y1, num_classes)

# Linear-kernel SVM fitted on the sample, then applied to the MNIST test split and to USPS.
svm_kwargs = dict(kernel='linear', C=1.0, degree=1, gamma=0.1)
classifier1 = SVC(**svm_kwargs)
classifier1.fit(x1, y1)

y_pred_mnist_svm_bg = classifier1.predict(X_test_svmrf)

y_pred_usps_svm_bg = classifier1.predict(usps_x)

In [None]:
# Random forest (100 trees, entropy criterion, depth limit 2) fitted on the current
# sample (x1, y1), then applied to the MNIST test split and to USPS.
rf_kwargs = dict(n_estimators=100, criterion="entropy", max_depth=2, min_samples_split=10)
classifier2 = RandomForestClassifier(**rf_kwargs)
classifier2.fit(x1, y1)

y_pred_mnist_rf_bg = classifier2.predict(X_test_svmrf)
y_pred_usps_rf_bg = classifier2.predict(usps_x)

## Bagging using majority voting

In [None]:
#finding class with maximum number of predictions that gives us the result of bagging using Majority Voting
from collections import Counter
import operator

# One vote per model for every MNIST test sample. most_common(1) returns the label
# with the highest count; when all three models disagree, the first vote (the
# neural network's) wins because ties keep first-seen order.
# The votes live in a local tuple so the module-level list `a` of training pairs,
# which the sampling cells draw from, is no longer overwritten.
final = [
    Counter(votes).most_common(1)[0][0]
    for votes in zip(ynew_mnist_nn_bg, y_pred_mnist_svm_bg, y_pred_mnist_rf_bg)
]
final_pred = np.asarray(final)

In [None]:
# Fraction of MNIST test samples on which the neural network's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with y_test_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(ynew_mnist_nn_bg,final_pred))

In [None]:
# Fraction of MNIST test samples on which the SVM's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with y_test_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(y_pred_mnist_svm_bg,final_pred))

In [None]:
# Fraction of MNIST test samples on which the random forest's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with y_test_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(y_pred_mnist_rf_bg,final_pred))

In [None]:
# Majority vote over the three models' USPS predictions. most_common(1) returns the
# label with the highest count; when all three models disagree, the first vote (the
# neural network's) wins because ties keep first-seen order.
# The votes live in a local tuple so the module-level list `a` of training pairs,
# which the sampling cells draw from, is no longer overwritten.
final = [
    Counter(votes).most_common(1)[0][0]
    for votes in zip(ynew_usps_nn_bg, y_pred_usps_svm_bg, y_pred_usps_rf_bg)
]
final_pred = np.asarray(final)

In [None]:
# Fraction of USPS samples on which the random forest's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with usps_y_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(y_pred_usps_rf_bg,final_pred))

In [None]:
# Fraction of USPS samples on which the neural network's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with usps_y_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(ynew_usps_nn_bg,final_pred))

In [None]:
# Fraction of USPS samples on which the SVM's prediction equals the majority vote.
# NOTE(review): this is agreement with the ensemble, not accuracy against the true labels;
# if ensemble accuracy is intended, compare final_pred with usps_y_svmrf - confirm.
print("Accuracy:",metrics.accuracy_score(y_pred_usps_svm_bg,final_pred))