In [0]:
import numpy as np
from urllib import request
import gzip
import pickle
import pandas as pd
from PIL import Image
import os
import tensorflow as tf
from tqdm import tqdm_notebook
from keras.utils import np_utils
from sklearn.metrics import confusion_matrix
%matplotlib inline

In [0]:
#MNIST Data download and processing
filename = [
["training_images","train-images-idx3-ubyte.gz"],
["test_images","t10k-images-idx3-ubyte.gz"],
["training_labels","train-labels-idx1-ubyte.gz"],
["test_labels","t10k-labels-idx1-ubyte.gz"]
]

def download_mnist():
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for name in filename:
        print("Downloading "+name[1]+"...")
        request.urlretrieve(base_url+name[1], name[1])
    print("Download complete.")

def save_mnist():
    mnist = {}
    for name in filename[:2]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28)
    for name in filename[-2:]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
    with open(r"mnist.pkl", 'wb') as f:
        pickle.dump(mnist,f)
    print("Save complete.")

def init():
    download_mnist()
    save_mnist()

def load():
    with open(r"mnist.pkl",'rb') as f:
        mnist = pickle.load(f)
        print(mnist.keys())
    #return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]
    return mnist

if __name__ == '__main__':
    init()


Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


In [0]:
#from google.colab import drive
#drive.mount('/content/gdrive')

USPSMat  = []
USPSTar  = []
curPath  = r'/content/gdrive/My Drive/Colab Notebooks/Numerals/'
savedImg = [] 

for j in range(0,10):
    curFolderPath = curPath + '/' + str(j)
    imgs =  os.listdir(curFolderPath)
    for img in imgs:
        curImg = curFolderPath + '/' + img
        if curImg[-3:] == 'png':
            img = Image.open(curImg,'r')
            img = img.resize((28, 28))
            savedImg = img
            imgdata = (255-np.array(img.getdata()))/255
            USPSMat.append(imgdata)
            USPSTar.append(j)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
#MNIST Data
MNist_Dataset = load()
Mnist_TrainingData = MNist_Dataset["training_images"][:50000]
Mnist_TrainingTarget = MNist_Dataset["training_labels"][:50000]

#MNIST TestData
Mnist_TestingData = MNist_Dataset["training_images"][50000:60000]
Mnist_TestingTarget = MNist_Dataset["training_labels"][50000:60000]

#USPS Data
USPS_TestingData = pd.DataFrame(USPSMat)
USPS_TargetData = pd.DataFrame(USPSTar)

dict_keys(['training_images', 'test_images', 'training_labels', 'test_labels'])


In [0]:
#IMPLEMENTING LOGISTIC REGRESSION

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def model(X, w):
    return tf.matmul(X, w) 

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels

X = tf.placeholder("float", [None, 784]) # Create symbolic variables
Y = tf.placeholder("float", [None, 10])

w = init_weights([784, 10]) #We need shared variable weight matrix for logistic regression

py_x = model(X, w)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)) #Computes mean cross entropy by applying softmax internally
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost) #Construct optimizer
predict_op = tf.argmax(py_x, 1) # at predict time, evaluate the argmax of the logistic regression

# Launch the graph in a session
with tf.Session() as sess:
    # you need to initialize all variables
    tf.global_variables_initializer().run()

    for i in range(500):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX)+1, 128)):
            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
        print(i, np.mean(np.argmax(teY, axis=1) ==
                         sess.run(predict_op, feed_dict={X: teX})))
    


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
0 0.8839
1 0.8971
2 0.9028
3 0.9071
4 0.9093
5 0.9103
6 0.9116
7 0.9132
8 0.9145
9 0.9156
10 0.9158
11 0.9163
12 0.9166
13 0.9171
14 0.9175
15 0.9177
16 0.918
17 0.9184
18 0.919
19 0.9196
20 0.9197
21 0.9199
22 0.92
23 0.9201
24 0.9204
25 0.9204
26 0.9206
27 0.921
28 0.9211
29 0.9218
30 0.9218
31 0.9218
32 0.9215
33 0.9214
34 0.9214
35 0.9214
36 0.9214
37 0.9214
38 0.9218
39 0.9219
40 0.9219
41 0.9219
42 0.9218
43 0.9221
44 0.9219
45 0.922
46 0.9219
47 0.9219
48 0.9221
49 0.9221
50 0.9219
51 0.9219
52 0.9221
53 0.9223
54 0.9223
55 0.9224
56 0.9224
57 0.9224
58 0.9226
59 0.923
60 0.9231
61 0.9232
62 0.9234
63 0.9238
64 0.9239
65 0.9239
66 0.9239
67 0.9239
68 0.9238
69 0.924
70 0.924
71 0.9239
72 0.9239
73 0.9239
74 0.9238
75 0.9237
76 0.9237
77 0.9236
78 0.9235
79 0.9234
80 0.9234
81 0.9234
82 0.

In [0]:
# SVM & RandomForest
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
#from sklearn.datasets import fetch_mldata
X_train_MNIST, y_train_MNIST = Mnist_TrainingData, Mnist_TrainingTarget

X_test_MNIST, y_test_MNIST = Mnist_TestingData, Mnist_TestingTarget
X_test_USPS, y_test_USPS = USPS_TestingData, USPS_TargetData

# SVM
classifier1 = SVC(kernel='poly', C=2, gamma =0.01);#polynomial, increase C (tradeoff), remove gamma
classifier1.fit(X_train_MNIST, y_train_MNIST)
predicted_SVM_USPS = classifier1.predict(X_test_USPS)
predicted_SVM_MNIST = classifier1.predict(X_test_MNIST)
target_names = ['class 0', 'class 1', 'class 2','class 3', 'class 4', 'class 5','class 6', 'class 7', 'class 8','class 9']
# get the accuracy
print("SVM:")
print("Accuracy USPS")
print(classification_report(y_test_USPS, predicted_SVM_USPS, target_names=target_names))
print(confusion_matrix(y_test_USPS, predicted_SVM_USPS))
#print accuracy_score(y_test_USPS, predicted_SVM_USPS)
print("Accuracy MNIST")
print(classification_report(y_test_MNIST, predicted_SVM_MNIST, target_names=target_names))
print(confusion_matrix(y_test_MNIST, predicted_SVM_MNIST))
#print accuracy_score(y_test_MNIST, predicted_SVM_MNIST)


#RandomForestClassifier
classifier2 = RandomForestClassifier(n_estimators=5);#less number of estimator, inlcude more parameters
classifier2.fit(X_train_MNIST, y_train_MNIST)
predicted_USPS = classifier2.predict(X_test_USPS)
predicted_MNIST = classifier2.predict(X_test_MNIST)

# get the accuracy
print("Random Forest:")
print("Accuracy USPS")
print(classification_report(y_test_USPS, predicted_USPS, target_names=target_names))
print(confusion_matrix(y_test_USPS, predicted_USPS))
#print accuracy_score(y_test_USPS, predicted_SVM_USPS)
print("Accuracy MNIST")
print(classification_report(y_test_MNIST, predicted_MNIST, target_names=target_names))
print(confusion_matrix(y_test_MNIST, predicted_MNIST))
#############################

SVM:
Accuracy USPS
             precision    recall  f1-score   support

    class 0       0.00      0.00      0.00      2000
    class 1       0.10      1.00      0.18      2000
    class 2       0.00      0.00      0.00      1999
    class 3       0.00      0.00      0.00      2000
    class 4       0.00      0.00      0.00      2000
    class 5       0.00      0.00      0.00      2000
    class 6       0.00      0.00      0.00      2000
    class 7       0.00      0.00      0.00      2000
    class 8       0.00      0.00      0.00      2000
    class 9       0.00      0.00      0.00      2000

avg / total       0.01      0.10      0.02     19999

[[   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 1999    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0   

  'precision', 'predicted', average, warn_for)


Random Forest:
Accuracy USPS
             precision    recall  f1-score   support

    class 0       0.14      0.07      0.09      2000
    class 1       0.09      0.34      0.14      2000
    class 2       0.18      0.21      0.19      1999
    class 3       0.27      0.11      0.15      2000
    class 4       0.38      0.06      0.11      2000
    class 5       0.21      0.12      0.15      2000
    class 6       0.00      0.00      0.00      2000
    class 7       0.09      0.33      0.15      2000
    class 8       0.00      0.00      0.00      2000
    class 9       0.00      0.00      0.00      2000

avg / total       0.14      0.12      0.10     19999

[[ 134  852  152   12   30  135    0  685    0    0]
 [  66  690  213   63   12   14    0  942    0    0]
 [  83  663  421  102   21  128    0  581    0    0]
 [  92  739  250  215   19  119    0  565    0    1]
 [  74  659  102  132  124   25    0  883    0    1]
 [ 131  675  216   52    7  232    0  687    0    0]
 [  81  531  3

  'precision', 'predicted', average, warn_for)


In [0]:
# Neural Network
import keras
from keras.datasets import mnist
from keras.layers import Dense
from keras.layers import Activation
from keras.models import Sequential

x_train_Mnist = Mnist_TrainingData
y_train_Mnist = Mnist_TrainingTarget
x_test_Mnist = Mnist_TestingData
y_test_Mnist = Mnist_TestingTarget
x_test_USPS = USPS_TestingData
y_test_USPS = USPS_TargetData
num_classes=20

y_train_Mnist = keras.utils.to_categorical(y_train_Mnist, num_classes)
y_test_Mnist = keras.utils.to_categorical(y_test_Mnist, num_classes)
y_test_USPS = keras.utils.to_categorical(y_test_USPS, num_classes)
image_size = 784
model = Sequential()
model.add(Dense(units=250, input_shape=(image_size,)))
model.add(Activation(tf.nn.softmax))
model.add(Dense(units=num_classes))
model.add(Activation(tf.nn.softmax))
classifiermodel.compile(optimizer='sgd', loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(x_train_Mnist, y_train_Mnist, batch_size=250, epochs=2000, verbose=False,validation_split=.1)
#MNIST Testing
loss,accuracy = model.evaluate(x_test_Mnist, y_test_Mnist, verbose=False)
z1=accuracy
print("MNIST Dataset:")
print("accuracy:")
print(accuracy)
print("loss:")
print(loss)
#print(confusion_matrix(x_test_Mnist, y_test_Mnist))
#USPS Testing
loss,accuracy = model.evaluate(x_test_USPS, y_test_USPS, verbose=False)
z2=accuracy
print("USPS Dataset:")
print("accuracy:")
print(accuracy)
print("loss:")
print(loss)
#print(confusion_matrix(x_test_USPS, y_test_USPS))


MNIST Dataset:
accuracy:
0.5895
loss:
0.8705210044860839
USPS Dataset:
accuracy:
0.2627631381494569
loss:
2.6679038808574567


In [0]:
#MAJORITY VOTING USING VOTING CLASSIFIER
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
X=X_teest_MNIST
Y=y_test_MNIST
X1=X_test_USPS
Y1=y_test_USPS
clf1=LogisticRegression(solver='lbfgs', multi_class='multinomial',random_state=1)
eclf1=VotingClassifier(estimators=[('lr', clf1), ('svm', classifier1), ('rf', classifier2), ('nn', z1) ], voting=soft)   #For MNIST
eclf1=eclf1.fit(X, Y)
print(eclf1.predict(X))
print(eclf1.score(X, Y, sample_weight=None))
eclf2=VotingClassifier(estimators=[('lr', clf1), ('svm', classifier1), ('rf', classifier2), ('nn', z2) ], voting=soft)   #For USPS
eclf2=eclf2.fit(X1, Y1)
print(eclf2.predict(X1))
print(eclf2.score(X1, Y1, sample_weight=None))


<bound method Model.evaluate of <keras.engine.sequential.Sequential object at 0x7f7fd42d14e0>>