# Question 1

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load data
matrix = []

with open("spambase.data", "r") as raw_data:
    for raw_line in raw_data:
        line = [float(x) for x in raw_line.split(",")]
        matrix.append(line)

data = pd.DataFrame(matrix)
row, col = data.shape
X, y = data.iloc[:,:col - 1], data[col - 1]
# y = y.astype(bool)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

## Training Random Forest Classifier on existing package

In [4]:
from sklearn.ensemble import RandomForestClassifier

  from numpy.core.umath_tests import inner1d


In [5]:
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [6]:
def classfierData(clf, X, y):
    test_data = clf.predict(X)
    print("Confusion matrix\n", confusion_matrix(test_data, y), "\n")
    tn, fp, fn, tp = confusion_matrix(test_data, y).ravel()
    print("True negative:", tn, ", false positive:", fp, ", false negative:", fn, ",true positive:", tp, "\n")
    print("Accuracy score", accuracy_score(test_data, y), "\n")
    print("Precision", precision_score(test_data, y), "\n")
    print("Recall", recall_score(test_data, y), "\n")
    print("F1 score", f1_score(test_data, y), "\n")

In [7]:
classfierData(clf, X_train, y_train)

Confusion matrix
 [[2089    1]
 [   2 1358]] 

True negative: 2089 , false positive: 1 , false negative: 2 ,true positive: 1358 

Accuracy score 0.9991304347826087 

Precision 0.9992641648270787 

Recall 0.9985294117647059 

F1 score 0.9988966531813166 



In [8]:
classfierData(clf, X_test, y_test)

Confusion matrix
 [[681  46]
 [ 16 408]] 

True negative: 681 , false positive: 46 , false negative: 16 ,true positive: 408 

Accuracy score 0.946133796698523 

Precision 0.8986784140969163 

Recall 0.9622641509433962 

F1 score 0.9293849658314352 



## Implementing own Random Forest Classifier

Predicate class acts as the splitting question:

In [98]:
class Predicate:
    def __init__(self, column, value):
        self.column = column
        self.value = value
    
    def match(self, example, pp=False):
        if pp:
            print("Match on column", self.column)
        val = example[self.column]
        if Util.is_numeric(val):
            return val >= self.value
        else:
            return val == self.value
    
    def __str__(self):
        return 'Question: column {}, value {}'.format(self.column, self.value)

Some utility functions that being reused a lot in the implementation of random forest. In this implementation, I used gini impurity instead of entropy since `log` function would arguably take longer to calculate

In [99]:
import time

class Util:
    @staticmethod
    def label_count(labels):
        count = {}
        for r in labels:
            if r not in count:
                count[r] = 0
            count[r] += 1
        return count
    
    @staticmethod
    def is_numeric(val):
        return isinstance(val, int) or isinstance(val, float)
    
    @staticmethod
    def partition(X, y, pred):
        true_X, false_X, true_y, false_y = [], [], [], []
        
        for x_inst, y_inst in zip(X, y):
            if pred.match(x_inst):
                true_X.append(x_inst)
                true_y.append(y_inst)
            else:
                false_X.append(x_inst)
                false_y.append(y_inst)
        return true_X, true_y, false_X, false_y
    
    @staticmethod
    def gini_impur(labels):
        """
        Gini impurity
        """
        counts = Util.label_count(labels)
        total = 0
        for lbl in counts:
            prob_of_lbl = float(counts[lbl]) / len(labels)
            total += (prob_of_lbl * prob_of_lbl)
        return 1 - total
    
    @staticmethod
    def info_gain(leftLbl, rightLbl, curr_uncertainty):
        """
        Calculating information gain
        """
        p = float (len(leftLbl)) / (len(leftLbl) + len(rightLbl))
        return curr_uncertainty - p * Util.gini_impur(leftLbl) - (1 - p) * Util.gini_impur(rightLbl)

Tree data structure

In [100]:
class Leaf:
    def __init__(self, y, depth=0):
        self.depth = depth
        pred = Util.label_count(y)
        for l in pred:
            pred[l] = pred[l] / len(y)
        self.predictions = pred
        
    def isLeaf(self):
        return True

class DecTreeNode:
    def __init__(self, pred, true_branch, false_branch, depth=0):
        self.depth = depth
        self.pred = pred
        self.true_branch = true_branch
        self.false_branch = false_branch
        
    def isLeaf(self):
        return False

Implementation of Decision Tree

In [101]:
class DecTreeClassifier:
    def __init__(self, max_depth=10):
        self.max_depth = max_depth
    
    def findBestSplit(self, X, y, pp=False):
        start = time.time()
        best_gain = 0  
        best_pred = None
        current_uncert = Util.gini_impur(y)
        
        for col in self.selected_features:
            vals = set([row[col] for row in X]) # different values in column
            
            for v in vals:
                pred = Predicate(col, v)
                true_X, true_y, false_X, false_y = Util.partition(X, y, pred)
                
                if len(true_X) == 0 or len(false_X) == 0:
                    continue
                
                gain = Util.info_gain(true_y, false_y, current_uncert)
                
                if gain >= best_gain:
                    best_gain, best_pred = gain, pred
        if pp:
            print("Find best split size", np.array(X).shape, "took", time.time() - start)
        return best_gain, best_pred
    
    def build_tree(self, X, y, depth=0):
        gain, pred = self.findBestSplit(X, y)
        if gain == 0 or depth >= self.max_depth:
            return Leaf(y, depth)
        true_X, true_y, false_X, false_y = Util.partition(X, y, pred)
        
        true_branch = self.build_tree(true_X, true_y, depth + 1)
        false_branch = self.build_tree(false_X, false_y, depth + 1)
        
        return DecTreeNode(pred, true_branch, false_branch)
    
    def predict_by_tree(self, tree, X_inst):
        if tree.isLeaf():
            return tree.predictions
        elif tree.pred.match(X_inst, pp=False):
            return self.predict_by_tree(tree.true_branch, X_inst)
        else:
            return self.predict_by_tree(tree.false_branch, X_inst)
    
    def fit(self, X, y, selected_features=None):
        if selected_features is None:
            self.selected_features = X.columns
        else:
            self.selected_features = selected_features
            
        self.tree = self.build_tree(X.values, y.values) 
    
    def predict_inst(self, X_inst):
        pred = self.predict_by_tree(self.tree, np.array(X_inst))
        max_arg, max_prob = None, 0
        for l in pred:
            if pred[l] > max_prob:
                max_arg = l
            max_prob = max(max_prob, pred[l])
        return max_arg
    
    def predict(self, X):
        y = []
        for idx, r in X.iterrows():
            y.append(self.predict_inst(r))
        return pd.DataFrame(y)

In [102]:
# Training decision tree
decTreeClf = DecTreeClassifier(max_depth=10)
start = time.time()
decTreeClf.fit(X_train, y_train)
print("Decision tree classifier take", time.time() - start, "seconds to build tree")

Decision tree classifier take 269.33306765556335 seconds to build tree


So to build a full tree with depth 10, it took me 247 seconds (~4 minutes). Having max depth helps us limit the training time and also avoid overfitting on the training data.

In [103]:
classfierData(decTreeClf, X_test, y_test)

Confusion matrix
 [[663  66]
 [ 34 388]] 

True negative: 663 , false positive: 66 , false negative: 34 ,true positive: 388 

Accuracy score 0.9131190269331017 

Precision 0.8546255506607929 

Recall 0.919431279620853 

F1 score 0.8858447488584474 



In [105]:
from sklearn.tree import DecisionTreeClassifier
dcf = DecisionTreeClassifier(max_depth=10)
dcf.fit(X_train, y_train)
classfierData(dcf, X_test, y_test)

Confusion matrix
 [[668  63]
 [ 29 391]] 

True negative: 668 , false positive: 63 , false negative: 29 ,true positive: 391 

Accuracy score 0.9200695047784535 

Precision 0.8612334801762115 

Recall 0.930952380952381 

F1 score 0.8947368421052633 



Hence, my implementation of Decision Tree Classifier gains approximately the same result as sklearn's Decision Tree Classifier. Now we can implement random forest classifier after decision tree

In [106]:
from sklearn.utils import resample
import random

class RandomForestClassifier:
    def __init__(self, num_est=10, max_depth=5, num_attrs=5):
        self.num_est = num_est
        self.max_depth = max_depth
        self.forest = []
        self.num_attrs = num_attrs
    
    def fit(self, X, y):
        self.X = X
        self.y = y
        
        # Bagging
        for i in range(self.num_est):
            shape = len(X.columns)
            
            selected_features = random.sample(range(0, shape), self.num_attrs)
            X_samp, y_samp = resample(X, y, replace=True)
            clf = DecTreeClassifier(max_depth=self.max_depth)
            print("Selected features", selected_features)
            start = time.time()
            print("=== Training estimator #", i + 1, "with", len(selected_features),"features")
            clf.fit(X_samp, y_samp, selected_features=selected_features)
            print("===> Done training estimator #", i + 1, "in", time.time() - start, "seconds")
            self.forest.append(clf)
            print("--------------------------------------------------------------\n")
            
    def predict(self, X):
        result = []
        for idx, x in X.iterrows():
            labels = {}
            for clf in self.forest:
                l = clf.predict_inst(x)
                if l not in labels:
                    labels[l] = 0
                labels[l] += 1
            for l in labels:
                if labels[l] >= 0.5:
                    result.append(l)
                    break
        return pd.DataFrame(result)
            

## Testing random forest on $\sqrt{d}$ attributes

In [107]:
rfc = RandomForestClassifier(num_est=10, max_depth=10, num_attrs=7)

In [108]:
rfc.fit(X_train, y_train)

Selected features [6, 44, 10, 35, 12, 28, 43]
=== Training estimator # 1 with 7 features
===> Done training estimator # 1 in 12.335493326187134 seconds
--------------------------------------------------------------

Selected features [37, 16, 14, 31, 49, 0, 5]
=== Training estimator # 2 with 7 features
===> Done training estimator # 2 in 13.729286909103394 seconds
--------------------------------------------------------------

Selected features [27, 16, 5, 6, 12, 24, 52]
=== Training estimator # 3 with 7 features
===> Done training estimator # 3 in 13.08350419998169 seconds
--------------------------------------------------------------

Selected features [48, 39, 22, 33, 23, 3, 43]
=== Training estimator # 4 with 7 features
===> Done training estimator # 4 in 9.32597804069519 seconds
--------------------------------------------------------------

Selected features [45, 48, 4, 50, 23, 34, 14]
=== Training estimator # 5 with 7 features
===> Done training estimator # 5 in 12.2847149372100

In [109]:
classfierData(rfc, X_test, y_test)
classfierData(rfc, X_train, y_train)

Confusion matrix
 [[661 207]
 [ 36 247]] 

True negative: 661 , false positive: 207 , false negative: 36 ,true positive: 247 

Accuracy score 0.788879235447437 

Precision 0.5440528634361234 

Recall 0.872791519434629 

F1 score 0.6702849389416554 

Confusion matrix
 [[2015  597]
 [  76  762]] 

True negative: 2015 , false positive: 597 , false negative: 76 ,true positive: 762 

Accuracy score 0.8049275362318841 

Precision 0.5607064017660044 

Recall 0.9093078758949881 

F1 score 0.6936731907146109 



## Random forest on all attributes with 10 estimators

In [27]:
rfc = RandomForestClassifier(num_est=10, max_depth=10, num_attrs=57)
rfc.fit(X_train, y_train)

=== Training estimator # 1 with features [36, 11, 9, 12, 35, 32, 37, 56, 43, 22, 53, 39, 23, 20, 51, 30, 52, 14, 49, 44, 1, 31, 34, 10, 28, 8, 26, 40, 41, 2, 29, 18, 27, 50, 19, 45, 38, 25, 15, 16, 47, 3, 4, 46, 33, 6, 0, 17, 55, 7, 24, 21, 5, 42, 48, 54, 13]
===> Done training estimator # 1 in 239.93970227241516 seconds
--------------------------------------------------------------

=== Training estimator # 2 with features [0, 15, 21, 48, 20, 4, 46, 33, 18, 54, 31, 8, 1, 36, 5, 16, 52, 11, 39, 26, 25, 40, 53, 41, 55, 10, 3, 32, 43, 27, 38, 45, 6, 49, 28, 22, 47, 2, 37, 50, 42, 12, 14, 30, 56, 23, 9, 7, 35, 13, 17, 29, 44, 24, 19, 34, 51]
===> Done training estimator # 2 in 247.90983271598816 seconds
--------------------------------------------------------------

=== Training estimator # 3 with features [46, 23, 48, 8, 55, 10, 56, 16, 4, 2, 9, 36, 5, 28, 26, 54, 20, 39, 11, 18, 51, 30, 41, 13, 17, 19, 22, 29, 49, 35, 1, 45, 3, 42, 38, 32, 7, 12, 33, 37, 34, 53, 52, 15, 0, 21, 31, 24, 1

TypeError: classfierData() missing 1 required positional argument: 'y'

In [28]:
classfierData(rfc, X_test, y_test)

Confusion matrix
 [[670 403]
 [ 27  51]] 

True negative: 670 , false positive: 403 , false negative: 27 ,true positive: 51 

Accuracy score 0.6264118158123371 

Precision 0.11233480176211454 

Recall 0.6538461538461539 

F1 score 0.19172932330827067 



In [32]:
## With d/2 features
rfc = RandomForestClassifier(num_est=20, max_depth=10, num_attrs=int(57/2))
rfc.fit(X_train, y_train)
classfierData(rfc, X_test, y_test)

=== Training estimator # 1 with features [15, 11, 16, 24, 13, 41, 7, 27, 55, 53, 37, 56, 18, 49, 10, 43, 30, 25, 17, 50, 6, 40, 12, 33, 0, 20, 39, 21]
===> Done training estimator # 1 in 91.5327980518341 seconds
--------------------------------------------------------------

=== Training estimator # 2 with features [43, 42, 5, 14, 6, 55, 25, 20, 0, 24, 29, 46, 8, 36, 48, 34, 31, 49, 33, 1, 38, 12, 7, 50, 52, 53, 56, 3]
===> Done training estimator # 2 in 83.30659604072571 seconds
--------------------------------------------------------------

=== Training estimator # 3 with features [44, 3, 2, 47, 15, 12, 11, 25, 33, 20, 19, 39, 50, 22, 45, 7, 27, 14, 42, 38, 41, 26, 52, 36, 4, 46, 24, 29]
===> Done training estimator # 3 in 71.90655469894409 seconds
--------------------------------------------------------------

=== Training estimator # 4 with features [24, 51, 4, 3, 38, 12, 26, 54, 17, 56, 0, 28, 20, 55, 31, 30, 7, 14, 9, 47, 32, 49, 23, 29, 22, 25, 21, 40]
===> Done training estimat

## Random forest with $\sqrt{d}$ attributes and varying number of estimator

In [33]:
rfc = RandomForestClassifier(num_est=10, max_depth=20, num_attrs=int(np.sqrt(57)))
rfc.fit(X_train, y_train)
classfierData(rfc, X_test, y_test)

print("*****************************************************************************************\n")

rfc = RandomForestClassifier(num_est=50, max_depth=20, num_attrs=int(np.sqrt(57)))
rfc.fit(X_train, y_train)
classfierData(rfc, X_test, y_test)

print("*****************************************************************************************\n")

rfc = RandomForestClassifier(num_est=100, max_depth=20, num_attrs=int(np.sqrt(57)))
rfc.fit(X_train, y_train)
classfierData(rfc, X_test, y_test)

=== Training estimator # 1 with features [11, 54, 3, 2, 35, 55, 15]
===> Done training estimator # 1 in 26.35715341567993 seconds
--------------------------------------------------------------

=== Training estimator # 2 with features [39, 50, 16, 46, 1, 2, 4]
===> Done training estimator # 2 in 13.443044662475586 seconds
--------------------------------------------------------------

=== Training estimator # 3 with features [19, 22, 8, 31, 7, 16, 43]
===> Done training estimator # 3 in 13.193948745727539 seconds
--------------------------------------------------------------

=== Training estimator # 4 with features [14, 33, 22, 35, 39, 44, 54]
===> Done training estimator # 4 in 29.668092966079712 seconds
--------------------------------------------------------------

=== Training estimator # 5 with features [34, 20, 8, 32, 4, 9, 48]
===> Done training estimator # 5 in 17.91074538230896 seconds
--------------------------------------------------------------

=== Training estimator # 6 

KeyboardInterrupt: 

In [82]:
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(max_depth=10, n_estimators=1, max_features=7)
rfc.fit(X_test, y_test)
classfierData(rfc, X_test, y_test)

Confusion matrix
 [[626  30]
 [ 71 424]] 

True negative: 626 , false positive: 30 , false negative: 71 ,true positive: 424 

Accuracy score 0.9122502172024327 

Precision 0.933920704845815 

Recall 0.8565656565656565 

F1 score 0.8935721812434142 



# Question 2

In [110]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [113]:
ada = AdaBoostClassifier(DecisionTreeClassifier(max_depth=10), n_estimators=50)

In [114]:
ada.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=50, random_state=None)

In [115]:
classfierData(ada, X_test, y_test)

Confusion matrix
 [[675  44]
 [ 22 410]] 

True negative: 675 , false positive: 44 , false negative: 22 ,true positive: 410 

Accuracy score 0.9426585577758471 

Precision 0.9030837004405287 

Recall 0.9490740740740741 

F1 score 0.9255079006772009 



In [116]:
from sklearn.linear_model import LogisticRegression
ada = AdaBoostClassifier(LogisticRegression(), n_estimators=50)

In [117]:
ada.fit(X_train, y_train)
classfierData(ada, X_test, y_test)

Confusion matrix
 [[656  63]
 [ 41 391]] 

True negative: 656 , false positive: 63 , false negative: 41 ,true positive: 391 

Accuracy score 0.9096437880104257 

Precision 0.8612334801762115 

Recall 0.9050925925925926 

F1 score 0.8826185101580134 



In [121]:
base_learners = [10, 50, 100]
# use with decision tree
for l in base_learners:
    ada = AdaBoostClassifier(DecisionTreeClassifier(max_depth=10), n_estimators=l)
    ada.fit(X_train, y_train)
    print("AdaBoost classifier data for", l, "number of estimators\n")
    print("*********************************************************************")
    classfierData(ada, X_test, y_test)
    print("---------------------------------------------------------------------")

AdaBoost classifier data for 10 number of estimators

*********************************************************************
Confusion matrix
 [[667  40]
 [ 30 414]] 

True negative: 667 , false positive: 40 , false negative: 30 ,true positive: 414 

Accuracy score 0.9391833188531712 

Precision 0.9118942731277533 

Recall 0.9324324324324325 

F1 score 0.9220489977728284 

---------------------------------------------------------------------
AdaBoost classifier data for 50 number of estimators

*********************************************************************
Confusion matrix
 [[674  37]
 [ 23 417]] 

True negative: 674 , false positive: 37 , false negative: 23 ,true positive: 417 

Accuracy score 0.947871416159861 

Precision 0.9185022026431718 

Recall 0.9477272727272728 

F1 score 0.9328859060402686 

---------------------------------------------------------------------
AdaBoost classifier data for 100 number of estimators

********************************************************

In [122]:
from sklearn.ensemble import RandomForestClassifier

for l in base_learners:
    ada = AdaBoostClassifier(RandomForestClassifier(n_estimators=10), n_estimators=l)
    ada.fit(X_train, y_train)
    print("AdaBoost classifier data for", l, "number of estimators\n")
    print("*********************************************************************")
    classfierData(ada, X_test, y_test)
    print("---------------------------------------------------------------------")

AdaBoost classifier data for 10 number of estimators

*********************************************************************
Confusion matrix
 [[681  32]
 [ 16 422]] 

True negative: 681 , false positive: 32 , false negative: 16 ,true positive: 422 

Accuracy score 0.9582971329278888 

Precision 0.9295154185022027 

Recall 0.9634703196347032 

F1 score 0.946188340807175 

---------------------------------------------------------------------
AdaBoost classifier data for 50 number of estimators

*********************************************************************
Confusion matrix
 [[680  38]
 [ 17 416]] 

True negative: 680 , false positive: 38 , false negative: 17 ,true positive: 416 

Accuracy score 0.952215464813206 

Precision 0.9162995594713657 

Recall 0.9607390300230947 

F1 score 0.9379932356257047 

---------------------------------------------------------------------
AdaBoost classifier data for 100 number of estimators

*********************************************************

# Problem 3

In [161]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.datasets import mnist
from keras.utils import np_utils
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten

In [128]:
# Preprocess data for neural net

In [129]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_test /= 255

y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

X_train = np.reshape(X_train, (60000, 784))
X_test = np.reshape(X_test, (10000, 784))

## Configuration for FFN

This model will have 3 hidden layer. Each layer (except for the last one to produce result) will use relu as activation functions. After calculating neurons for each layer, Dropout Regularization will be applied

In [137]:
model = Sequential()
model.add(Dense(500, input_dim=28*28))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(300))
model.add(Activation('relu'))
model.add(Dropout(0.4))
model.add(Dense(10))
model.add(Activation('softmax'))

from keras.optimizers import RMSprop
rms = RMSprop()

In [140]:
def run_network(model, epochs=20, batch=256):
    start_time = time.time()
    model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['accuracy'])
    print('Training model...')
    model.fit(X_train, y_train, nb_epoch=epochs, batch_size=batch,
              validation_data=(X_test, y_test), verbose=2)

    print("Training duration : {0}".format(time.time() - start_time))
    score = model.evaluate(X_test, y_test, batch_size=16)
    print("Network's test score [loss, accuracy]: {0}".format(score))

In [141]:
run_network(model)

Training model...


  


Train on 60000 samples, validate on 10000 samples
Epoch 1/20
 - 5s - loss: 0.0345 - acc: 0.9898 - val_loss: 0.0791 - val_acc: 0.9831
Epoch 2/20
 - 4s - loss: 0.0316 - acc: 0.9907 - val_loss: 0.0790 - val_acc: 0.9833
Epoch 3/20
 - 4s - loss: 0.0280 - acc: 0.9916 - val_loss: 0.0838 - val_acc: 0.9821
Epoch 4/20
 - 4s - loss: 0.0279 - acc: 0.9918 - val_loss: 0.0903 - val_acc: 0.9828
Epoch 5/20
 - 4s - loss: 0.0290 - acc: 0.9914 - val_loss: 0.0767 - val_acc: 0.9845
Epoch 6/20
 - 4s - loss: 0.0282 - acc: 0.9920 - val_loss: 0.0801 - val_acc: 0.9838
Epoch 7/20
 - 4s - loss: 0.0266 - acc: 0.9925 - val_loss: 0.0878 - val_acc: 0.9835
Epoch 8/20
 - 4s - loss: 0.0260 - acc: 0.9921 - val_loss: 0.0905 - val_acc: 0.9838
Epoch 9/20
 - 4s - loss: 0.0269 - acc: 0.9929 - val_loss: 0.0920 - val_acc: 0.9841
Epoch 10/20
 - 4s - loss: 0.0257 - acc: 0.9924 - val_loss: 0.0863 - val_acc: 0.9850
Epoch 11/20
 - 4s - loss: 0.0236 - acc: 0.9932 - val_loss: 0.1030 - val_acc: 0.9821
Epoch 12/20
 - 4s - loss: 0.0241 - 

In [144]:
model = Sequential()
model.add(Dense(10, input_dim=28*28))
model.add(Activation('softmax'))

In [145]:
run_network(model)

Training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/20


  


 - 1s - loss: 0.7692 - acc: 0.8160 - val_loss: 0.3984 - val_acc: 0.8976
Epoch 2/20
 - 1s - loss: 0.3676 - acc: 0.8992 - val_loss: 0.3253 - val_acc: 0.9091
Epoch 3/20
 - 0s - loss: 0.3219 - acc: 0.9103 - val_loss: 0.2997 - val_acc: 0.9168
Epoch 4/20
 - 0s - loss: 0.3025 - acc: 0.9153 - val_loss: 0.2912 - val_acc: 0.9183
Epoch 5/20
 - 0s - loss: 0.2912 - acc: 0.9188 - val_loss: 0.2834 - val_acc: 0.9217
Epoch 6/20
 - 1s - loss: 0.2837 - acc: 0.9206 - val_loss: 0.2768 - val_acc: 0.9230
Epoch 7/20
 - 0s - loss: 0.2780 - acc: 0.9224 - val_loss: 0.2734 - val_acc: 0.9246
Epoch 8/20
 - 0s - loss: 0.2737 - acc: 0.9240 - val_loss: 0.2718 - val_acc: 0.9244
Epoch 9/20
 - 0s - loss: 0.2701 - acc: 0.9251 - val_loss: 0.2694 - val_acc: 0.9251
Epoch 10/20
 - 0s - loss: 0.2673 - acc: 0.9257 - val_loss: 0.2704 - val_acc: 0.9253
Epoch 11/20
 - 0s - loss: 0.2651 - acc: 0.9257 - val_loss: 0.2663 - val_acc: 0.9253
Epoch 12/20
 - 0s - loss: 0.2628 - acc: 0.9273 - val_loss: 0.2660 - val_acc: 0.9259
Epoch 13/20


In [146]:
model = Sequential()
model.add(Dense(300, input_dim=28*28))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))

In [147]:
run_network(model)

Training model...
Train on 60000 samples, validate on 10000 samples
Epoch 1/20


  


 - 2s - loss: 0.3396 - acc: 0.9065 - val_loss: 0.1913 - val_acc: 0.9442
Epoch 2/20
 - 2s - loss: 0.1540 - acc: 0.9558 - val_loss: 0.1285 - val_acc: 0.9628
Epoch 3/20
 - 2s - loss: 0.1070 - acc: 0.9691 - val_loss: 0.1071 - val_acc: 0.9664
Epoch 4/20
 - 2s - loss: 0.0800 - acc: 0.9764 - val_loss: 0.0836 - val_acc: 0.9754
Epoch 5/20
 - 2s - loss: 0.0625 - acc: 0.9811 - val_loss: 0.0739 - val_acc: 0.9775
Epoch 6/20
 - 2s - loss: 0.0507 - acc: 0.9848 - val_loss: 0.0720 - val_acc: 0.9787
Epoch 7/20
 - 2s - loss: 0.0409 - acc: 0.9881 - val_loss: 0.0689 - val_acc: 0.9794
Epoch 8/20
 - 2s - loss: 0.0333 - acc: 0.9904 - val_loss: 0.0704 - val_acc: 0.9791
Epoch 9/20
 - 2s - loss: 0.0272 - acc: 0.9928 - val_loss: 0.0670 - val_acc: 0.9802
Epoch 10/20
 - 2s - loss: 0.0226 - acc: 0.9939 - val_loss: 0.0694 - val_acc: 0.9798
Epoch 11/20
 - 2s - loss: 0.0182 - acc: 0.9953 - val_loss: 0.0710 - val_acc: 0.9798
Epoch 12/20
 - 2s - loss: 0.0150 - acc: 0.9963 - val_loss: 0.0623 - val_acc: 0.9818
Epoch 13/20


For Neural network, we will not flatten the $28 \times 28$ image into a $781 \times 1$ vector.

In [176]:
import keras

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

In [177]:
convMod = Sequential()

convMod.add(Conv2D(64, (2, 2), input_shape = (28, 28, 1), activation = 'relu'))
convMod.add(MaxPooling2D(pool_size = (2, 2)))

convMod.add(Conv2D(64, (2, 2), activation = 'relu'))
convMod.add(MaxPooling2D(pool_size = (1, 1)))
convMod.add(Dropout(0.2))

convMod.add(Flatten())

convMod.add(Dense(units = 128, activation='relu'))
convMod.add(Dropout(0.5))
convMod.add(Dense(units = 10, activation='softmax'))

In [184]:
convMod.compile(optimizer=rms, loss = 'categorical_crossentropy', metrics = ['accuracy'])
convMod.fit(x_train, y_train,epochs=10, verbose = 1)

Epoch 1/10
 4704/60000 [=>............................] - ETA: 1:17 - loss: 0.1121 - acc: 0.9722

KeyboardInterrupt: 

In [183]:
print(convMod.evaluate(x_test, y_test, batch_size=16))

[0.06280644375819575, 0.982]


In [189]:
convMod = Sequential()

convMod.add(Conv2D(64, (4, 4), input_shape = (28, 28, 1), activation = 'relu'))
convMod.add(MaxPooling2D(pool_size = (2, 2)))
convMod.add(Dropout(0.2))
convMod.add(Flatten())
convMod.add(Dense(units = 128, activation='relu'))
convMod.add(Dropout(0.5))
convMod.add(Dense(units = 10, activation='softmax'))

In [190]:
convMod.compile(optimizer=rms, loss = 'categorical_crossentropy', metrics = ['accuracy'])
convMod.fit(x_train, y_train,epochs=10, verbose = 1)

Epoch 1/10
Epoch 2/10
  704/60000 [..............................] - ETA: 1:18 - loss: 0.0899 - acc: 0.9702

KeyboardInterrupt: 

In [191]:
convMod = Sequential()

convMod.add(Conv2D(64, (2, 2), input_shape = (28, 28, 1), activation = 'relu'))
convMod.add(MaxPooling2D(pool_size = (1, 1)))
convMod.add(Dropout(0.4))
convMod.add(Flatten())
convMod.add(Dense(units = 32, activation='relu'))
convMod.add(Dropout(0.8))
convMod.add(Dense(units = 10, activation='softmax'))

In [192]:
convMod.compile(optimizer=rms, loss = 'categorical_crossentropy', metrics = ['accuracy'])
convMod.fit(x_train, y_train,epochs=10, verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fbb7710fe80>

In [193]:
print(convMod.evaluate(x_test, y_test, batch_size=16))

[0.1301682964519132, 0.9654]
