In [None]:
#@title Everything not mine is copyright 2020 Google LLC. Double-click here for full information.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
 
# Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset,
# which is a derivative work from original NIST datasets. 
# MNIST dataset is made available under the terms of the 
# Creative Commons Attribution-Share Alike 3.0 license.

In [None]:
# load some standard utilities.
#%tensorflow_version 2.x
 
import random as rd
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import time
 
print("Loaded modules.")

In [None]:
# Load the labelled examples; the first CSV column is used as the row index.
# Alternative remote source, kept for reference:
#x_train = pd.read_csv('https://github.com/davidabelin/data/blob/master/training_set.csv', delimiter=",", index_col=0)
full_table = pd.read_csv('/content/training_set.csv', delimiter=",", index_col=0)

# Separate the target column from the feature columns.
all_labels = full_table["labels"]
all_features = full_table.drop(columns="labels")

# Positional split: the first 950 rows are used for training,
# every row after that is held out for testing.
n_train = 950
x_train, x_test = all_features.iloc[0:n_train], all_features.iloc[n_train:]
y_train, y_test = all_labels.iloc[0:n_train], all_labels.iloc[n_train:]

# Halve every feature value; both splits get the same scaling.
x_train_norm = x_train / 2
x_test_norm = x_test / 2

In [None]:
# Show the scaled training features as this cell's output (quick visual check).
x_train_norm

## Create a deep neural net model and a convolutional neural network to compare

In [None]:
# SET UP A DEEP NEURAL NET 
 
def create_DNN(learning_rate):
    """Create and compile a fully connected (dense) classifier.

    Architecture: 42 inputs -> Dense(128, relu) -> Dropout(0.1) ->
    Dense(256, relu) -> Dropout(0.2) -> Flatten -> Dense(7, softmax).

    Args:
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled `tf.keras` Sequential model that uses sparse categorical
        cross-entropy as its loss and reports accuracy as its metric.
    """
    model = tf.keras.models.Sequential()
    # `shape` must be a tuple: `(42)` is just the integer 42, which only some
    # Keras versions tolerate. `(42,)` is the portable spelling.
    model.add(tf.keras.layers.Input(shape=(42,)))
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    model.add(tf.keras.layers.Dropout(rate=0.1))  # avoid overfitting to train set
    model.add(tf.keras.layers.Dense(units=256, activation='relu'))
    model.add(tf.keras.layers.Dropout(rate=0.2))
    # The incoming tensor is already (batch, 256), so Flatten leaves it as is;
    # kept so the layer list stays the same as before.
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=7, activation='softmax'))
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    return model

In [None]:
 # SET UP A **CONVOLUTIONAL** NEURAL NET 
 
def create_CNN(learning_rate):
    """Create and compile a small convolutional classifier.

    Architecture: input of shape (6, 7, 1) -> Conv2D(32 filters, 2x2, relu)
    -> Conv2D(64 filters, 2x2, relu) -> Flatten -> Dense(7, softmax).

    Args:
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled `tf.keras` Sequential model that uses sparse categorical
        cross-entropy as its loss and reports accuracy as its metric.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(32, 2, activation='relu', input_shape=(6, 7, 1)))
    model.add(tf.keras.layers.Conv2D(64, 2, activation='relu'))
    #model.add(tf.keras.layers.Conv2D(128, 2, activation='relu'))
    #model.add(tf.keras.layers.MaxPooling2D((2,2)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units=7, activation='softmax'))
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])
    return model

In [None]:
# Train a model on the training set; pass validation_split (e.g. 0.1) to hold back part of it for validation.
def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=None):
    """Fit `model` and return its per-epoch training record.

    Args:
        model: Object exposing a Keras-style `fit` method.
        train_features: Inputs forwarded to `fit` as `x`.
        train_label: Targets forwarded to `fit` as `y`.
        epochs: Number of epochs forwarded to `fit`.
        batch_size: Batch size forwarded to `fit` unchanged.
        validation_split: Validation fraction forwarded to `fit` unchanged.

    Returns:
        A tuple `(epoch_list, metrics)`: `epoch_list` is the `epoch`
        attribute of the object returned by `fit`, and `metrics` is a
        DataFrame built from its `history` attribute.
    """
    fit_options = dict(
        x=train_features,
        y=train_label,
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        validation_split=validation_split,
        verbose=1,
    )
    training_log = model.fit(**fit_options)

    # Hand back the epoch list together with a tabular view of the metrics.
    return training_log.epoch, pd.DataFrame(training_log.history)

In [None]:
# Train the dense network (the CNN run is currently commented out).
learning_rate = 0.0005
epochs = 30
batch_size = 10
validation_split = None
start_time = time.time()

dense = create_DNN(learning_rate)
#convoluter = create_CNN(learning_rate)

# Two consecutive training rounds on the same model object.
# hist_DNN / epochs_DNN are overwritten each round, so after the loop they
# describe the last round only.
for _ in range(2):
    # Time each round on its own; measuring from start_time would report the
    # cumulative time on the second round.
    round_start = time.time()
    epochs_DNN, hist_DNN = train_model(dense, x_train_norm, y_train, epochs, batch_size, validation_split)
    batch_time = time.time() - round_start
    print("Dense finished train set of {} in {} seconds".format(len(x_train_norm), round(batch_time,3)))
    #round_start = time.time()
    #epochs_CNN, hist_CNN = train_model(convoluter, x_train_norm, y_train, epochs, batch_size, validation_split)
    #batch_time = time.time() - round_start
    #print("Convoluter finished train set of {} in {} seconds".format(len(x_train_norm), round(batch_time,3)))

# `{:.2f}` prints two decimal places. The previous `{:.2}` meant two
# significant digits, so e.g. 123.4 seconds was printed as "1.2e+02".
print ("\nTotal time: {:.2f} seconds".format(time.time()-start_time))


In [None]:
# Score the trained dense model on the held-out rows. The return value is
# shown as the cell output (loss and accuracy, per the model's compile settings).
dense.evaluate(x=x_test_norm, y=y_test, batch_size=batch_size)

In [None]:
# Scratch cell: reindex() returns a new frame and the result is not assigned,
# so x_test_norm itself is left unchanged; the new frame is only displayed.
# NOTE(review): range(0, len(y_test)-1) has one entry fewer than y_test —
# confirm whether that is intended.
x_test_norm.reindex(range(0,len(y_test)-1), method='bfill')

In [None]:
# Scratch cell: both reindex() calls return new objects that are not assigned,
# so x_test_norm and y_test are left unchanged. Only the last expression
# (the reindexed labels) is shown as the cell output.
# NOTE(review): reindex() selects rows by existing label and does not renumber
# them — presumably reset_index(drop=True) was the goal; confirm.
x_test_norm.reindex(list(range(len(y_test))))
y_test.reindex(list(range(len(y_test))))

In [None]:
# Renumber the held-out labels and features 0..n-1 so that both can be
# addressed by the same positional label.
# reset_index(drop=True) discards the index read from the CSV and assigns a
# fresh 0-based one. The previous reindex() call only selected rows by label
# and its result was never assigned, so it had no effect on x_test_norm.
y_test = y_test.reset_index(drop=True)
x_test_norm = x_test_norm.reset_index(drop=True)
x_test_norm

In [None]:
def getKaggles(model=None, features=None, labels=None):
    """Tabulate the model's predicted class next to the true label.

    Called with no arguments it uses the notebook globals `dense`,
    `x_test_norm` and `y_test`, exactly as before.

    Args:
        model: Object with a `predict` method returning one row of class
            probabilities per example. Defaults to the global `dense`.
        features: Examples to predict on. Defaults to the global `x_test_norm`.
        labels: True labels, in the same row order as `features`.
            Defaults to the global `y_test`.

    Returns:
        DataFrame with columns 'ImageId' (1-based row number), 'Guess'
        (index of the highest probability in that row) and 'Answer'
        (the true label).

    Raises:
        ValueError: If `labels` and the predictions differ in length.
    """
    if model is None:
        model = dense
    if features is None:
        features = x_test_norm
    if labels is None:
        labels = y_test

    column_names = ['ImageId', 'Guess', 'Answer']
    predicts = np.asarray(model.predict(features))
    if predicts.size == 0:
        # Nothing to tabulate: return an empty frame with the usual columns.
        return pd.DataFrame(columns=column_names)

    # One arg-max per row replaces the per-row Python loop.
    guesses = predicts.argmax(axis=1)
    # np.asarray takes the labels by position, so the result does not depend
    # on what index `labels` carries.
    return pd.DataFrame({
        'ImageId': np.arange(1, len(guesses) + 1),
        'Guess': guesses,
        'Answer': np.asarray(labels),
    }, columns=column_names)

print("Loaded function getKaggles.")
print("Getting answers..." )

# Build the table of predicted class vs. true label for every row of the
# normalized test set, then show it as the cell output.
kaggles = getKaggles()
kaggles

In [None]:
# The prediction column in `kaggles` is named "Guess"; rename it to the
# "Label" header written to the submission file. Selecting "Label" directly
# raised a KeyError because no such column exists.
kaggles.rename(columns={"Guess": "Label"}).to_csv('submission.csv', columns=["ImageId","Label"], index=False)

In [None]:
# Read the submission file back from /content to inspect what was written.
g = pd.read_csv('/content/submission.csv')

In [None]:
# Load a second submission file, indexed by its "ImageId" column.
# NOTE(review): nothing in this notebook writes submission1.csv — presumably
# it comes from an earlier run or an upload; confirm it exists before running.
s1 = pd.read_csv('/content/submission1.csv', index_col="ImageId")#header=None,

In [None]:
# Show the re-loaded submission table as this cell's output.
g

In [None]:
# Show one held-out example as an image.
# The previous version read from an undefined name (`kaggle`) and reshaped to
# 28x28, which cannot hold the 42 features each row carries here. 6x7 matches
# the grid used as the CNN input shape.
# NOTE(review): assumes the 42 columns are stored row by row — confirm.
dig = x_test_norm.iloc[3]
dig = np.reshape(dig.to_numpy(), (6,7))
plt.imshow(dig)

## Visualizations

In [None]:
# Plot the accuracy metrics vs. epoch for every training history that exists.
# hist_CNN is only defined when the CNN training lines are enabled, and the
# 'val_accuracy' column is only recorded when a validation split was used,
# so both are checked before plotting instead of assumed.
fig, ax = plt.subplots()
histories = [("DNN", hist_DNN)]
if "hist_CNN" in globals():
    histories.append(("CNN", hist_CNN))

for model_name, hist in histories:
    for metric in ("accuracy", "val_accuracy"):
        if metric in hist:
            # hist has one row per epoch, so its index is the epoch number.
            ax.plot(hist.index, hist[metric], label="{} {}".format(model_name, metric))

ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
ax.set_title("Accuracy per epoch")
ax.legend()
plt.show()