In [None]:
#!/usr/bin/env python
# coding: utf-8

# # Digit Recognizer Analysis - Exploring MNIST

# ## - Description - 

# 
# MNIST ("Modified National Institute of Standards and Technology") is the de facto “hello world” dataset of computer vision. Since its release in 1999, this classic dataset of handwritten images has served as the basis for benchmarking classification algorithms. As new machine learning techniques emerge, MNIST remains a reliable resource for researchers and learners alike.
# 
# In this competition, your goal is to correctly identify digits from a dataset of tens of thousands of handwritten images. We’ve curated a set of tutorial-style kernels which cover everything from regression to neural networks. We encourage you to experiment with different algorithms to learn first-hand what works well and how techniques compare.
# 

# ## - Goal - 

# The goal in this competition is to take an image of a handwritten single digit, and determine what that digit is.
# For every image in the test set, you should predict the correct label.

# In[1]:


# Install TensorFlow for this notebook.
# The `%pip` magic is used instead of a shell `pip` call so the package is
# installed into the environment of the running kernel, not whichever `pip`
# happens to be first on the shell PATH.
get_ipython().run_line_magic('pip', 'install tensorflow')


# In[2]:


# Importing Libraries

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# NOTE(review): `Flatten` is not referenced by this bare name in the visible
# cells (the model cell uses tf.keras.layers.Flatten) — confirm before removing.
from tensorflow.keras.layers import Flatten

import warnings
# Suppresses every warning for the rest of the session, including ones that
# would flag real problems; consider narrowing the filter.
warnings.filterwarnings('ignore')


# In[3]:


# Read the competition CSVs from the working directory: `dr` holds the
# training rows (including the 'label' column), `drt` the test rows.
dr = pd.read_csv(filepath_or_buffer='train.csv')
drt = pd.read_csv(filepath_or_buffer='test.csv')

# Preview both frames. In a notebook only the last expression of a cell is
# rendered, so the training preview is evaluated but not displayed.
dr.head(n=5)
drt.head(n=5)


# In[4]:


# Class balance check: number of training rows for each value of 'label'.
dr.loc[:, 'label'].value_counts()


# In[5]:


# Split the training frame into pixel features (x) and the digit label (y).
# The features are selected by dropping 'label' by name rather than slicing by
# position, so the split stays correct even if the column order changes.
x = dr.drop(columns='label')
y = dr['label']


# In[6]:


# Turn every flat pixel row into a 28x28 single-channel image array.
# The leading -1 lets NumPy infer the number of samples.
xrshp = x.to_numpy().reshape((-1, 28, 28, 1))
tst = drt.to_numpy().reshape((-1, 28, 28, 1))


# In[7]:


# One-hot encode the integer digit labels into 10-element class vectors.
yrshp = tf.keras.utils.to_categorical(y, num_classes=10)


# In[8]:


# Sanity check: shapes of the image array and the one-hot label array.
(xrshp.shape, yrshp.shape)


# In[9]:


# Augmentation pipeline for the training images: pixel values are multiplied
# by 1/255, and each image gets a small random rotation, shift, shear and zoom.
trn_dtagn = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
)

# Iterator yielding augmented (image, one-hot label) batches of 32 samples.
dta_trn = trn_dtagn.flow(x=xrshp, y=yrshp, batch_size=32)


# In[10]:


# Test-time generator: only rescales the pixels, no random augmentation.
tst_dtagn = ImageDataGenerator(rescale=1./255)

# shuffle=False is essential: `flow` shuffles by default, which would make
# `predict` return rows in a random order and misalign the predictions with
# the ImageId rows of the submission file.
tst_st = tst_dtagn.flow(x=tst, batch_size=32, shuffle=False)


# # - Visualizing Data - 

# In[11]:


def show_digit(index):
    """Plot one training image and print its one-hot label vector.

    Args:
        index: Position of the sample in `xrshp` / `yrshp`.
    """
    fig, ax = plt.subplots()
    ax.imshow(xrshp[index].reshape(28, 28), cmap='Greys')
    # Decode the one-hot vector so the figure is readable on its own.
    ax.set_title('Training sample {} (label {})'.format(index, np.argmax(yrshp[index])))
    print(yrshp[index])


show_digit(0)


# In[12]:


show_digit(10)


# In[14]:


show_digit(100)


# # - Model Training - 

# In[15]:


# Build the whole network in a single expression so this cell is idempotent:
# re-running it creates a fresh model, whereas repeated `mdl.add(...)` calls on
# an existing model would stack a second copy of the layers onto it.
mdl = tf.keras.models.Sequential([
    # 32 convolution filters of size 3x3 over the 28x28 single-channel input.
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[28, 28, 1]),
    # 2x2 max pooling with stride 2.
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    # One softmax output per digit class.
    tf.keras.layers.Dense(units=10, activation='softmax'),
])


# In[17]:


# Configure training: Adam optimizer, categorical cross-entropy loss (the
# labels are one-hot vectors) and accuracy as the reported metric.
mdl.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


# In[18]:


# Importing scipy for train model
# NOTE(review): no visible cell references `scipy` by name; presumably this
# import is here because the image augmentation transforms need SciPy at fit
# time — confirm, and consider moving it into the imports cell at the top.
import scipy.ndimage


# In[19]:


# Train the network for 12 epochs on the augmented training generator.
mdl.fit(
    x=dta_trn,
    epochs=12,
)


# In[20]:


# Run the trained network over the test generator; each row of `pred` is the
# 10-way softmax output for one test image.
pred = mdl.predict(x=tst_st)


# In[21]:


# Load the sample submission file to use as the template for the output.
ss = pd.read_csv(filepath_or_buffer='sample_submission.csv')


# In[23]:


# Pick the most probable class for every test image in one vectorised call
# instead of a Python loop over the rows of `pred`.
tst_prds = np.argmax(pred, axis=1).tolist()


# In[24]:


# Write the predictions with bracket assignment. Attribute-style assignment
# (`ss.Label = ...`) only updates a column that already exists; if 'Label'
# were missing, pandas would set a plain Python attribute and the saved CSV
# would contain no predictions.
ss['Label'] = tst_prds


# In[25]:


# Save the submission frame as CSV, leaving out the DataFrame index.
ss.to_csv(path_or_buf='submission.csv', index=False)


# In[26]:


# Read the written file back to check what was actually saved.
t = pd.read_csv(filepath_or_buffer='submission.csv')


# In[27]:


# Show the first 20 rows of the saved submission.
t.head(n=20)


# In[ ]: