### Importing the required libraries

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

### Reading in the data (and defining the necessary constants)

In [None]:
# Load the labelled data set; it is split into training and validation
# subsets further down.
full_data = pd.read_csv(filepath_or_buffer='../input/train.csv')

In [None]:
# Name of the target column; the input functions read it via df[LABEL].
LABEL = 'label'

In [None]:
# Number of input feature columns, named 'pixel0' ... 'pixel783'
# (presumably one per pixel of a flattened image -- confirm against the CSV).
NUM_PREDICTOR_COLS = 784
PREDICTOR_COLS = ['pixel%d' % idx for idx in range(NUM_PREDICTOR_COLS)]

In [None]:
# Columns handed to the training/evaluation input functions: every predictor
# plus the target. The target name comes from LABEL (not a repeated string
# literal) so it cannot drift from the df[LABEL] lookup in the input functions.
# A new list is built, so PREDICTOR_COLS itself is left unmodified.
REL_COLUMNS = PREDICTOR_COLS + [LABEL]

### Checking for null values.

In [None]:
# True if any cell in the data set is missing (shown as the cell output).
np.any(full_data.isnull().values)

### Split into training and validation sets

In [None]:
# Use the first 80% of the rows (in file order, no shuffling) for training
# and hold out the remaining rows for validation.
LEN_TRAIN_SET = int(len(full_data) * 0.8)
train_data = full_data.iloc[:LEN_TRAIN_SET]
validation_data = full_data.iloc[LEN_TRAIN_SET:]

In [None]:
# Sanity check: the two subsets together must account for every row.
assert len(full_data) == len(train_data) + len(validation_data)

In [None]:
# Total number of rows in the data set.
full_data.shape[0]

In [None]:
# Number of rows in the training subset.
train_data.shape[0]

In [None]:
# Number of rows in the validation subset.
validation_data.shape[0]

### Fit model on training data


In [None]:
def make_train_input_fn(df, num_epochs):
    """Build a shuffling input function for training an estimator.

    Args:
        df: DataFrame holding the feature columns and the ``LABEL`` column.
            The whole frame (label column included) is passed as ``x``.
        num_epochs: Number of passes over ``df``, forwarded to
            ``pandas_input_fn``.

    Returns:
        The input function created by ``tf.estimator.inputs.pandas_input_fn``.
        Batch size and other options are left at the library defaults.
    """
    labels = df[LABEL]
    return tf.estimator.inputs.pandas_input_fn(
        x=df, y=labels, num_epochs=num_epochs, shuffle=True)

In [None]:
def make_eval_input_fn(df):
    """Build a non-shuffling input function for evaluating an estimator.

    Args:
        df: DataFrame holding the feature columns and the ``LABEL`` column.
            The whole frame (label column included) is passed as ``x``.

    Returns:
        The input function created by ``tf.estimator.inputs.pandas_input_fn``
        with shuffling disabled; the epoch count is left at the library default.
    """
    labels = df[LABEL]
    return tf.estimator.inputs.pandas_input_fn(x=df, y=labels, shuffle=False)

In [None]:
def make_prediction_input_fn(df):
    """Build an input function for prediction on unlabelled data.

    Args:
        df: DataFrame holding the feature columns only; no target is supplied.

    Returns:
        The input function created by ``tf.estimator.inputs.pandas_input_fn``
        with ``y=None`` and shuffling disabled, so rows are fed in frame order.
    """
    return tf.estimator.inputs.pandas_input_fn(x=df, shuffle=False, y=None)

In [None]:
def make_features():
    """Return one numeric feature column per entry of ``PREDICTOR_COLS``."""
    return list(map(tf.feature_column.numeric_column, PREDICTOR_COLS))

In [None]:
# Feature frame and flat (1-D) label array for the training subset.
X_train = train_data.loc[:, PREDICTOR_COLS]
Y_train = train_data[['label']].values.ravel()

In [None]:
import shutil

# Directory passed to the estimator as model_dir. Any copy left over from an
# earlier run is removed first; a missing directory is not an error.
OUTDIR = 'classification_outputs'
shutil.rmtree(path=OUTDIR, ignore_errors=True)

In [None]:
# Number of target classes, passed to the classifiers as n_classes.
N_CLASSES = 10

In [None]:
# Linear classifier over the raw pixel columns; its files go to OUTDIR.
model = tf.estimator.LinearClassifier(
    model_dir=OUTDIR,
    n_classes=N_CLASSES,
    feature_columns=make_features(),
)


In [None]:
# Fit the raw-pixel model on the training subset for 100 epochs.
model.train(
    input_fn=make_train_input_fn(train_data[REL_COLUMNS], num_epochs=100),
)

In [None]:
# In-sample accuracy: evaluate on the same rows the model was trained on.
metrics = model.evaluate(
    input_fn=make_eval_input_fn(train_data[REL_COLUMNS]),
)
print(metrics['accuracy'])

In [None]:
# Out-of-sample accuracy: evaluate on the held-out validation rows.
metrics = model.evaluate(
    input_fn=make_eval_input_fn(validation_data[REL_COLUMNS]),
)
print(metrics['accuracy'])

In [None]:
# Results (out-of-sample score)
# Linear classifier, all components (no PCA), 1 epoch:    0.899
# Linear classifier, all components (no PCA), 100 epochs: 0.8761
# Linear classifier, 200 PCA components, 100 epochs:      0.877
# Linear classifier, 300 PCA components, 100 epochs:      0.876
# Linear classifier, 300 PCA components, 1 epoch:         0.8864



### Dimensionality reduction with PCA

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Fit the PCA projection on the training subset only; the validation rows
# are transformed with this same fitted object later.
NUM_PCA_COMPONENTS = 300
pca_input = PCA(n_components=NUM_PCA_COMPONENTS)
pca_input.fit(train_data.loc[:, PREDICTOR_COLS])

In [None]:
# Column names for the PCA-projected features: 'pca0', 'pca1', ...
PCA_COLS = ['pca' + str(i) for i in range(NUM_PCA_COMPONENTS)]
# Columns handed to the input functions for the PCA model: every component
# plus the target. The target name comes from LABEL (not a repeated string
# literal) so it cannot drift from the df[LABEL] lookup in the input functions.
REL_PCA_COLUMNS = PCA_COLS + [LABEL]

In [None]:
# Project the training rows and wrap the result in a labelled DataFrame.
# Labels are attached via .values (position-based) because the new frames
# get a fresh index that differs from the source subsets' index.
transformed_train = pca_input.transform(train_data[PREDICTOR_COLS])
transformed_train_df = pd.DataFrame(data=transformed_train, columns=PCA_COLS)
transformed_train_df[LABEL] = train_data[LABEL].values

# Same treatment for the validation rows, using the projection fitted on
# the training subset.
transformed_validation = pca_input.transform(validation_data[PREDICTOR_COLS])
transformed_validation_df = pd.DataFrame(data=transformed_validation, columns=PCA_COLS)
transformed_validation_df[LABEL] = validation_data[LABEL].values

In [None]:
import shutil

# Separate model directory for the PCA-based classifier. Any copy left over
# from an earlier run is removed first; a missing directory is not an error.
OUTDIR = 'classification_outputs_pca'
shutil.rmtree(path=OUTDIR, ignore_errors=True)

In [None]:
# Linear classifier over the PCA components; its files go to OUTDIR.
model_pca = tf.estimator.LinearClassifier(
    model_dir=OUTDIR,
    n_classes=N_CLASSES,
    feature_columns=list(map(tf.feature_column.numeric_column, PCA_COLS)),
)


In [None]:
# Fit the PCA-based model on the projected training rows for a single epoch.
model_pca.train(
    input_fn=make_train_input_fn(transformed_train_df[REL_PCA_COLUMNS], num_epochs=1),
)

In [None]:
# In-sample accuracy of the PCA-based model.
metrics = model_pca.evaluate(
    input_fn=make_eval_input_fn(transformed_train_df[REL_PCA_COLUMNS]),
)
print(metrics['accuracy'])

In [None]:
# Out-of-sample accuracy of the PCA-based model on the validation rows.
metrics = model_pca.evaluate(
    input_fn=make_eval_input_fn(transformed_validation_df[REL_PCA_COLUMNS]),
)
print(metrics['accuracy'])

### Making predictions on test data
NOTE: The code for making predictions with the PCA-based model is not included, as we are focusing only on the validation score for now.

In [None]:
# Load the test set that predictions are generated for.
test_data = pd.read_csv(filepath_or_buffer='../input/test.csv')

In [None]:
# Predict with the raw-pixel model; the input function does not shuffle,
# so results come back in test_data row order.
test_predictions = model.predict(
    input_fn=make_prediction_input_fn(test_data[PREDICTOR_COLS]),
)

In [None]:
# Take the first entry of each prediction's 'classes' field and store it
# as an integer label.
test_data['Label'] = [
    int(prediction['classes'][0]) for prediction in test_predictions
]

In [None]:
# 1-based running id for every test row.
test_data['ImageId'] = np.arange(len(test_data)) + 1

In [None]:
# Write only the id and predicted label columns, without the frame index.
test_data.to_csv(
    'submission_dnn_tensorflow.csv',
    columns=['ImageId', 'Label'],
    index=False,
)