In [40]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
%matplotlib inline
import tensorflow as tf
train_df = pd.read_csv('./train.csv')

def get_images(df):
    df = df.copy()
    labels = df['label'].values
    enc = OneHotEncoder(sparse=False)
    enc.fit(np.array(range(10)).reshape(-1,1))
    labels = enc.transform(labels.reshape(-1,1))
    
    del df['label']
    
    images = df.values.reshape(len(df), 28, 28, 1)
    return labels, images

def view_images(images, labels):
    num_cols = int(np.ceil(np.sqrt(len(images))))
    fig, axs = plt.subplots(nrows=num_cols,ncols=num_cols)
    axs = axs.flatten()

    for idx, ax in enumerate(axs):
        ax.imshow(images[idx].reshape(28,28))
        ax.axes.set_title(np.argmax(labels[idx]))
    plt.show()

section_1 = np.cos(np.linspace(0,np.pi/2,50))/50.0
section_2 = np.cos(np.linspace(0,np.pi/2,100))/50.0
section_3 = np.cos(np.linspace(0,np.pi/2,200))/50.0
section_4 = np.cos(np.linspace(0,np.pi/2,400))/50.0

learning_rates = np.concatenate([section_1, section_2, section_3, section_4])



In [51]:
import tensorflow as tf
tf.reset_default_graph()

def conv_layer(X, input_channels, output_channels, name, dropout):
    with tf.name_scope(name):
        W = tf.Variable(tf.random_normal(
            [3,3,input_channels,output_channels],stddev=.01), name='W1')
        b = tf.Variable(tf.random_normal([output_channels], stddev=.01), name='bias')
        
        tf.summary.histogram('weights', W)
                        
        l1 = tf.nn.conv2d(X, filter=W, strides=[1,1,1,1], padding='SAME')
        l1 = tf.nn.relu(l1)
        l1 = tf.nn.max_pool(l1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
        l1 = tf.nn.dropout(l1, keep_prob=1.0-dropout)
        
        return l1

def fc_layer(X, input_channels, output_channels, name, dropout):
    with tf.name_scope(name):
        W = tf.Variable(tf.random_normal([input_channels, output_channels]), name='W2')
        l1 = tf.matmul(X, W)
        l1 = tf.nn.dropout(l1, keep_prob=1.0-dropout)
        return l1
    
    
def train_model():
    labels, images = get_images(train_df)
    images = (images-np.mean(images))/np.std(images)
    trX, teX, trY, teY = train_test_split(images, labels, test_size=.2, random_state=42)
    #img_mn = tf.constant(np.mean(trX), 'float32')
    #img_std = tf.constant(np.std(trX), 'float32')
    
    X = tf.placeholder('float32', [None, 28, 28, 1], name='x')     #28x28x1
    Y = tf.placeholder('float32', [None, 10], name='y')
    
    LR = tf.placeholder('float32')
    dropout = tf.placeholder("float32")
    
    
    #X = tf.divide(tf.subtract(X,img_mn), img_std)
    h = conv_layer(X, 1, 32, 'convlayer1', dropout)       #14x14x32
    h = conv_layer(h, 32, 64, 'convlayer2', dropout)      #7x7x64
    h = tf.reshape(h, [-1, 7*7*64])
    Yp = fc_layer(h, 7*7*64, 10, 'fc1', dropout)
    
    with tf.name_scope('xent'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Yp, labels=Y))
        tf.summary.scalar('xent', cost)
    
    with tf.name_scope('train'):
        tf.summary.scalar('lr', LR)
        train_op = tf.train.GradientDescentOptimizer(LR).minimize(cost)
    
    with tf.name_scope("accuracy"):
        acc = tf.reduce_mean(tf.cast(tf.equal(tf.arg_max(Yp,1),tf.arg_max(Y,1)),tf.float32))
        tf.summary.scalar('accuracy', acc)
        
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        
        merged = tf.summary.merge_all()
        
        fw_train = tf.summary.FileWriter('./hope/6/train/', sess.graph, flush_secs=5)
        fw_valid = tf.summary.FileWriter('./hope/6/valid/', flush_secs=5)
        
        
        for i in range(750):
            for start, end in zip(range(0,len(trX), 256), range(256, len(trX)+1, 256)):
                
                sess.run(train_op, feed_dict={X:trX[start:end], Y:trY[start:end], LR:learning_rates[i], dropout:.3})
                
            if i % 5 == 0:
                train_acc = sess.run(acc, feed_dict={X:trX, Y:trY, dropout:0.0})
                val_acc = sess.run(acc, feed_dict={X:teX, Y:teY, dropout:0.0})
               
                print 'step: {}'.format(i)
                print 'Train: ', train_acc
                print 'Validation: ', val_acc
            
                summary_train = sess.run(merged, feed_dict={X:trX, Y:trY, LR:learning_rates[i], dropout:0.0})
                summary_valid = sess.run(merged, feed_dict={X:teX, Y:teY, LR:learning_rates[i], dropout:0.0})
                
                fw_train.add_summary(summary_train, i)
                fw_valid.add_summary(summary_valid, i)
    

In [None]:
train_model()

step: 0
Train:  0.910506
Validation:  0.911071
step: 5
Train:  0.95131
Validation:  0.95119
step: 10
Train:  0.959137
Validation:  0.955714
step: 15
Train:  0.964613
Validation:  0.960238
step: 20
Train:  0.968006
Validation:  0.962976
step: 25
Train:  0.968869
Validation:  0.965595
step: 30
Train:  0.971607
Validation:  0.97
step: 35
Train:  0.972113
Validation:  0.968571
step: 40
Train:  0.973542
Validation:  0.969286
step: 45
Train:  0.97381
Validation:  0.970238
step: 50
Train:  0.972202
Validation:  0.968929
step: 55
Train:  0.972113
Validation:  0.965476
step: 60
Train:  0.972351
Validation:  0.967976
step: 65
Train:  0.97375
Validation:  0.97
step: 70
Train:  0.973125
Validation:  0.968571
step: 75
Train:  0.973482
Validation:  0.969762
step: 80
Train:  0.976429
Validation:  0.972262
step: 85
Train:  0.974613
Validation:  0.969643
step: 90
Train:  0.976548
Validation:  0.971905
step: 95
Train:  0.97619
Validation:  0.97119
step: 100
Train:  0.976488
Validation:  0.970952
step: 1