# Let's see whether a neural net's training time on the GPU depends on the format of the input images.
## We will compare images in:
- fp32 format
- int8 format

In [1]:
import os

import sys
import blosc
import time 

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm

sys.path.append("..")
from my_batch import MnistBatch
from dataset import DatasetIndex, Dataset
from mnist import MNIST

## Load data

Let us load all the data once, instead of loading it for every batch, because the whole dataset fits in memory.

In [2]:
# Directory holding the blosc-packed MNIST arrays. The default is the path used in the
# original run; set the MNIST_DATA_DIR environment variable to point somewhere else.
DATA_DIR = os.environ.get('MNIST_DATA_DIR', 'C:\\Users\\Dari\\Documents\\az_training\\task_03')

with open(os.path.join(DATA_DIR, 'mnist_pics.blk'), 'rb') as file:
    full_imgs = blosc.unpack_array(file.read())

with open(os.path.join(DATA_DIR, 'mnist_labels.blk'), 'rb') as file:
    full_labs = blosc.unpack_array(file.read())

# `src` is the (images, labels) pair handed to the pipelines' load(). Using -1 lets NumPy
# infer the number of 28x28 images from the array size instead of hard-coding 65000.
src = (np.reshape(full_imgs, (-1, 28, 28)), full_labs)

In [3]:
# Index size is read from the loaded image array (65000 for the files used here),
# so the index cannot drift out of step with the data.
LEN_MNIST = len(src[0])
indy = DatasetIndex(np.arange(LEN_MNIST))

mnistset = Dataset(indy, batch_class=MnistBatch)
# Presumably a 90% / 10% split -- confirm against the `dataset` package.
# The timing functions further down read the `mnistset.train` part.
mnistset.cv_split([0.9, 0.1])

## Train on fp32 input image:

In [4]:
def measure_tf32(batch_size=100, shape=(400, 400)):
    """Run the float training action on one batch and return its recorded time.

    Builds a new pipeline over ``mnistset.train`` that loads from the in-memory
    ``src`` pair, resizes to ``shape`` and applies ``train_convy_float``, then
    requests a single batch from it.

    Parameters
    ----------
    batch_size : int, optional
        Batch size passed to ``next_batch`` (default 100).
    shape : tuple of int, optional
        Target size passed to ``resize`` (default ``(400, 400)``).

    Returns
    -------
    The first element that ``train_convy_float`` put into the list passed to it.
    Presumably the duration of the training step in seconds -- the action lives
    in ``my_batch``, confirm there.

    Raises
    ------
    IndexError
        If the action left the list empty.
    """
    time_list = []
    ppl = (
        mnistset.train.pipeline()
        .load(src=src, fmt='ndarray')
        .resize(shape=shape)
        .train_convy_float(time_list)
    )

    # Requesting one batch is presumably what executes the chained actions;
    # n_epochs=None is passed through unchanged from the original call.
    ppl.next_batch(batch_size, n_epochs=None)
    return time_list[0]

In [5]:
# Single call whose return value is shown as the cell output. In the recorded run this
# first call is several times slower than the repeated calls further down --
# presumably one-off start-up cost (TODO confirm), so it is kept out of the statistics.
measure_tf32()

3.860091022139239

In [None]:
# NOTE(review): this cell has no execution count and no output, and it repeats the
# call made in the previous cell -- candidate for deletion.
measure_tf32()

In [6]:
# Take ten fp32 measurements, printing each reading as soon as it is available.
times_fp = list()
for run_no in range(10):
    elapsed = measure_tf32()
    print(elapsed)
    times_fp += [elapsed]

0.9826095201280722
0.999001892647847
1.0019816278230174
0.9890259245347703
0.9986955681407927
1.0059146156903793
0.9895196904663806
0.9963117070662513
0.9925833002089561
0.993459971774385


## Mean and std of measured time for one iteration with fp32 input image

In [7]:
# Mean, then standard deviation (NumPy's default, ddof=0), of the fp32 timings.
print(np.array(times_fp).mean())
print(np.array(times_fp).std())

0.994910381848
0.0065317997458


## Train on int8 input image:

In [4]:
# int8 copy of the images: scale by 256 and shift by -128; labels are reused as-is.
# NOTE(review): assumes src[0] holds floats in [0, 1] -- confirm against how the
# .blk file was written. A pixel of exactly 1.0 maps to 128, which is outside the
# int8 range, so clip before the cast to saturate at 127 instead of overflowing.
src_int = np.clip(src[0] * 256 - 128, -128, 127).astype(np.int8), src[1]

In [5]:
def measure_int8(batch_size=100, shape=(400, 400)):
    """Run the int8 training action on one batch and return its recorded time.

    Builds a new pipeline over ``mnistset.train`` that loads from the in-memory
    ``src_int`` pair, resizes to ``shape`` and applies ``train_convy_int``, then
    requests a single batch from it.

    Parameters
    ----------
    batch_size : int, optional
        Batch size passed to ``next_batch`` (default 100).
    shape : tuple of int, optional
        Target size passed to ``resize`` (default ``(400, 400)``).

    Returns
    -------
    The first element that ``train_convy_int`` put into the list passed to it.
    Presumably the duration of the training step in seconds -- the action lives
    in ``my_batch``, confirm there.

    Raises
    ------
    IndexError
        If the action left the list empty.
    """
    time_list = []
    ppl = (
        mnistset.train.pipeline()
        .load(src=src_int, fmt='ndarray')
        .resize(shape=shape)
        .train_convy_int(time_list)
    )

    # Requesting one batch is presumably what executes the chained actions;
    # n_epochs=None is passed through unchanged from the original call.
    ppl.next_batch(batch_size, n_epochs=None)
    return time_list[0]

In [6]:
# Single call whose return value is shown as the cell output. In the recorded run this
# first call is several times slower than the repeated calls further down --
# presumably one-off start-up cost (TODO confirm), so it is kept out of the statistics.
measure_int8()

3.866162082131435

In [7]:
# Take ten int8 measurements, printing each reading as soon as it is available.
times_int = list()
for run_no in range(10):
    elapsed = measure_int8()
    print(elapsed)
    times_int += [elapsed]

1.0290563381822153
0.9836590462367667
0.9831572575204746
0.9823152297980755
0.986119488438078
0.9865833512630502
0.9984789529536897
0.9930577385229071
0.9938180797100244
0.9971650396216205


## Mean and std of measured time for one iteration with int8 input image

In [7]:
# Mean, then standard deviation (NumPy's default, ddof=0), of the int8 timings.
print(np.array(times_int).mean())
print(np.array(times_int).std())

0.993341052225
0.0131671848052


## We found no significant difference in training time between the two input image formats. In particular, we were interested in the time spent transferring data to the GPU.