In [1]:
# Report the GPU, driver and CUDA version visible on this machine before benchmarking.
!nvidia-smi

Sun Jan 31 03:21:08 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.81       Driver Version: 456.81       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce GTX 165... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   61C    P8     3W /  N/A |    538MiB /  4096MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import numpy as np
from numba import cuda, float32
import utils.utils as u
import os
import random
import math

import cnn.cnn as cnn
import cnn.cnn_sequential as cnn_s
import cnn.cnn_parallel_cpu as cnn_p_cpu
import cnn.cnn_parallel_cuda as cnn_p_gpu

import pandas as pd


In [3]:
# Single seed shared by every random number generator used in this notebook.
SEED = 0

# Seed both the stdlib and the NumPy global generators so reruns are repeatable.
random.seed(SEED)
np.random.seed(SEED)

In [4]:
# Load the train/test images and labels through the project helper.
(train_data, train_label, test_data, test_label) = u.load_data()

Extracting mnist\train-images-idx3-ubyte.gz
Extracting mnist\train-labels-idx1-ubyte.gz
Extracting mnist\t10k-images-idx3-ubyte.gz
Extracting mnist\t10k-labels-idx1-ubyte.gz


In [5]:
# Timing benchmark: train the same CNN with the parallel-CPU, sequential and
# CUDA trainers for every (filters, batch_size) combination, then pickle the
# collected results.

BATCH_SIZE_ = [100, 250, 500, 1000]
FILTERS_ = [32, 64, 128, 256]

kernel_shape = (3, 3, 1)

conv_stride = 1
pool_stride = 2
pool_size = (2, 2)
num_classes = 10

epochs = 100
lr = 0.1

num_threads = 2  # only passed to the parallel-CPU trainer

data = (train_data, train_label, test_data, test_label)

RESULT_COLUMNS = ['training_mode', 'num_parameters', 'filters', 'batch_size', 'duration',
                  'accs', 'epochs_time', 'cuda_dt_time', 'model']

# One dict per training run. Rows are collected in a plain list and turned into
# a DataFrame once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the whole frame on every call.
results = []


def _record_run(training_mode, label, filters, batch_size, duration, epochs_time,
                accs, num_parameters, model, cuda_dt_time=np.nan):
    """Store one training run in ``results`` and print its summary line.

    ``training_mode`` is the value saved in the results table, ``label`` is the
    name shown in the printed summary. ``cuda_dt_time`` defaults to NaN for the
    trainers that do not report a data-transfer time.
    """
    results.append({
        'training_mode': training_mode,
        'num_parameters': num_parameters,
        'filters': filters,
        'batch_size': batch_size,
        'duration': duration,
        'epochs_time': epochs_time,
        'cuda_dt_time': cuda_dt_time,
        'accs': accs,
        'model': model,
    })
    print('mode:{}; duration:{}; last_acc: {}; mean_epoch: {}'.format(
        label, duration, accs[-1], np.mean(epochs_time)))


for filters in FILTERS_:
    for batch_size in BATCH_SIZE_:
        print('-------')
        print('filters: {}; bs: {}'.format(filters, batch_size))

        # Parallel CPU trainer (the only one that takes num_threads).
        duration, epochs_time, accs, num_parameters, model = cnn_p_cpu.train_parallel(
            data, batch_size, kernel_shape, filters,
            conv_stride, pool_stride, pool_size, num_classes,
            num_threads, lr, epochs)
        _record_run('parallel_cpu', 'parallel cpu', filters, batch_size,
                    duration, epochs_time, accs, num_parameters, model)

        # Sequential trainer.
        duration, epochs_time, accs, num_parameters, model = cnn_s.train_sequential(
            data, batch_size, kernel_shape, filters,
            conv_stride, pool_stride, pool_size, num_classes, lr, epochs)
        _record_run('sequential', 'sequential', filters, batch_size,
                    duration, epochs_time, accs, num_parameters, model)

        # CUDA trainer; it additionally returns the data-transfer time.
        duration, epochs_time, data_transfer_time, accs, num_parameters, model = cnn_p_gpu.train_cuda(
            data, batch_size, kernel_shape, filters,
            conv_stride, pool_stride, pool_size, num_classes, lr, epochs)
        _record_run('parallel_cuda', 'gpu', filters, batch_size,
                    duration, epochs_time, accs, num_parameters, model,
                    cuda_dt_time=data_transfer_time)

# Build the table in one go; `columns` keeps the original column order.
df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
df_results.to_pickle('df_results.pkl')


-------
filters: 32; bs: 100
mode:parallel cpu; duration:41.853899002075195; last_acc: 0.84; mean_epoch: 0.418509316444397
mode:sequential; duration:32.25034713745117; last_acc: 0.83; mean_epoch: 0.3224334788322449
mode:gpu; duration:30.67756223678589; last_acc: 0.87; mean_epoch: 0.30675562381744387
-------
filters: 32; bs: 250
mode:parallel cpu; duration:56.516010761260986; last_acc: 0.88; mean_epoch: 0.5651100826263428
mode:sequential; duration:62.4572491645813; last_acc: 0.904; mean_epoch: 0.6244624662399292
mode:gpu; duration:34.00773310661316; last_acc: 0.848; mean_epoch: 0.3400673294067383
-------
filters: 32; bs: 500
mode:parallel cpu; duration:87.09855914115906; last_acc: 0.896; mean_epoch: 0.8709755921363831
mode:sequential; duration:168.56968688964844; last_acc: 0.866; mean_epoch: 1.6855869674682618
mode:gpu; duration:51.147387742996216; last_acc: 0.856; mean_epoch: 0.5114542269706726
-------
filters: 32; bs: 1000
mode:parallel cpu; duration:233.2961015701294; last_acc: 0.893