In [1]:
import os
import glob
import pprint

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Plot appearance for the whole notebook. The style sheet is applied first so
# that the explicit figure-size override below is not reset by it.
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': (14, 10)})

In [3]:
# Root folders that contain one sub-directory per run. Only `dir_src` is
# scanned in this cell; `dir_src2` is merely defined here.
dir_src = './logs'
dir_src2 = './logs_backup'

# All *.log files located exactly one directory level below `dir_src`,
# in lexicographic order.
log_files = glob.glob('{}/*/*.log'.format(dir_src))
log_files.sort()
print(len(log_files))

# Partition the listing by the marker contained in each path.
epoch_times_files = list(filter(lambda path: '_times.log' in path, log_files))
comp_times_files = list(filter(lambda path: 'compilation_time.log' in path, log_files))
print(len(epoch_times_files))
print(len(comp_times_files))

30
6
12


In [4]:
# Model identifiers, grouped by framework (presumably the prefixes used in the
# run-directory names -- confirm against the log folders).
# NOTE(review): in the cells shown in this notebook only `pytorch_models` is
# consulted; `keras_models` is defined but not read.
keras_models = [
    'densenet121', 'densenet169',
    'inceptionv3', 'inceptionresnetv2',
    'resnet50',
    'nasnet_large',
]

pytorch_models = [
    'DenseNet121', 'DenseNet169',
    'Inception3', 'InceptionResNetV2',
    'NASNet', 'PNASNet',
    'ResNet50',
]

In [5]:
# Candidate run directories: everything one level below `dir_src`, followed by
# everything one level below `dir_src2`. The glob pattern ends in '/', so every
# entry carries a trailing slash.
log_dirs2 = sorted(glob.glob('{}/*/'.format(dir_src2)))
log_dirs = sorted(glob.glob('{}/*/'.format(dir_src))) + log_dirs2


log_sets_good = []      # every directory that passed the file-count check
log_set_keras = []      # accepted directories whose prefix is NOT in pytorch_models
log_set_pytorch = []    # accepted directories whose prefix is in pytorch_models

for run_dir in log_dirs:
    # Only directories holding exactly two or three *.log files are accepted.
    n_logs = len(glob.glob('{}/*.log'.format(run_dir)))
    if n_logs not in (2, 3):
        continue

    # Because of the trailing slash the folder name is the second-to-last
    # '/'-separated piece; its first '_'-separated token selects the framework.
    name_prefix = run_dir.split('/')[-2].split('_')[0]
    target = log_set_pytorch if name_prefix in pytorch_models else log_set_keras
    target.append(run_dir)
    log_sets_good.append(run_dir)

In [6]:
# Build `df_keras`: one row per (run, epoch) for every Keras run that can be
# loaded. Each run contributes the contents of '<run_name>_times.log' plus the
# first value of its compilation log, the run name and the framework tag.
keras_runs = []      # one DataFrame per successfully loaded run
keras_skipped = []   # (run_name, exception) for every run that failed to load

for keras_dir in log_set_keras:
    # Entries of `log_set_keras` are expected to end with '/', so the folder
    # name is the second-to-last '/'-separated piece.
    run_name = keras_dir.split('/')[-2]
    log_file = '{}_times.log'.format(run_name)

    try:
        keras_logs = sorted(glob.glob(keras_dir + '*.log'))
        # IndexError here means the run has no compilation log; it is handled
        # like any other unreadable run instead of aborting the whole cell.
        compil_log = [x for x in keras_logs if 'compilation' in x][0]

        keras_run_df = pd.read_csv(keras_dir + log_file, header=None)
        keras_compil = pd.read_csv(compil_log, header=None).iloc[0, 0]

        # Raises ValueError when the epoch log does not have exactly 2 columns.
        keras_run_df.columns = ['epoch', 'epoch_time']
        keras_run_df['compilation_time'] = keras_compil
        keras_run_df['run_name'] = run_name
        keras_run_df['framework'] = 'keras'
    except Exception as e:
        # Loading stays best-effort (a broken run must not stop the analysis),
        # but the failure is recorded so it can be reported below.
        keras_skipped.append((run_name, e))
        continue

    keras_runs.append(keras_run_df)

# Make dropped runs visible instead of discarding them silently.
if keras_skipped:
    print('Skipped {} of {} Keras runs:'.format(len(keras_skipped), len(log_set_keras)))
    for skipped_name, reason in keras_skipped:
        print('  {}: {!r}'.format(skipped_name, reason))

# pd.concat raises an uninformative ValueError on an empty list; raise the same
# exception type with a message that points at the actual problem.
if not keras_runs:
    raise ValueError(
        'No Keras runs could be loaded from {} candidate directories'.format(len(log_set_keras)))

df_keras = pd.concat(keras_runs, ignore_index=True, sort=False)

In [7]:
# Build `df_pytorch`: one row per (run, epoch) for every PyTorch run that can
# be loaded. Each run contributes the contents of '<run_name>.log' plus the
# first value of its compilation log, the run name and the framework tag.
pytorch_runs = []      # one DataFrame per successfully loaded run
pytorch_skipped = []   # (run_name, exception) for every run that failed to load

for pytorch_dir in log_set_pytorch:
    # Entries of `log_set_pytorch` are expected to end with '/', so the folder
    # name is the second-to-last '/'-separated piece.
    run_name = pytorch_dir.split('/')[-2]
    log_file = '{}.log'.format(run_name)

    try:
        pytorch_logs = sorted(glob.glob(pytorch_dir + '*.log'))
        # IndexError here means the run has no compilation log; it is handled
        # like any other unreadable run instead of aborting the whole cell.
        compil_log = [x for x in pytorch_logs if 'compilation' in x][0]

        pytorch_run_df = pd.read_csv(pytorch_dir + log_file, header=None)
        pytorch_compil = pd.read_csv(compil_log, header=None).iloc[0, 0]

        # Raises ValueError when the epoch log does not have exactly 2 columns.
        pytorch_run_df.columns = ['epoch', 'epoch_time']
        pytorch_run_df['compilation_time'] = pytorch_compil
        pytorch_run_df['run_name'] = run_name
        pytorch_run_df['framework'] = 'pytorch'
    except Exception as e:
        # Loading stays best-effort (a broken run must not stop the analysis),
        # but the failure is recorded so it can be reported below.
        pytorch_skipped.append((run_name, e))
        continue

    pytorch_runs.append(pytorch_run_df)

# Make dropped runs visible instead of discarding them silently.
if pytorch_skipped:
    print('Skipped {} of {} PyTorch runs:'.format(len(pytorch_skipped), len(log_set_pytorch)))
    for skipped_name, reason in pytorch_skipped:
        print('  {}: {!r}'.format(skipped_name, reason))

# pd.concat raises an uninformative ValueError on an empty list; raise the same
# exception type with a message that points at the actual problem.
if not pytorch_runs:
    raise ValueError(
        'No PyTorch runs could be loaded from {} candidate directories'.format(len(log_set_pytorch)))

df_pytorch = pd.concat(pytorch_runs, ignore_index=True, sort=False)
# Run names are lower-cased after loading (the file lookups above use the
# original casing of the directory name).
df_pytorch['run_name'] = df_pytorch['run_name'].str.lower()

In [8]:
# Stack both frameworks into one frame and derive run metadata from `run_name`.
df_combined = pd.concat([df_keras, df_pytorch], ignore_index=True, sort=False)

# Split every run name once on '_'; the pieces are read positionally:
# [0] model, [1] 'size<N>', [2] 'batch<N>', [-1] trial number.
run_name_parts = df_combined['run_name'].map(lambda name: name.split('_'))
df_combined['model_name'] = run_name_parts.map(lambda parts: parts[0])
df_combined['image_size'] = run_name_parts.map(lambda parts: int(parts[1].replace('size', '')))
df_combined['batch_size'] = run_name_parts.map(lambda parts: int(parts[2].replace('batch', '')))
df_combined['trial_num'] = run_name_parts.map(lambda parts: int(parts[-1]))

# Use one spelling for the model that appears as 'inception3' in some run names.
df_combined['model_name'] = df_combined['model_name'].replace('inception3', 'inceptionv3')

# Shift the epoch counter of the Keras rows down by one.
# NOTE(review): presumably Keras logs count epochs from 1 and PyTorch logs
# from 0 -- confirm against the raw log files.
is_keras = df_combined['framework'] == 'keras'
df_combined.loc[is_keras, 'epoch'] = df_combined.loc[is_keras, 'epoch'] - 1

df_combined.head()

Unnamed: 0,epoch,epoch_time,compilation_time,run_name,framework,model_name,image_size,batch_size,trial_num
0,0,414.981912,19.06425,inceptionresnetv2_size299_batch12_trial_0,keras,inceptionresnetv2,299,12,0
1,1,383.155514,19.06425,inceptionresnetv2_size299_batch12_trial_0,keras,inceptionresnetv2,299,12,0
2,2,384.261925,19.06425,inceptionresnetv2_size299_batch12_trial_0,keras,inceptionresnetv2,299,12,0
3,3,384.001226,19.06425,inceptionresnetv2_size299_batch12_trial_0,keras,inceptionresnetv2,299,12,0
4,4,384.141808,19.06425,inceptionresnetv2_size299_batch12_trial_0,keras,inceptionresnetv2,299,12,0


In [9]:
# Summary tables, all keyed by (model_name, framework, batch_size).
group_keys = ['model_name', 'framework', 'batch_size']
by_config = df_combined.groupby(group_keys)

# Mean epoch time / mean compilation time over all rows of a configuration.
df_epochs = by_config['epoch_time'].mean().reset_index()
df_compils = by_config['compilation_time'].mean().reset_index()

# Mean epoch time per (epoch, configuration); source for the two tables below.
per_epoch = df_combined.groupby(
    ['epoch'] + group_keys)['epoch_time'].mean().reset_index()

# Epoch 0 only.
df_first_epoch = per_epoch.loc[per_epoch['epoch'] == 0, :]

# Every other epoch: average the per-epoch means within each configuration,
# then discard the (averaged, meaningless) epoch column.
df_non_first = (
    per_epoch.loc[per_epoch['epoch'] != 0, :]
    .groupby(group_keys).mean().reset_index()
    .drop(['epoch'], axis=1)
)


# Sort each table by the keys, remove three rows, then renumber from zero.
# `drop` works on index LABELS, and `sort_values` keeps the labels assigned by
# the reset_index() calls above, so the same labels are removed from each table.
# NOTE(review): which configurations the labels 5, 7 and 14 stand for is not
# visible here and depends on the data that was loaded -- confirm before
# re-running on a different set of logs.
rows_to_drop = [5, 7, 14]
df_epochs, df_compils, df_first_epoch, df_non_first = [
    frame.sort_values(group_keys).drop(rows_to_drop).reset_index(drop=True)
    for frame in (df_epochs, df_compils, df_first_epoch, df_non_first)
]


# Sanity check: all four tables must list identical key columns row by row
# (df_first_epoch carries an extra leading 'epoch' column, hence the 1:-1).
epoch_keys = df_epochs.iloc[:, :-1].values
first_epoch_keys = df_first_epoch.iloc[:, 1:-1].values
assert np.all(df_compils.iloc[:, :-1].values == first_epoch_keys)
assert np.all(epoch_keys == first_epoch_keys)
assert np.all(epoch_keys == df_non_first.iloc[:, :-1].values)

In [10]:
# Write every result table to a CSV file in the working directory, without the
# index column. The summaries are written first, the full combined table last.
csv_exports = [
    ('df_epoch_times.csv', df_epochs),
    ('df_compilation_times.csv', df_compils),
    ('df_first_epoch.csv', df_first_epoch),
    ('df_non_first.csv', df_non_first),
    ('df_combined_times.csv', df_combined),
]

for csv_name, frame in csv_exports:
    frame.to_csv(csv_name, index=False)