In [1]:
import os
import json
import sqlite3
import pandas as pd
import datetime

In [2]:
all_events = []
for root,directories, files in os.walk('../nni'):
    for file in files:
        file_name = '{}/{}'.format(root,file)
        meta_data = file_name.split('/')
        experiment_id = meta_data[3]
        if 'log' in file_name or 'run.sh' in file_name or 'stderr' in file_name:
            continue
        if 'sqlite' in file_name:
                try:
                    conn = sqlite3.connect(file_name)
                    c = conn.cursor()
                    c.execute('SELECT * FROM TrialJobEvent')
                    result = c.fetchall()
                    all_events+=[{'time_point':  datetime.datetime.fromtimestamp(i[0]/1000),
                                  'experiment_id': experiment_id,
                                  'trial_id': i[1],
                                  'type': i[2],
                                  'queue': json.loads(i[3])['parameter_id'] if i[3] else None,
                                  'batch': json.loads(i[3])['parameters']['batch'] if i[3] else None,
                                  'hidden_factors': json.loads(i[3])['parameters']['hidden_factors'] if i[3] else None,
                                  'regularizer': json.loads(i[3])['parameters']['regularizer'] if i[3] else None,
                                 } for i in result] 
                finally:
                    conn.close()

In [3]:
all_events_df = pd.DataFrame(all_events)
all_events_df['duration'] = 0

In [4]:
for trial_id, body in all_events_df.groupby('trial_id'):
    begin_time = body[body['type']=='RUNNING']['time_point'].array
    end_time = body[body['type']=='SUCCEEDED']['time_point'].array
    if end_time.size == 0:
        all_events_df = all_events_df[all_events_df['trial_id']!=trial_id]
    else:
        all_events_df.loc[all_events_df['trial_id']==trial_id, 'duration'] = (end_time[0] - begin_time[0]).total_seconds()

In [5]:
all_events_df = all_events_df[all_events_df['type']=='WAITING'].drop(columns=['type'])
all_events_df['value'] = 0
all_events_df['method'] = ''
all_events_df

Unnamed: 0,time_point,experiment_id,trial_id,queue,batch,hidden_factors,regularizer,duration,value
0,2020-07-12 14:08:30.026,zgNIwuIH,K4kLH,0,64.0,64.0,-4.299037,10930.493,0
2,2020-07-12 14:10:30.212,zgNIwuIH,pu8pT,1,256.0,128.0,-5.671608,4528.161,0
3,2020-07-12 14:10:30.216,zgNIwuIH,sdDyq,2,512.0,32.0,-6.808246,1756.478,0
4,2020-07-12 14:10:30.219,zgNIwuIH,kwhLc,3,256.0,128.0,-6.859900,4074.489,0
9,2020-07-12 14:39:56.768,zgNIwuIH,ybIuh,4,64.0,16.0,-7.222800,10622.249,0
...,...,...,...,...,...,...,...,...,...
972,2020-07-11 12:31:36.254,kED1A1n8,H6OOX,25,128.0,4.0,-5.000000,5500.077,0
975,2020-07-11 12:35:17.593,kED1A1n8,hJtpA,26,128.0,4.0,-5.000000,5459.160,0
978,2020-07-11 12:37:53.554,kED1A1n8,MUnws,27,128.0,4.0,-4.000000,5467.229,0
981,2020-07-11 12:43:00.375,kED1A1n8,HRFZI,28,128.0,4.0,-4.000000,5554.405,0


In [6]:
with open('experiment_model2.json', 'r') as f:
    data_json = json.loads(f.read())

In [11]:
for k, v in data_json.items():
    for trial in v['trials']:
        if 'trial_job_id' not in trial:
            continue
        all_events_df.loc[all_events_df['trial_id']==trial['trial_job_id'], 'value'] = trial['value']
        all_events_df.loc[all_events_df['trial_id']==trial['trial_job_id'], 'method'] = v['method']

In [19]:
for method, data in all_events_df.groupby('method'):
    sum(data['duration'])

Unnamed: 0,time_point,experiment_id,trial_id,queue,batch,hidden_factors,regularizer,duration,value,method
270,2020-07-06 18:24:24.796,aw3gLZDK,HOtpZ,0,64.0,8.0,-4.456029,12032.856,0.746980607509613,TPE
272,2020-07-06 21:10:26.668,aw3gLZDK,r9Gao,1,64.0,128.0,-6.441745,13966.376,0.8303606510162354,TPE
274,2020-07-06 21:24:39.118,aw3gLZDK,rJTgS,2,256.0,8.0,-6.445642,2874.11,0.8290109038352966,TPE
277,2020-07-06 21:45:09.018,aw3gLZDK,TkkpM,3,512.0,4.0,-6.431288,1517.675,0.8169384598731995,TPE
280,2020-07-06 22:10:35.433,aw3gLZDK,Rp3Zm,4,64.0,16.0,-4.335428,10337.392,0.7535792589187622,TPE
283,2020-07-06 22:12:40.951,aw3gLZDK,Woffn,5,2048.0,8.0,-4.151452,603.434,0.8273362517356873,TPE
286,2020-07-06 22:22:53.599,aw3gLZDK,DToRG,6,128.0,64.0,-6.396892,5369.885,0.8284485340118408,TPE
289,2020-07-06 23:52:32.552,aw3gLZDK,sQaf8,7,64.0,16.0,-6.542312,10532.415,0.8236545324325562,TPE
292,2020-07-07 01:02:59.654,aw3gLZDK,PWoPo,8,1024.0,4.0,-4.630834,960.849,0.8102323412895203,TPE
295,2020-07-07 01:03:24.752,aw3gLZDK,GuYmV,9,1024.0,4.0,-6.392019,974.244,0.8148788809776306,TPE
