In [1]:
import os
import json
import sqlite3
import pandas as pd
import datetime

In [2]:
all_events = []
for root,directories, files in os.walk('../nni'):
    for file in files:
        file_name = '{}/{}'.format(root,file)
        meta_data = file_name.split('/')
        experiment_id = meta_data[3]
        if 'log' in file_name or 'run.sh' in file_name or 'stderr' in file_name:
            continue
        if 'sqlite' in file_name:
                try:
                    conn = sqlite3.connect(file_name)
                    c = conn.cursor()
                    c.execute('SELECT * FROM TrialJobEvent')
                    result = c.fetchall()
                    all_events+=[{'time_point':  datetime.datetime.fromtimestamp(i[0]/1000),
                                  'experiment_id': experiment_id,
                                  'trial_id': i[1],
                                  'type': i[2],
                                  'queue': json.loads(i[3])['parameter_id'] if i[3] else None,
                                  'batch': json.loads(i[3])['parameters']['batch'] if i[3] else None,
                                  'hidden_factors': json.loads(i[3])['parameters']['hidden_factors'] if i[3] else None,
                                  'regularizer': json.loads(i[3])['parameters']['regularizer'] if i[3] else None,
                                 } for i in result] 
                finally:
                    conn.close()

In [3]:
all_events_df = pd.DataFrame(all_events)
all_events_df['duration'] = 0

In [4]:
for trial_id, body in all_events_df.groupby('trial_id'):
    begin_time = body[body['type']=='RUNNING']['time_point'].array
    end_time = body[body['type']=='SUCCEEDED']['time_point'].array
    if end_time.size == 0:
        all_events_df = all_events_df[all_events_df['trial_id']!=trial_id]
    else:
        all_events_df.loc[all_events_df['trial_id']==trial_id, 'duration'] = (end_time[0] - begin_time[0]).total_seconds()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [5]:
all_events_df = all_events_df[all_events_df['type']=='WAITING'].drop(columns=['type'])
all_events_df['value'] = 0
all_events_df

Unnamed: 0,time_point,experiment_id,trial_id,queue,batch,hidden_factors,regularizer,duration,value
0,2020-06-30 17:43:06.738,MrUaVQmO,nwxKc,3_0_0,512.0,8.0,-4.533964,154.509,0
3,2020-06-30 17:45:52.015,MrUaVQmO,KUJca,3_0_1,1024.0,8.0,-6.233733,95.362,0
6,2020-06-30 17:47:42.251,MrUaVQmO,XF4ZS,3_0_2,1024.0,8.0,-6.658249,94.922,0
9,2020-06-30 17:49:22.439,MrUaVQmO,w1Zey,3_0_3,1024.0,32.0,-6.754749,99.247,0
12,2020-06-30 17:51:12.637,MrUaVQmO,J06s9,3_0_4,2048.0,128.0,-4.749582,85.147,0
...,...,...,...,...,...,...,...,...,...
1064,2020-06-30 23:44:25.733,DOR2vzi7,Q2h3V,25,128.0,8.0,-8.000000,469.221,0
1067,2020-06-30 23:52:26.893,DOR2vzi7,UJjd7,26,128.0,8.0,-7.000000,466.860,0
1070,2020-07-01 00:00:23.123,DOR2vzi7,wH2QP,27,128.0,8.0,-6.000000,458.620,0
1073,2020-07-01 00:08:09.314,DOR2vzi7,mHlUn,28,128.0,8.0,-5.000000,459.169,0


In [6]:
with open('experiment_model1.json', 'r') as f:
    data_json = json.loads(f.read())

In [7]:
for k, v in data_json.items():
    for trial in v['trials']:
        if 'trial_job_id' not in trial:
            continue
        all_events_df.loc[all_events_df['trial_id']==trial['trial_job_id'], 'value'] = trial['value']

In [8]:
all_events_df

Unnamed: 0,time_point,experiment_id,trial_id,queue,batch,hidden_factors,regularizer,duration,value
0,2020-06-30 17:43:06.738,MrUaVQmO,nwxKc,3_0_0,512.0,8.0,-4.533964,154.509,0.5722249746322632
3,2020-06-30 17:45:52.015,MrUaVQmO,KUJca,3_0_1,1024.0,8.0,-6.233733,95.362,0.49950000643730164
6,2020-06-30 17:47:42.251,MrUaVQmO,XF4ZS,3_0_2,1024.0,8.0,-6.658249,94.922,0.49985000491142273
9,2020-06-30 17:49:22.439,MrUaVQmO,w1Zey,3_0_3,1024.0,32.0,-6.754749,99.247,0.49912500381469727
12,2020-06-30 17:51:12.637,MrUaVQmO,J06s9,3_0_4,2048.0,128.0,-4.749582,85.147,0.49729999899864197
...,...,...,...,...,...,...,...,...,...
1064,2020-06-30 23:44:25.733,DOR2vzi7,Q2h3V,25,128.0,8.0,-8.000000,469.221,0.7488250136375427
1067,2020-06-30 23:52:26.893,DOR2vzi7,UJjd7,26,128.0,8.0,-7.000000,466.860,0.74795001745224
1070,2020-07-01 00:00:23.123,DOR2vzi7,wH2QP,27,128.0,8.0,-6.000000,458.620,0.7438250184059143
1073,2020-07-01 00:08:09.314,DOR2vzi7,mHlUn,28,128.0,8.0,-5.000000,459.169,0.7461000084877014
