### Experiment environment

- Please copy all of your log files into the `log` directory (a sub-directory of the notebook's working directory).

In [1]:
import os
import pandas as pd
import numpy as np
import re
import math

In [2]:
# Accumulator for the parsed result of every log file (one entry per file).
fileList = []

class modelNode:
    """Container for one experiment log.

    Holds the five identification fields taken from the log's file name and
    one initially-empty list per metric, to be filled with one value per epoch.
    """

    def __init__(self, exp, date, time, task, net):
        # Identification fields of the log file.
        self.exp, self.date, self.time = exp, date, time
        self.task, self.net = task, net

        # Per-epoch metric series; every attribute gets its own fresh list.
        for series_name in (
            "tacc", "tloss", "vloss",
            "vacc1", "vacc2", "vacc3", "vacc4",
            "tppl", "vppl", "sps",
        ):
            setattr(self, series_name, list())

### Load files and parse texts

In [3]:
import pdb

In [4]:
# Parse every training log found in ./log into a modelNode and collect the
# nodes in fileList.
#
# A log's file name is expected to consist of five whitespace-separated fields
# followed by the ".log" extension: "<exp> <date> <time> <task> <net>.log".
LOG_SUFFIX = '.log'
log_dir = os.path.join(os.getcwd(), 'log')

# Start from an empty list so that re-running this cell does not append a
# second copy of every log.
fileList.clear()

# os.listdir returns entries in arbitrary order; sort for reproducible runs.
for filename in sorted(os.listdir(log_dir)):
    log_path = os.path.join(log_dir, filename)

    # Skip macOS metadata and anything that is not a regular file *before*
    # trying to open it.
    if filename == '.DS_Store' or not os.path.isfile(log_path):
        continue

    # Remove only the trailing ".log" extension. (str.rstrip('.log') would
    # instead strip any trailing run of the characters '.', 'l', 'o', 'g'
    # from every field, corrupting names that happen to end in those letters.)
    stem = filename[:-len(LOG_SUFFIX)] if filename.endswith(LOG_SUFFIX) else filename
    info = stem.split()
    if len(info) < 5:
        raise ValueError(
            "Unexpected log file name %r: expected '<exp> <date> <time> <task> <net>.log'"
            % filename
        )
    modelResult = modelNode(info[0], info[1], info[2], info[3], info[4])

    # The "val accuracy at ..." lines are assigned to vacc1..vacc4 in turn,
    # cycling back to vacc1 after the fourth one.
    val_acc_series = (
        modelResult.vacc1,
        modelResult.vacc2,
        modelResult.vacc3,
        modelResult.vacc4,
    )
    val_acc_slot = 0

    with open(log_path, 'r') as f:  # open in readonly mode
        in_epochs = False  # becomes True once the "Epoch #1:" line is reached
        for sen in f:
            # Everything from the first line containing "Done" onwards is ignored.
            if re.search("Done", sen):
                break

            if re.search("Epoch #1:", sen) is not None:
                in_epochs = True

            # Ignore the preamble and blank lines.
            if not in_epochs or not sen.strip():
                continue

            # Metric lines look like "<name>: <value>" or "<name> = <value>".
            parts = re.split(":|=", sen)
            if len(parts) < 2:
                # No separator on this line, so there is nothing to record.
                continue
            cond, value = parts[0].strip(), parts[1].strip()

            if cond == "train seq acc":
                modelResult.tacc.append(value)
            elif cond == "train loss":
                modelResult.tloss.append(value)
            elif cond == "Current LR":
                pass  # learning rate is not recorded
            elif cond == "Training Perplexity":
                modelResult.tppl.append(value)
            elif cond == "Train sequences per second":
                modelResult.sps.append(value)
            elif cond.lower() == "validation loss":
                modelResult.vloss.append(value)
            elif cond == "Perplexity":
                modelResult.vppl.append(value)
            elif re.search("val accuracy at", cond):
                val_acc_series[val_acc_slot].append(value)
                val_acc_slot = (val_acc_slot + 1) % len(val_acc_series)
            elif cond == "Validation seq acc":
                modelResult.vacc1.append(value)

    fileList.append(modelResult)

### Data preprocessing

In [5]:
# Order the parsed runs by experiment number, compared as integers.
fileList.sort(key=lambda node: int(node.exp))

In [6]:
def findKrankIdx(l, k=3):
    """Return the original index of the element of rank ``k`` in ``l``.

    The elements are ranked in ascending order and ``k`` is a 0-based position
    in that ranking, so the default ``k=3`` selects the fourth-smallest
    element. Ties keep their original relative order. A negative ``k`` counts
    from the largest element, as with normal list indexing.

    Values are compared numerically whenever every element can be converted
    with ``float`` (e.g. numeric strings read from a log file); a plain string
    comparison would rank "10.0" before "9.0". If any element is not
    convertible, the raw values are compared as they are.

    Args:
        l: sequence of values to rank.
        k: 0-based rank to look up.

    Returns:
        The index into ``l`` of the element at rank ``k``.

    Raises:
        IndexError: if ``k`` is outside the range of ``l``.
    """
    try:
        sort_keys = [float(v) for v in l]
    except (TypeError, ValueError):
        # Not purely numeric: fall back to comparing the values themselves.
        sort_keys = list(l)

    n = len(sort_keys)
    if not -n <= k < n:
        raise IndexError(
            "rank k=%d is out of range for a sequence of length %d" % (k, n)
        )

    # sorted() is stable, so equal values keep their original order.
    ranked_indices = sorted(range(n), key=sort_keys.__getitem__)
    return ranked_indices[k]

In [7]:
# One independent, initially empty list per summary column; each will receive
# one value per experiment.
(f_exp, f_date, f_time, f_task, f_net,
 f_tacc, f_tloss, f_vloss,
 f_vacc1, f_vacc2, f_vacc3, f_vacc4,
 f_tppl, f_vppl, f_sps) = ([] for _ in range(15))
f_tbpc, f_vbpc = [], []

In [8]:
# Reduce every run's per-epoch history to one summary value per column.
for node in fileList:
    # Epoch index selected by findKrankIdx on the validation loss, searching
    # from index 10 onwards (hence the +10 offset back into the full list).
    med_vloss = 10 + findKrankIdx(node.vloss[10:])

    # Identification fields.
    f_exp.append(node.exp)
    f_date.append(node.date)
    f_time.append(node.time)
    f_task.append(node.task)
    f_net.append(node.net)

    # Metrics read at the selected epoch.
    f_tacc.append(node.tacc[med_vloss])
    f_tloss.append(node.tloss[med_vloss])
    f_vloss.append(node.vloss[med_vloss])
    f_vacc1.append(node.vacc1[med_vloss])

    # Throughput: mean of the sps entries at indices 11..20.
    throughput_window = [float(s) for s in node.sps[11:21]]
    f_sps.append(np.mean(throughput_window))

    if node.task != 'listops':
        # Perplexity is read at its own selected epoch, chosen on validation
        # perplexity in the same way as above.
        med_vppl = 10 + findKrankIdx(node.vppl[10:])
        f_tppl.append(node.tppl[med_vppl])
        f_vppl.append(node.vppl[med_vppl])

        f_vacc2.append(node.vacc2[med_vloss])
        f_vacc3.append(node.vacc3[med_vloss])
        f_vacc4.append(node.vacc4[med_vloss])
    else:
        # listops rows leave the extra accuracy and perplexity columns blank.
        for blank_column in (f_vacc2, f_vacc3, f_vacc4, f_tppl, f_vppl):
            blank_column.append('')

    if node.task != 'ptbc':
        f_tbpc.append('')
        f_vbpc.append('')
    else:
        # Base-2 logarithm of the perplexity, stored in the tbpc/vbpc columns.
        f_tbpc.append(math.log(float(node.tppl[med_vppl]), 2))
        f_vbpc.append(math.log(float(node.vppl[med_vppl]), 2))

### Add to Dataframe 

In [9]:
# Pair every column name with its list of per-experiment values; the order
# here is the column order of the resulting DataFrame.
column_names = [
    'exp', 'date', 'time', 'task', 'net',
    'tacc', 'tloss', 'vloss',
    'vacc1', 'vacc2', 'vacc3', 'vacc4',
    'tppl', 'vppl', 'sps',
    'tbpc', 'vbpc',
]
column_values = [
    f_exp, f_date, f_time, f_task, f_net,
    f_tacc, f_tloss, f_vloss,
    f_vacc1, f_vacc2, f_vacc3, f_vacc4,
    f_tppl, f_vppl, f_sps,
    f_tbpc, f_vbpc,
]
d = dict(zip(column_names, column_values))

df = pd.DataFrame(data=d)
df

Unnamed: 0,exp,date,time,task,net,tacc,tloss,vloss,vacc1,vacc2,vacc3,vacc4,tppl,vppl,sps,tbpc,vbpc
0,1,2020-05-23,04:25:16,fib,nam,0.995859375,0.00043219816982059456,0.04091186392770396,0.9791666666666666,0.890625,0.7083333333333334,0.484375,1.0018307737848597,1.1088199465994881,121.550001,,
1,2,2020-05-23,07:26:23,fib,xlnet,0.3347265625,0.14576990044210106,1.7062653352816899,0.0,0.0,0.0,0.0,1.6662879336993384,115.9086124304987,95.966771,,
2,3,2020-05-23,11:17:20,fib,gru,0.1883984375,0.19598466011695564,0.43526421673595905,0.0,0.0,0.0,0.0,2.3109643061194474,3.0042279345091414,263.245679,,
3,4,2020-05-23,12:40:16,fib,lstm,0.167421875,0.2172097153775394,0.4482390731573105,0.0,0.0,0.0,0.0,2.5293892560383826,3.1029044488586974,566.426348,,
4,5,2020-05-23,13:19:19,fib,tf,0.8341015625,0.024261264608066994,1.8048786868651707,0.03125,0.0,0.0,0.0,1.0416901563999639,129.20897733088626,158.702251,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,90,2021-01-26,00:00:00,listops,xlnet,0.518125,1.2606626984477043,1.616836417466402,0.4580078125,,,,,,259.158081,,
82,91,2021-01-26,00:00:00,listops,dnc,0.1203125,2.2996992754936216,2.9773376882076263,0.16552734375,,,,,,43.884923,,
83,92,2021-01-26,00:00:00,listops,ut,0.476796875,1.4063901436328887,1.5789174884557724,0.400390625,,,,,,923.426247,,
84,93,2021-01-26,00:00:00,ptbc,ut,0.23029451301478676,0.05192563007949502,0.052195824382689174,0.19654605263157895,0.19172297297297297,0.19510135135135134,0.192573402417962,5.757280798727529,12.998904474716964,106.121693,2.52539,3.70032


### Save into csv file

In [10]:
# Write the summary table to disk without the DataFrame's index column.
df.to_csv(path_or_buf="2021-icml-all.csv", index=False)