### Experiment environment

- Please copy all of your logs file at `log` directory. 

In [7]:
import os
import pandas as pd
import numpy as np
import re
import math

In [8]:
# Save files here
fileList = list()

class modelNode:
    def __init__(self, exp, date, time, task, net):
        #File info 
        self.exp = exp
        self.date = date
        self.time = time
        self.task = task
        self.net = net
        
        #Every epoch result
        self.tacc = list()
        self.tloss = list()
        self.vloss = list()
        self.vacc1 = list()
        self.vacc2 = list()
        self.vacc3 = list()
        self.vacc4 = list()
        self.tppl = list()
        self.vppl = list()
        self.sps = list()

### Load files and parse texts

In [9]:
for filename in os.listdir(os.getcwd()+'/log'):
    with open(os.path.join(os.getcwd()+'/log', filename), 'r') as f: # open in readonly mode
        # Parse file name
        if filename == '.DS_Store':
            continue
        info = filename.split()
        info = list(map(lambda x: x.rstrip('.log'), info))
        modelResult = modelNode(info[0], info[1], info[2], info[3], info[4])
        
        # Parse every sentence
        counter = 1
        flag = False
        for sen in f:
            if re.search("Epoch #1:", sen) is not None:
                flag = True
            
            if flag and sen != "\n":
                sen = re.split(":|=", sen)
                cond, value = sen[0].strip(), sen[1].strip()

                if cond == "train seq acc":
                    modelResult.tacc.append(value)
                elif cond == "train loss":
                    modelResult.tloss.append(value)
                elif cond == "Current LR":
                    pass
                elif cond == "Training Perplexity":
                    modelResult.tppl.append(value)
                elif cond == "Train sequences per second":
                    modelResult.sps.append(value)
                elif cond == "validation loss":
                    modelResult.vloss.append(value)
                elif cond == "Perplexity":
                    modelResult.vppl.append(value)
                elif re.search("val accuracy at", cond) and counter == 1: # Val acc at first digit
                    modelResult.vacc1.append(value)
                    counter +=1 
                elif re.search("val accuracy at", cond) and counter == 2: # Val acc at second digit
                    modelResult.vacc2.append(value)
                    counter +=1
                elif re.search("val accuracy at", cond) and counter == 3: # Val acc at third digit
                    modelResult.vacc3.append(value)
                    counter +=1
                elif re.search("val accuracy at", cond) and counter == 4: # Val acc at fourth digit
                    modelResult.vacc4.append(value)
                    counter = 1
                
        fileList.append(modelResult)

### Data preprocessing

In [10]:
fileList.sort(key = lambda x: int(x.exp))

In [11]:
def findMedianIdx(l):
    med = np.argpartition(l, -3)[:3][2]
    return med

In [12]:
f_exp, f_date, f_time, f_task, f_net, f_tacc, f_tloss, f_vloss, f_vacc1, f_vacc2, f_vacc3, f_vacc4, f_tppl, f_vppl, f_sps = list(), list(), list(), list(), list(), list(), list(), list(), list(), list(), list(), list(), list(), list(), list()
f_tbpc, f_vbpc = list(), list()

In [13]:
for i, v in enumerate(fileList):
    med_vloss = findMedianIdx(fileList[i].vloss[10:]) + 10 # Choose from epoch 10
    med_vppl = findMedianIdx(fileList[i].vppl[10:]) + 10
    
    f_exp.append(fileList[i].exp)
    f_date.append(fileList[i].date)
    f_time.append(fileList[i].time)
    f_task.append(fileList[i].task)
    f_net.append(fileList[i].net)

    f_tacc.append(fileList[i].tacc[med_vloss])
    f_tloss.append(fileList[i].tloss[med_vloss])
    f_vloss.append(fileList[i].vloss[med_vloss])
    f_vacc1.append(fileList[i].vacc1[med_vloss])
    f_vacc2.append(fileList[i].vacc2[med_vloss])
    f_vacc3.append(fileList[i].vacc3[med_vloss])
    f_vacc4.append(fileList[i].vacc4[med_vloss])
    f_tppl.append(fileList[i].tppl[med_vppl])
    f_vppl.append(fileList[i].vppl[med_vppl])
    f_sps.append(np.mean(list(map(lambda x: float(x), fileList[i].sps[11:21])))) # Choose from epoch 11~20
    
    if fileList[i].task == 'ptbc':  
        f_tbpc.append(math.log(float(fileList[i].tppl[med_vppl]), 2))
        f_vbpc.append(math.log(float(fileList[i].vppl[med_vppl]), 2))
    else:
        f_tbpc.append('')
        f_vbpc.append('')

### Add to Dataframe 

In [14]:
d = {'exp': f_exp, 
     'date': f_date,
     'time': f_time,
     'task': f_task,
     'net': f_net,
     'tacc': f_tacc,
     'tloss': f_tloss,
     'vloss': f_vloss,
     'vacc1': f_vacc1,
      'vacc2': f_vacc2,
      'vacc3': f_vacc3,
      'vacc4': f_vacc4,
      'tppl': f_tppl,
       'vppl': f_vppl,
       'sps': f_sps,
        'tbpc': f_tbpc,
     'vbpc': f_vbpc
    }

df = pd.DataFrame(data=d)
df

Unnamed: 0,exp,date,time,task,net,tacc,tloss,vloss,vacc1,vacc2,vacc3,vacc4,tppl,vppl,sps,tbpc,vbpc
0,1,2020-05-23,04:25:16,fib,nam,0.99828125,0.0002540913072721,0.0361354093414168,0.96875,0.8958333333333334,0.7083333333333334,0.515625,1.0010780163841762,1.0955011498337413,121.550001,,
1,2,2020-05-23,07:26:23,fib,xlnet,0.297578125,0.1566684847325086,1.689033105969429,0.0,0.0,0.0,0.0,1.0462456852337152,1092.3656841111276,95.966771,,
2,3,2020-05-23,11:17:20,fib,gru,0.1911328125,0.1927730013150722,0.4343657412876685,0.0,0.0,0.0,0.0,2.279825938648185,2.997452519783134,263.245679,,
3,4,2020-05-23,12:40:16,fib,lstm,0.1699609375,0.2163827087823301,0.4477322834233443,0.0,0.0,0.0,0.0,2.5206723699454554,3.09897414267014,566.426348,,
4,5,2020-05-23,13:19:19,fib,tf,0.7808203125,0.0349822247424162,1.76351531346639,0.0,0.0,0.0,0.0,1.0310152719005714,124.42934719618036,158.702251,,
5,6,2020-05-23,15:38:41,arith,nam,0.9887890625,0.0013857633226803,0.0275539451880225,0.9270833333333334,0.875,0.7447916666666666,0.5520833333333334,1.0055164139972443,1.0696006489338816,123.523617,,
6,7,2020-05-23,18:36:25,arith,xlnet,0.5573828125,0.0642128897644579,2.464522868394852,0.0,0.0,0.0,0.0,1.1579966541797189,1125.9021610840816,95.709056,,
7,8,2020-05-23,19:20:03,arith,gru,0.1524609375,0.2121080620959401,0.4728956036269665,0.0,0.0,0.0,0.0,2.356660327492386,3.1725770035419307,216.652554,,
8,9,2020-05-23,21:00:24,arith,lstm,0.171171875,0.2199993622489273,0.4542794115841388,0.0,0.0,0.0,0.0,2.430296312288514,3.0308987747240854,473.167772,,
9,10,2020-05-23,22:51:59,arith,tf,0.8738671875,0.0167705991676484,2.041284720102946,0.0520833333333333,0.03125,0.0,0.0,1.0698991761221,146.3175749549045,365.823415,,


### Save into csv file

In [15]:
df.to_csv("2020-nips-v1.csv", index=False)