Import libraries

In [1]:
import re
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt 

def delete_outliers(df, column, plot=True):
    """Drop the rows of ``df`` whose ``column`` value is an IQR outlier.

    A row is removed when its value is at or below ``Q1 - 1.5 * IQR`` or at or
    above ``Q3 + 1.5 * IQR``, where Q1 and Q3 are the 25th and 75th
    percentiles of ``df[column]``. The frame is modified in place.

    Args:
        df: DataFrame to clean; rows are dropped from this object.
        column: Name of the numeric column used to detect outliers.
        plot: When true (the default), draw a seaborn box plot of the
            cleaned column.

    Returns:
        The same ``df`` object, after the outlier rows have been removed.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr

    # Select rows by index *label*, not by position: ``drop(index=...)``
    # expects labels, and on a filtered subset the labels no longer match the
    # row positions that ``np.where`` would report.
    is_outlier = (df[column] >= upper) | (df[column] <= lower)
    df.drop(index=df.index[is_outlier], inplace=True)

    print("New Shape: ", df.shape)
    if plot:
        sns.boxplot(df[column])
    return df

def parse_pdes(file):
    """Parse a PDES statistics log into a DataFrame.

    Every line of ``file`` is tested against a fixed, ordered list of
    ``LABEL : value`` patterns. The text captured after the separator is
    appended to the column of the first pattern that matches, so a line
    contributes to at most one column.

    Args:
        file: Path of the log file to read.

    Returns:
        DataFrame with one column per statistic. The count-like columns and
        ``Total time`` (after its ``seconds`` suffix is removed) are converted
        to numbers; all other columns keep the captured text.

    Raises:
        ValueError: If the statistics do not all occur the same number of
            times in the log (the columns would have unequal lengths), or if
            a value in a numeric column cannot be parsed as a number.
    """
    # (column name, line pattern) pairs in MATCH order. The EMA pattern must
    # stay ahead of the plain "AVERAGE EVENT COST" pattern: the unescaped dots
    # of the plain pattern match any character, so it also matches EMA lines.
    fields = (
        ("Start time",               r"SIMULATION STARTED AT ..... : (.*)"),
        ("Finish time",              r"SIMULATION FINISHED AT .... : (.*)"),
        ("Total time",               r"TOTAL SIMULATION TIME ..... : (.*)"),
        ("Total kernels",            r"TOTAL KERNELS ............. : (.*)"),
        ("Kernel id",                r"KERNEL ID ................. : (.*)"),
        ("LPs hosted by kernel",     r"LPs HOSTED BY KERNEL....... : (.*)"),
        ("Total threads",            r"TOTAL_THREADS ............. : (.*)"),
        ("Total executed events",    r"TOTAL EXECUTED EVENTS ..... : (.*)"),
        ("Total commited events",    r"TOTAL COMMITTED EVENTS..... : (.*)"),
        ("Total reprocessed events", r"TOTAL REPROCESSED EVENTS... : (.*)"),
        ("Total rollbacks",          r"TOTAL ROLLBACKS EXECUTED... : (.*)"),
        ("Total antimessages",       r"TOTAL ANTIMESSAGES......... : (.*)"),
        ("Rollback frequency",       r"ROLLBACK FREQUENCY......... : (.*)"),
        ("Rollback length",          r"ROLLBACK LENGTH............ : (.*)"),
        ("Efficiency",               r"EFFICIENCY................. : (.*)"),
        ("Average event cost (EMA)", r"AVERAGE EVENT COST .EMA.... : (.*)"),
        ("Average event cost",       r"AVERAGE EVENT COST......... : (.*)"),
        ("Average checkpoint cost",  r"AVERAGE CHECKPOINT COST.... : (.*)"),
        ("Average recovery cost",    r"AVERAGE RECOVERY COST...... : (.*)"),
        ("Average log size",         r"AVERAGE LOG SIZE........... : (.*)"),
        ("Idle cycles",              r"IDLE CYCLES................ : (.*)"),
        ("Last commited gvt",        r"LAST COMMITTED GVT ........ : (.*)"),
        ("Number of gvt reductions", r"NUMBER OF GVT REDUCTIONS... : (.*)"),
        ("Simulation time speed",    r"SIMULATION TIME SPEED...... : (.*)"),
        ("Average memory usage",     r"AVERAGE MEMORY USAGE....... : (.*)"),
        ("Peak memory usage",        r"PEAK MEMORY USAGE.......... : (.*)"),
    )

    # The context manager closes the file even if reading fails.
    with open(file, mode='r') as handle:
        lines = handle.readlines()

    matchers = [(name, re.compile(pattern)) for name, pattern in fields]
    captured = {name: [] for name, _ in fields}
    for line in lines:
        for name, matcher in matchers:
            found = matcher.search(line)
            if found:
                captured[name].append(found.group(1))
                break  # first matching pattern wins

    # Column order equals match order, except that the plain event cost is
    # listed before its EMA variant in the resulting frame.
    column_order = [name for name, _ in fields]
    ema_at = column_order.index("Average event cost (EMA)")
    column_order[ema_at], column_order[ema_at + 1] = (
        column_order[ema_at + 1],
        column_order[ema_at],
    )

    df = pd.DataFrame({name: captured[name] for name in column_order})

    print(df)
    cols = ["Total kernels","Kernel id","LPs hosted by kernel","Total threads","Total executed events","Total commited events","Total reprocessed events","Idle cycles","Last commited gvt","Number of gvt reductions"]
    df[cols] = df[cols].apply(pd.to_numeric)
    # 'Total time' is captured as text ending in 'seconds'; strip the unit
    # before converting it to a number.
    df['Total time'] = df['Total time'].str.replace('seconds', '')
    df['Total time'] = df['Total time'].apply(pd.to_numeric)

    return df

def parse_pdes_wr(file_name, events):
    """Collect the run times reported for a given committed-event count.

    Scans ``file_name`` for lines containing
    ``committed <events> events in <time>,`` and records ``<time>`` for each
    of them.

    Args:
        file_name: Path of the log file to read.
        events: Committed-event count to look for; it is formatted into the
            search pattern and also stored in the result.

    Returns:
        DataFrame with a numeric ``Total time`` column (one row per matching
        line) and a ``Total commited events`` column holding ``events``.

    Raises:
        ValueError: If a captured time is not a number once every ``s``
            character has been removed from it.
    """
    # The context manager closes the file even if reading fails.
    with open(file_name, mode='r') as handle:
        lines = handle.readlines()

    # The pattern does not depend on the line, so build it once.
    run_time = re.compile(r"committed "+f"{events}"+r" events in (.*),")

    total_time = []
    for line in lines:
        found = run_time.search(line)
        if found:
            total_time.append(found.group(1))

    df_wr = pd.DataFrame({
        "Total time"                : total_time
    })
    df_wr["Total commited events"] = events

    print(df_wr)

    # Times are captured with an 's' unit suffix; note that this removes
    # every 's' character from the captured text, not only a trailing one.
    df_wr['Total time'] = df_wr['Total time'].str.replace('s', '')
    df_wr['Total time'] = df_wr['Total time'].apply(pd.to_numeric)

    df_wr.info(verbose=True)

    return df_wr

# Time warp

### Data cleaning

In [2]:
# Parse the Time Warp log into a DataFrame and list its columns and dtypes.
df = parse_pdes('test_tw_2_16_10000.o')
df.info(verbose=True)
# Outlier removal is currently disabled for this frame.
#df = delete_outliers(df,'Total commited events')
#df = delete_outliers(df,'Total time')

              Start time           Finish time      Total time Total kernels  \
0   2024-12-04 11:56:20   2024-12-04 11:56:24   3.885 seconds             1    
1   2024-12-04 11:56:24   2024-12-04 11:56:28   3.983 seconds             1    
2   2024-12-04 11:56:28   2024-12-04 11:56:34   6.029 seconds             1    
3   2024-12-04 11:56:35   2024-12-04 11:56:38   2.991 seconds             1    
4   2024-12-04 11:56:38   2024-12-04 11:56:42   3.994 seconds             1    
..                   ...                   ...             ...           ...   
95  2024-12-04 12:03:01   2024-12-04 12:03:06   4.938 seconds             1    
96  2024-12-04 12:03:07   2024-12-04 12:03:10   3.970 seconds             1    
97  2024-12-04 12:03:11   2024-12-04 12:03:14   2.949 seconds             1    
98  2024-12-04 12:03:14   2024-12-04 12:03:17   2.983 seconds             1    
99  2024-12-04 12:03:17   2024-12-04 12:03:21   4.009 seconds             1    

   Kernel id LPs hosted by kernel Total

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns of the Time Warp frame.
df.describe()

Unnamed: 0,Total time,Total kernels,Kernel id,LPs hosted by kernel,Total threads,Total executed events,Total commited events,Total reprocessed events,Idle cycles,Last commited gvt,Number of gvt reductions
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,3.93204,1.0,0.0,16.0,2.0,228348.4,81808.48,109730.26,6115814.0,5120.768951,2.95
std,1.420585,0.0,0.0,0.0,0.0,136371.632579,50351.893539,65847.706599,2295452.0,3151.955003,1.416889
min,2.855,1.0,0.0,16.0,2.0,18954.0,6538.0,8953.0,3374369.0,405.461584,2.0
25%,2.983,1.0,0.0,16.0,2.0,139551.75,48609.0,66941.5,4633524.0,3051.013352,2.0
50%,3.4785,1.0,0.0,16.0,2.0,189183.0,66611.0,91860.5,5303054.0,4160.413396,2.5
75%,3.99425,1.0,0.0,16.0,2.0,297253.25,103041.0,135271.0,6855894.0,6488.59127,3.0
max,11.011,1.0,0.0,16.0,2.0,782535.0,287303.0,384567.0,17994560.0,18030.795175,10.0


#### temp

In [None]:
# Split the Time Warp frame into one DataFrame per (LPs, threads)
# configuration, each bound to a module-level name df_tw_<LPs>_<threads>.
lps_column = df['LPs hosted by kernel']
threads_column = df['Total threads']

# The 2-LP subset is selected on the LP count alone.
df_tw_2_2 = pd.DataFrame(df[lps_column == 2])

# Largest 'Total threads' value for which a subset is built, keyed by the
# 'LPs hosted by kernel' value; thread counts always start at 2.
max_threads_by_lps = {4: 4, 8: 8, 16: 16, 32: 20, 64: 20, 128: 5}

for n_lps, max_threads in max_threads_by_lps.items():
    for n_threads in range(2, max_threads + 1):
        selected = df[(lps_column == n_lps) & (threads_column == n_threads)]
        globals()[f'df_tw_{n_lps}_{n_threads}'] = pd.DataFrame(selected)

In [None]:
# Draw one box plot of 'Total commited events' per Time Warp subset, titled
# with the name of the subset variable.
subset_names = ['df_tw_2_2']
for n_lps, max_threads in ((4, 4), (8, 8), (16, 16), (32, 20), (64, 20), (128, 5)):
    subset_names.extend(
        f'df_tw_{n_lps}_{n_threads}' for n_threads in range(2, max_threads + 1)
    )

for subset_name in subset_names:
    subset = globals()[subset_name]
    sns.boxplot(subset['Total commited events']).set_title(subset_name)
    plt.show()

# DES

### Data parsing

In [None]:
# Parse the DES log into df_des: each line is tested against the patterns
# below and the captured text is appended to the first matching column.

# The context manager closes the file even if reading fails.
with open('test_des.o', mode='r') as des_log:
    lines = des_log.readlines()

# (column name, line pattern) pairs in MATCH order. The EMA pattern must stay
# ahead of the plain "AVERAGE EVENT COST" pattern: the unescaped dots of the
# plain pattern match any character, so it also matches EMA lines.
des_fields = (
    ("Start time",               r"SIMULATION STARTED AT ..... : (.*)"),
    ("Finish time",              r"SIMULATION FINISHED AT .... : (.*)"),
    ("Total time",               r"TOTAL SIMULATION TIME ..... : (.*)"),
    ("LPs",                      r"TOTAL LPs.................. : (.*)"),
    ("Total executed events",    r"TOTAL EXECUTED EVENTS ..... : (.*)"),
    ("Average event cost (EMA)", r"AVERAGE EVENT COST .EMA.... : (.*)"),
    ("Average event cost",       r"AVERAGE EVENT COST......... : (.*)"),
    ("Last commited gvt",        r"LAST COMMITTED GVT ........ : (.*)"),
    ("Simulation time speed",    r"SIMULATION TIME SPEED...... : (.*)"),
    ("Average memory usage",     r"AVERAGE MEMORY USAGE....... : (.*)"),
    ("Peak memory usage",        r"PEAK MEMORY USAGE.......... : (.*)"),
)

des_matchers = [(name, re.compile(pattern)) for name, pattern in des_fields]
des_values = {name: [] for name, _ in des_fields}

for line in lines:
    for name, matcher in des_matchers:
        found = matcher.search(line)
        if found:
            des_values[name].append(found.group(1))
            break  # first matching pattern wins

# Column order of the resulting frame; the plain event cost is listed before
# its EMA variant, the reverse of the match order above.
des_columns = [
    "Start time",
    "Finish time",
    "Total time",
    "LPs",
    "Total executed events",
    "Average event cost",
    "Average event cost (EMA)",
    "Last commited gvt",
    "Simulation time speed",
    "Average memory usage",
    "Peak memory usage",
]

df_des = pd.DataFrame({name: des_values[name] for name in des_columns})

print(df_des)

In [None]:
# Convert the parsed text columns that hold plain numbers.
cols = ["LPs", "Total executed events", "Last commited gvt"]
df_des[cols] = df_des[cols].apply(pd.to_numeric)

# 'Total time' carries a 'seconds' unit; strip it, then convert to a number.
time_without_unit = df_des['Total time'].str.replace('seconds', '')
df_des['Total time'] = time_without_unit
df_des['Total time'] = df_des['Total time'].apply(pd.to_numeric)

df_des.info(verbose=True)

In [None]:
# Build one DataFrame per LP count, bound to the module-level name
# df_des_<LPs>.
for n_lps in (2, 4, 8, 16, 32, 64, 128):
    rows_for_lps = df_des[df_des['LPs'] == n_lps]
    globals()[f'df_des_{n_lps}'] = pd.DataFrame(rows_for_lps)

### Data cleaning

In [None]:
# One box plot of 'Total executed events' per DES subset, shown separately.
for des_subset in (df_des_2, df_des_4, df_des_8, df_des_16, df_des_32):
    sns.boxplot(des_subset['Total executed events'])
    plt.show()

In [None]:
def delete_outliers(df, col, plot=True):
    """Drop the rows of ``df`` whose ``col`` value is an IQR outlier.

    A row is removed when its value is at or below ``Q1 - 1.5 * IQR`` or at or
    above ``Q3 + 1.5 * IQR``, where Q1 and Q3 are the 25th and 75th
    percentiles of ``df[col]``. The frame is modified in place.

    Args:
        df: DataFrame to clean; rows are dropped from this object.
        col: Name of the numeric column used to detect outliers.
        plot: When true (the default), draw and show a seaborn box plot of
            the cleaned column.

    Returns:
        The same ``df`` object, so that ``x = delete_outliers(x, ...)`` keeps
        ``x`` bound to the cleaned frame.
    """
    q1 = df[col].quantile(0.25)
    q3 = df[col].quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr

    # Select rows by index *label*, not by position: ``drop(index=...)``
    # expects labels, and on a filtered subset the labels no longer match the
    # row positions that ``np.where`` would report.
    is_outlier = (df[col] >= upper) | (df[col] <= lower)
    df.drop(index=df.index[is_outlier], inplace=True)

    print("New Shape: ", df.shape)
    if plot:
        sns.boxplot(df[col])
        plt.show()
    return df

In [None]:
# Remove 'Total executed events' outliers from each DES subset; the subsets
# are modified in place by delete_outliers().
for des_subset in (df_des_2, df_des_8, df_des_16, df_des_32, df_des_4):
    delete_outliers(df=des_subset, col='Total executed events')

In [None]:
# One box plot of 'Total time' per DES subset, shown separately.
for des_subset in (df_des_2, df_des_4, df_des_8, df_des_16, df_des_32):
    sns.boxplot(des_subset['Total time'])
    plt.show()

In [None]:
# Remove 'Total time' outliers from each DES subset. delete_outliers() drops
# the rows in place, so the subsets are deliberately not rebound to its
# return value: rebinding would replace a frame with None whenever the
# function returns nothing.
for des_subset in (df_des_2, df_des_4, df_des_8, df_des_16, df_des_32):
    delete_outliers(df=des_subset, col='Total time')

# Window racer

### Data parsing

### Data cleaning

In [None]:
# Parse the Window Racer log for runs that committed 159326 events.
df_wr = parse_pdes_wr('test_wr.o', 159326)
# delete_outliers() drops the rows in place, so df_wr is deliberately not
# rebound to its return value: rebinding would replace the frame with None
# whenever the function returns nothing.
delete_outliers(df_wr, 'Total time')

In [None]:
# Summary statistics of the numeric columns of the Window Racer frame.
df_wr.describe()

# Comparison

In [None]:
# Summary statistics of the DES subset with LPs == 2.
df_des_2.describe()

In [None]:
# Summary statistics of the DES subset with LPs == 4.
df_des_4.describe()

In [None]:
# Summary statistics of the DES subset with LPs == 8.
df_des_8.describe()

In [None]:
# Summary statistics of the DES subset with LPs == 16.
df_des_16.describe()

In [None]:

# Summary statistics of the DES subset with LPs == 32.
df_des_32.describe()

In [None]:

# Summary statistics of the DES subset with LPs == 64.
# NOTE(review): no delete_outliers() call for df_des_64 is visible in this
# notebook, so these statistics appear to include outliers - confirm.
df_des_64.describe()

In [None]:

# Summary statistics of the numeric columns of the full DES frame.
df_des.describe()

In [None]:
# Summary statistics of the Window Racer frame, repeated here for comparison.
df_wr.describe()

In [None]:
# Summary statistics of the Time Warp frame, repeated here for comparison.
df.describe()

In [None]:
# Print, for every Time Warp subset, a heading followed by the mean of the
# selected columns.
columns_to_average = ['Total time', 'LPs hosted by kernel','Total threads','Total executed events','Total commited events','Last commited gvt']

# (LPs, highest reported thread count) pairs in report order; thread counts
# start at 2 for every LP count.
report_groups = ((2, 2), (4, 4), (8, 8), (16, 16), (32, 20), (64, 20), (128, 4))

for n_lps, max_threads in report_groups:
    for n_threads in range(2, max_threads + 1):
        subset_name = f'df_tw_{n_lps}_{n_threads}'
        # These headings are printed with one trailing space; keep the
        # output text unchanged.
        padded = n_lps == 128 or (n_lps == 64 and n_threads >= 10)
        heading = (subset_name + ' ') if padded else subset_name
        print(heading)
        print(globals()[subset_name][columns_to_average].mean())
