In [7]:
import pandas as pd
from pathlib import Path

In [8]:
# Root directory that the cells below search recursively for experiment
# folders and per-experiment timestamp.csv files.
RESULT_DIR = Path('../results')
# Selects which metric files are summarised per experiment: "CPU" or "GPU".
execution="GPU"

In [9]:
# Metrics functions
def get_min_max_mean(df, column, start_time, end_time):
    """Return ``(min, max, mean)`` of ``column`` inside a closed time window.

    A row belongs to the window when its ``TIMESTAMP`` value satisfies
    ``start_time <= TIMESTAMP <= end_time`` (both bounds inclusive).

    Args:
        df: Frame holding a ``TIMESTAMP`` column and ``column``.
        column: Name of the column to aggregate.
        start_time: Lower bound of the window.
        end_time: Upper bound of the window.

    Returns:
        A 3-tuple ``(minimum, maximum, mean)`` computed over the rows that
        fall inside the window.
    """
    timestamps = df['TIMESTAMP']
    in_window = (timestamps >= start_time) & (timestamps <= end_time)
    window_values = df[in_window][column]
    return window_values.min(), window_values.max(), window_values.mean()
  
def add_min_max_mean_columns(df, column, index, min_value, max_value, mean_value):
    """Write three summary statistics onto row ``index`` of ``df``, in place.

    The values are stored in the columns ``<column>_MIN``, ``<column>_MAX``
    and ``<column>_MEAN``, assigned in that order through ``df.loc``.

    Args:
        df: Frame that is modified in place.
        column: Prefix used to build the three target column names.
        index: Row label to write to.
        min_value: Value stored under ``<column>_MIN``.
        max_value: Value stored under ``<column>_MAX``.
        mean_value: Value stored under ``<column>_MEAN``.
    """
    stats = (('MIN', min_value), ('MAX', max_value), ('MEAN', mean_value))
    for suffix, value in stats:
        df.loc[index, f'{column}_{suffix}'] = value

def sumarise_df_cpu(df_cpu, df_mem_cpu, df_timestamp):
    """Attach CPU and CPU-memory statistics to every row of ``df_timestamp``.

    For each row, the window ``[START_TIME, END_TIME]`` is used to compute
    min/max/mean of the `` CPU`` column of ``df_cpu`` and of the `` USED``
    column of ``df_mem_cpu``. The results are written to the ``CPU_*`` and
    ``MEM_CPU_*`` columns of that row.

    All three frames are modified in place: the ``TIMESTAMP`` columns are
    cast to int, and ``START_TIME``/``END_TIME`` are replaced by the integer
    formed from the first 10 characters of their string form.
    NOTE(review): the 10-character cut presumably reduces longer epoch
    values (e.g. milliseconds) to seconds - confirm against the data.

    Args:
        df_cpu: Samples with ``TIMESTAMP`` and `` CPU`` columns.
        df_mem_cpu: Samples with ``TIMESTAMP`` and `` USED`` columns.
        df_timestamp: Frame with ``START_TIME`` and ``END_TIME`` columns.

    Returns:
        The same ``df_timestamp`` object, with the statistic columns added.
    """
    for samples in (df_cpu, df_mem_cpu):
        samples['TIMESTAMP'] = samples['TIMESTAMP'].astype(int)

    for bound in ('START_TIME', 'END_TIME'):
        df_timestamp[bound] = df_timestamp[bound].astype(str).str[:10].astype(int)

    # (sample frame, column to aggregate, prefix of the output columns)
    metrics = (
        (df_cpu, ' CPU', 'CPU'),
        (df_mem_cpu, ' USED', 'MEM_CPU'),
    )

    for row_label, row in df_timestamp.iterrows():
        window_start = row['START_TIME']
        window_end = row['END_TIME']
        for samples, source_column, prefix in metrics:
            stats = get_min_max_mean(samples, source_column, window_start, window_end)
            add_min_max_mean_columns(df_timestamp, prefix, row_label, *stats)

    return df_timestamp
  
def sumarise_df_gpu(df_cpu, df_mem_cpu, df_gpu, df_mem_gpu, df_timestamp):
    """Attach CPU, CPU-memory, GPU and GPU-memory statistics to each row.

    For each row of ``df_timestamp``, the window ``[START_TIME, END_TIME]``
    is used to compute min/max/mean of:

    * `` CPU`` in ``df_cpu``      -> ``CPU_*`` columns
    * `` USED`` in ``df_mem_cpu`` -> ``MEM_CPU_*`` columns
    * `` GPU`` in ``df_gpu``      -> ``GPU_*`` columns
    * `` USED`` in ``df_mem_gpu`` -> ``MEM_GPU_*`` columns

    All five frames are modified in place: the ``TIMESTAMP`` columns are
    cast to int, and ``START_TIME``/``END_TIME`` are replaced by the integer
    formed from the first 10 characters of their string form.
    NOTE(review): the 10-character cut presumably reduces longer epoch
    values (e.g. milliseconds) to seconds - confirm against the data.

    Args:
        df_cpu: Samples with ``TIMESTAMP`` and `` CPU`` columns.
        df_mem_cpu: Samples with ``TIMESTAMP`` and `` USED`` columns.
        df_gpu: Samples with ``TIMESTAMP`` and `` GPU`` columns.
        df_mem_gpu: Samples with ``TIMESTAMP`` and `` USED`` columns.
        df_timestamp: Frame with ``START_TIME`` and ``END_TIME`` columns.

    Returns:
        The same ``df_timestamp`` object, with the statistic columns added.
    """
    for samples in (df_cpu, df_mem_cpu, df_gpu, df_mem_gpu):
        samples['TIMESTAMP'] = samples['TIMESTAMP'].astype(int)

    for bound in ('START_TIME', 'END_TIME'):
        df_timestamp[bound] = df_timestamp[bound].astype(str).str[:10].astype(int)

    # (sample frame, column to aggregate, prefix of the output columns)
    metrics = (
        (df_cpu, ' CPU', 'CPU'),
        (df_mem_cpu, ' USED', 'MEM_CPU'),
        (df_gpu, ' GPU', 'GPU'),
        (df_mem_gpu, ' USED', 'MEM_GPU'),
    )

    for row_label, row in df_timestamp.iterrows():
        window_start = row['START_TIME']
        window_end = row['END_TIME']
        for samples, source_column, prefix in metrics:
            stats = get_min_max_mean(samples, source_column, window_start, window_end)
            add_min_max_mean_columns(df_timestamp, prefix, row_label, *stats)

    return df_timestamp


In [11]:
# Summarise every experiment directory found under RESULT_DIR: read its
# timestamp.csv, add the min/max/mean columns for the metrics selected by
# `execution`, and write the enriched table back to the same file.
# rglob() is already recursive, so no leading '**/' is needed in the pattern.
for dir_path in RESULT_DIR.rglob('experiment*'):
    try:
        df_timestamp = pd.read_csv(dir_path / 'timestamp.csv')

        if execution == "CPU":
            df_cpu = pd.read_csv(dir_path / 'cpu.csv')
            df_mem_cpu = pd.read_csv(dir_path / 'mem.csv')
            df_timestamp = sumarise_df_cpu(df_cpu, df_mem_cpu, df_timestamp)

        elif execution == "GPU":
            df_cpu = pd.read_csv(dir_path / 'cpu.csv')
            df_mem_cpu = pd.read_csv(dir_path / 'mem.csv')
            df_gpu = pd.read_csv(dir_path / 'gpu.csv')
            df_mem_gpu = pd.read_csv(dir_path / 'mem-gpu.csv')
            # Keep only the GPU-memory rows whose command contains
            # '/src/main'. The filtered frame must be assigned back, otherwise
            # the filter has no effect. na=False drops rows with a missing
            # command instead of failing on a NaN mask; .copy() lets
            # sumarise_df_gpu modify the frame without a chained-assignment
            # warning.
            is_main_process = df_mem_gpu[' COMMAND'].str.contains(
                '/src/main', regex=False, na=False
            )
            df_mem_gpu = df_mem_gpu[is_main_process].copy()
            df_timestamp = sumarise_df_gpu(df_cpu, df_mem_cpu, df_gpu, df_mem_gpu, df_timestamp)

        df_timestamp.to_csv(dir_path / 'timestamp.csv', index=False)

    except Exception as e:
        # Best effort: report the failing experiment and carry on with the
        # remaining ones.
        print(f"Error on {dir_path}", e)


In [12]:
# Gather every per-experiment timestamp.csv under RESULT_DIR into a single
# table, tagged with the file it came from, and write it to
# RESULT_DIR/timestamp.csv.
aggregate_path = RESULT_DIR / 'timestamp.csv'
frames = []
for timestamp_file_path in RESULT_DIR.rglob('timestamp.csv'):
    # The recursive search also matches the aggregate written by a previous
    # run of this cell; it is an output, not an input, so skip it.
    if timestamp_file_path == aggregate_path:
        continue
    try:
        df_timestamp = pd.read_csv(timestamp_file_path)
        df_timestamp.insert(0, 'directory', timestamp_file_path)
        frames.append(df_timestamp)
    except Exception as e:
        # Best effort: report the unreadable file and keep going.
        print(f"Error on {timestamp_file_path}", e)

if frames:
    # A single concat avoids re-copying the accumulated frame on every file.
    df = pd.concat(frames, axis=0)
    df.sort_values(by=['directory'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    df.to_csv(aggregate_path, index=False)
else:
    # Nothing to aggregate: keep `df` defined but do not write an empty file.
    df = pd.DataFrame()
    print(f"No timestamp.csv files found under {RESULT_DIR}")