In [1]:
import ast
import json
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
# NOTE(review): with no category or module given, this silences every warning
# for the rest of the session; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [2]:
# Node types to keep: filter_node_type() selects rows whose node_type is in this list.
node_type = ['compute_haswell']
# NOTE(review): not referenced in the cells visible here; presumably an
# interval length in hours -- confirm against the code that uses it.
interval = 24
# Directory the input event CSVs are read from; the merged CSV is written here too.
work_dir = '../../datasets/machine_events'

In [3]:
def load_data(path):
    """Read a machine-events CSV and return it ordered by event time.

    Parameters
    ----------
    path : str or file-like
        Anything ``pandas.read_csv`` accepts. The data must contain an
        ``EVENT_TIME`` column that ``pandas.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        Every row of the file, with ``EVENT_TIME`` converted to datetimes and
        rows sorted ascending by ``EVENT_TIME``. Row labels from the file are
        kept (the index is not reset).

    Raises
    ------
    KeyError
        If the file has no ``EVENT_TIME`` column.
    """
    df = pd.read_csv(path)
    df['EVENT_TIME'] = pd.to_datetime(df['EVENT_TIME'])
    # mergesort is stable: events that share a timestamp keep their file
    # order, so order-dependent steps (e.g. rm_duplicates) are reproducible.
    return df.sort_values(by='EVENT_TIME', kind='mergesort')

def _parse_node_type(raw):
    """Extract the ``node_type`` entry from one ``PROPERTIES`` cell, or ``None``."""
    # Missing cells arrive as NaN (a float); text without the key needs no parsing.
    if not isinstance(raw, str) or 'node_type' not in raw:
        return None
    try:
        # PROPERTIES is a Python dict repr (single quotes, bare None/True/False),
        # so parse it as a Python literal rather than rewriting it into JSON.
        props = ast.literal_eval(raw)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        try:
            # Legacy path: the quote-swap-to-JSON conversion used previously,
            # kept so every row that used to parse still does.
            props = json.loads(raw.replace('\'', '\"').replace('None', '\"None\"'))
        except ValueError:
            return None
    if not isinstance(props, dict):
        return None
    return props.get('node_type')


def set_machine_type(df):
    """Add a ``node_type`` column derived from each row's ``PROPERTIES`` text.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``PROPERTIES`` column holding dict-like strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place, with ``node_type`` set to
        the parsed value, or ``None`` where the cell is missing, cannot be
        parsed, or has no ``node_type`` key. A literal ``None`` value is
        returned as ``None`` (not the string ``'None'``).

    Raises
    ------
    KeyError
        If ``df`` has no ``PROPERTIES`` column.
    """
    df['node_type'] = [_parse_node_type(raw) for raw in df['PROPERTIES']]
    return df

def filter_node_type(df, allowed_types=None):
    """Keep only the rows whose ``node_type`` is one of the wanted types.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``node_type`` column.
    allowed_types : list-like, optional
        Node types to keep. Defaults to the notebook-level ``node_type`` list.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, so columns can be assigned
        on the result without pandas' chained-assignment warning.
    """
    if allowed_types is None:
        allowed_types = node_type
    return df[df['node_type'].isin(allowed_types)].copy()
def find_time_range(df):
    """Return the ``EVENT_TIME`` values of the first and last rows of ``df``.

    Rows are picked by position, so the pair is (earliest, latest) only when
    ``df`` is already sorted by ``EVENT_TIME``. An empty frame raises
    ``IndexError``.
    """
    first_row = df.iloc[0]
    last_row = df.iloc[-1]
    return first_row['EVENT_TIME'], last_row['EVENT_TIME']

def rm_duplicates(df):
    """Reduce each host's event stream to its up/down state transitions.

    Rows are scanned in their current order. For every host
    (``HOST_NAME (PHYSICAL)``) the first ``ENABLE`` or ``UPDATE`` row is kept;
    after that a row is kept only when it flips the host's state: ``DISABLE``
    while the host is up, or ``ENABLE``/``UPDATE`` while it is down. Events
    seen before a host's first ``ENABLE``/``UPDATE``, repeated events in the
    same state, and any other event names are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``HOST_NAME (PHYSICAL)`` and ``EVENT`` columns; it is
        expected to be in chronological order already.

    Returns
    -------
    pandas.DataFrame
        The kept rows, in their original order and with their original labels.
    """
    up_events = ('ENABLE', 'UPDATE')
    host_is_up = {}   # host -> True while its last kept event was ENABLE/UPDATE
    keep_positions = []
    # Iterating the two columns avoids building a Series per row (iterrows).
    pairs = zip(df['HOST_NAME (PHYSICAL)'], df['EVENT'])
    for position, (host, event) in enumerate(pairs):
        if host not in host_is_up:
            # A host enters the log only once it has been brought up.
            if event in up_events:
                host_is_up[host] = True
                keep_positions.append(position)
        elif host_is_up[host] and event == 'DISABLE':
            host_is_up[host] = False
            keep_positions.append(position)
        elif not host_is_up[host] and event in up_events:
            host_is_up[host] = True
            keep_positions.append(position)
    # Select by position: label-based selection would return extra rows if
    # the index contained repeated labels.
    return df.iloc[keep_positions]

In [4]:
# Load the two raw event logs found in work_dir.
ucme, taccme = (
    load_data(template % work_dir)
    for template in ('%s/uc_machine_events.csv', '%s/tacc_machine_events.csv')
)

In [5]:
# Derive the node_type column of both logs from their PROPERTIES text.
ucme, taccme = map(set_machine_type, (ucme, taccme))

In [6]:
# Restrict both logs to the node types listed in node_type.
ucme, taccme = map(filter_node_type, (ucme, taccme))

In [7]:
# EVENT_TIME of the first and last row of each log.
(ucme_start_date, ucme_end_date), (taccme_start_date, taccme_end_date) = map(
    find_time_range, (ucme, taccme)
)

In [8]:
# Label every remaining row as compute_haswell.
# assign() returns a new frame rather than writing a column into the result of
# a boolean filter, which is what triggers pandas' SettingWithCopyWarning.
taccme = taccme.assign(node_type='compute_haswell')
ucme = ucme.assign(node_type='compute_haswell')

In [9]:
# Keep only the per-host state transitions in each log.
taccme, ucme = map(rm_duplicates, (taccme, ucme))

In [10]:
# Merge both logs into one event log ordered by time.
# mergesort is stable, so events sharing a timestamp keep their concat order
# (taccme rows before ucme rows) and the written file is reproducible.
me = pd.concat([taccme, ucme], axis=0).sort_values(by='EVENT_TIME', kind='mergesort')
# index=False: row labels are not written (to_csv expects a bool here).
me.to_csv('%s/compute_haswell.csv' % work_dir, index=False)

In [11]:
# Display the merged event log that was just written to disk.
me

Unnamed: 0,EVENT_TIME,EVENT_TIME_SEC,HOST_NAME (PHYSICAL),EVENT,PROPERTIES,node_type
226,2018-03-14 11:40:04,86528404.0,fa9dc77d7da1a9517f2a7fc1b5141824,UPDATE,"{'architecture.smt_size': '48', 'node_type': '...",compute_haswell
229,2018-03-14 11:40:05,86528405.0,38f89d8426232fb9b11f73b96e401286,UPDATE,"{'main_memory.ram_size': '134956859392', 'node...",compute_haswell
228,2018-03-14 11:40:05,86528405.0,3a9800c4b261fdc8595ed40da7568ccc,UPDATE,"{'node_type': 'compute_haswell', 'placement.ra...",compute_haswell
235,2018-03-14 11:40:05,86528405.0,eed618d03d23adb31c37870d8f291e79,UPDATE,"{'storage_devices.0.size': '250059350016', 'pl...",compute_haswell
234,2018-03-14 11:40:05,86528405.0,ef87ce346581cc2449c4c827bd3844b9,UPDATE,"{'placement.rack': 1, 'node_type': 'compute_ha...",compute_haswell
...,...,...,...,...,...,...
847,2020-05-19 16:30:19,155406619.0,ea9320eaa87b83877f5652f5da92d9fe,DISABLE,"{'architecture.platform_type': 'x86_64', 'arch...",compute_haswell
840,2020-06-17 20:35:19,157926919.0,ea9320eaa87b83877f5652f5da92d9fe,ENABLE,"{'architecture.platform_type': 'x86_64', 'arch...",compute_haswell
838,2020-07-17 20:32:43,160518763.0,c639a6c51ba4ee0577ff8e8b45362207,DISABLE,"{'node_type': 'compute_haswell', 'main_memory....",compute_haswell
220,2020-08-26 17:00:23,163962023.0,eed618d03d23adb31c37870d8f291e79,ENABLE,"{'storage_devices.0.size': '250059350016', 'pl...",compute_haswell
