# MIT Supercloud dataset: análise das séries temporais

Meu objetivo aqui é estruturar o processo de conversão das séries temporais. Eu já filtrei os logs do SLURM e defini os jobs que serão usados.

In [27]:
import ast

import pandas as pd

# Show up to 25 columns when rendering wide frames.
pd.options.display.max_columns = 25

In [30]:
# The `steps` and `series` columns are stored as Python list literals.
# ast.literal_eval parses literals only, whereas eval would execute any
# expression found in the CSV file.
logs = pd.read_csv(
    'scheduler-log-batsim-ready.csv',
    converters={'steps': ast.literal_eval, 'series': ast.literal_eval},
)
logs.head()

Unnamed: 0,id,subtime,res,profile,partition,walltime,runtime,cpus,waittime,steps,series
0,82691694838059,1609535703,1,usage_82691694838059,GPU-avail,4294967295,308,20,270594,"[-1, batch]","[1, 1]"
1,3434806870797,1609535703,1,usage_3434806870797,GPU-avail,4294967295,397,20,270904,"[-1, batch]","[1, 1]"
2,8370846758272,1609535703,1,usage_8370846758272,GPU-avail,4294967295,327,20,271301,"[-1, batch]","[1, 1]"
3,16051179268048,1609535912,1,usage_16051179268048,GPU-avail,4294967295,307,20,272558,"[-1, batch]","[1, 1]"
4,26419036762941,1609809099,1,usage_26419036762941,GPU-avail,4294967295,360,1,1,"[-1, batch]","[1, 1]"


**TODO:** adicionar no outro arquivo o filtro abaixo, que remove os jobs cujo último step possui mais de uma série.

In [34]:
# Flag the jobs whose last entry in `series` is greater than 1.
logs['series'].map(lambda counts: counts[-1] > 1)

0         False
1         False
2         False
3         False
4         False
          ...  
386755    False
386756    False
386757    False
386758    False
386759    False
Name: series, Length: 386760, dtype: bool

In [35]:
# Drop, in place, every job whose last entry in `series` is greater than 1,
# then summarise what is left.
logs.drop(
    index=logs.index[logs['series'].map(lambda counts: counts[-1] > 1).to_numpy()],
    inplace=True,
)
logs.info()

<class 'pandas.core.frame.DataFrame'>
Index: 386757 entries, 0 to 386759
Data columns (total 11 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   id         386757 non-null  int64 
 1   subtime    386757 non-null  int64 
 2   res        386757 non-null  int64 
 3   profile    386757 non-null  object
 4   partition  386757 non-null  object
 5   walltime   386757 non-null  int64 
 6   runtime    386757 non-null  int64 
 7   cpus       386757 non-null  int64 
 8   waittime   386757 non-null  int64 
 9   steps      386757 non-null  object
 10  series     386757 non-null  object
dtypes: int64(7), object(4)
memory usage: 35.4+ MB


Quero selecionar um job de cada tipo e emular a conversão de cada um deles. Lembrando que temos jobs com as seguintes sequências de steps:
- [-4, batch], nó único;
- [-1, batch], nó único;
- [-4, 0], nó único;
- [-4, 0, batch], nó único e múltiplos nós;
- [-1, 0, batch], nó único e múltiplos nós.

Como os dois últimos tipos ocorrem tanto em nó único quanto em múltiplos nós, são sete casos no total.

In [44]:
def _pick_job(steps, multi_node=False, position=0):
    """Return one job row of `logs` with exactly the given step sequence.

    Parameters
    ----------
    steps : list of str
        Step sequence the job must have, e.g. ``['-4', 'batch']``.
    multi_node : bool
        When False, only jobs with ``res == 1`` are considered; when True,
        only jobs with ``res > 1``.
    position : int
        Positional index of the row to return among the matching jobs.
    """
    node_mask = (logs.res > 1) if multi_node else (logs.res == 1)
    # Compare the step lists by explicit equality so the selection does not
    # depend on how `isin` handles unhashable list values.
    step_mask = logs['steps'].apply(lambda job_steps: job_steps == steps)
    return logs[node_mask & step_mask].iloc[position]


# One representative job per step pattern / node-count combination.
type1_job = _pick_job(['-4', 'batch'])
type2_job = _pick_job(['-1', 'batch'])
type3_job = _pick_job(['-4', '0'])
type4_job = _pick_job(['-4', '0', 'batch'])
type5_job = _pick_job(['-1', '0', 'batch'])
type6_job = _pick_job(['-4', '0', 'batch'], multi_node=True, position=1)
type7_job = _pick_job(['-1', '0', 'batch'], multi_node=True, position=3)

In [45]:
# Inspect the multi-node job selected for the ['-1', '0', 'batch'] step pattern.
type7_job

id                 6828654029886
subtime               1610186044
res                            2
profile      usage_6828654029886
partition              GPU-avail
walltime              4294967295
runtime                    36982
cpus                          48
waittime                       0
steps             [-1, 0, batch]
series                 [2, 1, 1]
Name: 15069, dtype: object

In [11]:
# Load the raw samples of the first selected job, restricted to the four
# columns used in the conversion and with explicit dtypes.
type1_series = pd.read_csv(
    f'timeseries/{type1_job.id}-timeseries.csv',
    usecols=['Step', 'Series', 'CPUFrequency', 'CPUUtilization'],
    dtype={
        'Step': 'string',
        'Series': 'int64',
        'CPUFrequency': 'int64',
        'CPUUtilization': 'float64',
    },
)
type1_series.head()

Unnamed: 0,Step,Series,CPUFrequency,CPUUtilization
0,-4,0,3541,0.0
1,-4,0,3541,0.0
2,-4,0,3541,0.0
3,-4,0,3541,0.0
4,-4,0,3541,0.0


In [47]:
def convert_timeseries(job, instructions_per_cycle=32):
    """Convert the raw time series of one job into per-host usage rows.

    Reads ``timeseries/<job.id>-timeseries.csv`` and keeps the samples of
    steps ``'0'`` and ``'batch'`` whose ``Series`` value is in
    ``range(job.res)``. It then decides which step represents each host:

    * ``job.res == 1``: if ``'batch'`` samples exist, only they are kept;
      otherwise the ``'0'`` samples are kept.
    * ``job.res > 1``: if the kept samples already reach series
      ``job.res - 1``, the ``'batch'`` samples are discarded; otherwise the
      ``'batch'`` samples are relabelled as series ``job.res - 1``.

    Parameters
    ----------
    job : object
        Must expose the attributes ``id``, ``cpus``, ``partition`` and
        ``res`` (for example a row of the scheduler log frame).
    instructions_per_cycle : int, optional
        Multiplier applied when computing the ``flops`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``host`` (the ``Series`` value), ``flag`` (always
        ``'m_usage'``), ``usage`` (``CPUUtilization / (cpus_per_node * 100)``
        capped at 1) and ``flops``
        (``job.cpus * CPUFrequency * instructions_per_cycle``), indexed
        ``0..n-1``.
    """
    job_id = job.id  # local name chosen so the built-in `id` is not shadowed
    cpus_allocated = job.cpus
    # 40 cores per node on the 'GPU-avail' partition, 48 on any other one.
    cpus_per_node = 40 if job.partition == 'GPU-avail' else 48
    resources_allocated = job.res

    ts_df = pd.read_csv(
        f'timeseries/{job_id}-timeseries.csv',
        usecols=['Step', 'Series', 'CPUFrequency', 'CPUUtilization'],
        dtype={
            'Step': 'string',
            'Series': 'int64',
            'CPUFrequency': 'int64',
            'CPUUtilization': 'float64',
        },
    )

    wanted_step = ts_df['Step'].isin(['0', 'batch'])
    wanted_series = ts_df['Series'].isin(list(range(resources_allocated)))
    ts_df = ts_df[wanted_step & wanted_series].reset_index(drop=True)

    # From here on `Step` only contains '0' and/or 'batch'.
    has_batch = (ts_df['Step'] == 'batch').any()

    if resources_allocated == 1:
        if has_batch:
            ts_df = ts_df[ts_df['Step'] == 'batch']
        # Without 'batch' samples there is nothing to remove.
    elif resources_allocated > 1:
        highest_series = ts_df['Series'].max()
        if highest_series == resources_allocated - 1:
            ts_df = ts_df[ts_df['Step'] == '0']
        elif highest_series < resources_allocated - 1:
            # Attribute the 'batch' samples to the last allocated resource.
            # `Step` is not rewritten because the column is dropped below.
            ts_df.loc[ts_df['Step'] == 'batch', 'Series'] = resources_allocated - 1

    # Row filtering above can leave gaps in the index; always hand back 0..n-1.
    ts_df = ts_df.reset_index(drop=True)

    ts_df = ts_df.rename(columns={'Series': 'host'})
    ts_df['flag'] = 'm_usage'
    ts_df['usage'] = (ts_df['CPUUtilization'] / (cpus_per_node * 100)).clip(upper=1)
    # NOTE(review): CPUFrequency is used as stored in the file (sample values
    # around 3541, presumably MHz) - confirm the unit expected downstream.
    ts_df['flops'] = cpus_allocated * ts_df['CPUFrequency'] * instructions_per_cycle

    return ts_df.drop(columns=['Step', 'CPUFrequency', 'CPUUtilization'])

In [49]:
# Leave the frame as the cell's last expression so the notebook renders it
# with its rich table display instead of plain printed text.
convert_timeseries(type7_job)

      host     flag     usage    flops
0        0  m_usage  0.000000  5449728
1        0  m_usage  0.099675  5449728
2        0  m_usage  0.368950  5449728
3        0  m_usage  0.416850  5449728
4        0  m_usage  0.497125  5449728
...    ...      ...       ...      ...
7393     1  m_usage  0.697425        0
7394     1  m_usage  0.699500        0
7395     1  m_usage  0.698625     1536
7396     1  m_usage  0.071725     1536
7397     1  m_usage  1.000000     1536

[7398 rows x 4 columns]


In [14]:
# `type1_series` holds the raw samples, which have a `Series` column but no
# `host` column; the hosts only exist in the converted frame, so convert here.
for host in convert_timeseries(type1_job)['host'].unique():
    print('host: ', host)

host:  0
