In [1]:
import json
import numpy as np
import pandas as pd
from typing import Any, Dict, List
from admire.preprocessing import mapping_functions

# Directory for this notebook's input and output files, relative to the working directory.
# Paths are built by plain string concatenation (f'{data_dir}jobs.json'), so the trailing slash is required.
data_dir = './data/'

In [3]:
# Read the JSON dump; the job records are stored under its top-level 'jobs' key.
with open(f'{data_dir}jobs.json','r') as f:
    data = json.load(f)

# Columns removed before flattening.
drop_cols = ['account', 'cluster', 'container', 'comment', 'array','association', 'derived_exit_code', 'exit_code', 'het', 'job_id', 'name', 'mcs', 'kill_request_user']
# Row label removed from the raw frame. NOTE(review): the reason for dropping this row is not recorded here.
drop_rows_indices = [8679]

# Build the frame, drop the unwanted columns and row, then renumber rows from 0.
df_raw = (
    pd.DataFrame.from_dict(data['jobs'])
    .drop(columns=drop_cols)
    .drop(index=drop_rows_indices)
    .reset_index(drop=True)
)

# -- if taking a step other than 0, uncomment --
# mask = df_raw['steps'].apply(lambda x: len(x) == 2)
# df_raw = df_raw[mask].reset_index(drop=True)

df_raw.head()

Unnamed: 0,allocation_nodes,constraints,time,flags,group,nodes,partition,priority,qos,required,reservation,state,steps,tres,user,wckey,working_directory
0,1,,"{'elapsed': 9482, 'eligible': 1672509600, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1788,standard,1417,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1788'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
1,1,,"{'elapsed': 9932, 'eligible': 1672531200, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e2281,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e2281'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
2,1,,"{'elapsed': 10601, 'eligible': 1672552800, 'en...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1901,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1901'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
3,1,,"{'elapsed': 9408, 'eligible': 1672574400, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1403,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1403'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
4,1,,"{'elapsed': 10631, 'eligible': 1672617600, 'en...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1893,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1893'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...


In [4]:
# Summary of the raw frame after the column/row drops: dtypes and non-null counts per column.
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12823 entries, 0 to 12822
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   allocation_nodes   12823 non-null  int64 
 1   constraints        116 non-null    object
 2   time               12823 non-null  object
 3   flags              12823 non-null  object
 4   group              12823 non-null  object
 5   nodes              12823 non-null  object
 6   partition          12823 non-null  object
 7   priority           12823 non-null  int64 
 8   qos                12351 non-null  object
 9   required           12823 non-null  object
 10  reservation        12823 non-null  object
 11  state              12823 non-null  object
 12  steps              12823 non-null  object
 13  tres               12823 non-null  object
 14  user               12823 non-null  object
 15  wckey              12823 non-null  object
 16  working_directory  12823 non-null  objec

In [5]:
def fix_steps_tres_allocated_order(row):
    '''Normalise the allocated-TRES list of a job's first step to three entries.

    The list at ``row[0]['tres']['allocated']`` is replaced by its first two
    entries plus one more: the fourth entry when the third has type
    ``'energy'`` and the fourth has type ``'node'``, otherwise the third
    entry. Any further entries are dropped.

    Args:
        row: Value of one ``steps`` cell, expected to be a list of step
            dicts. The first step dict is modified in place.

    Returns:
        The same ``row`` object. It is returned untouched when it is not a
        non-empty list/tuple, or when the first step has fewer than three
        allocated entries (those inputs used to raise ``IndexError``).
    '''
    # Jobs without any step have nothing to normalise.
    if not isinstance(row, (list, tuple)) or len(row) == 0:
        return row

    step_tres = row[0]['tres']
    allocated = step_tres['allocated']
    if len(allocated) < 3:
        return row

    third = allocated[2]
    # Skip over 'energy' only when a 'node' entry actually follows it.
    if third['type'] == 'energy' and len(allocated) > 3 and allocated[3]['type'] == 'node':
        third = allocated[3]

    # Slicing builds a new list, so the original list object is never extended.
    step_tres['allocated'] = allocated[:2] + [third]
    return row

def fix_tres_allocated_order(row):
    '''Normalise a job-level allocated-TRES list to three entries.

    ``row['allocated']`` is replaced by its first two entries plus one more:
    the fourth entry when the third has type ``'energy'`` and the fourth has
    type ``'node'``, otherwise the third entry. Any further entries are
    dropped.

    Args:
        row: Value of one ``tres`` cell, a dict with an ``'allocated'`` list
            of dicts that each carry a ``'type'`` key. Modified in place.

    Returns:
        The same ``row`` object. It is returned untouched when the list has
        fewer than three entries (those inputs used to raise ``IndexError``).
    '''
    allocated = row['allocated']
    if len(allocated) < 3:
        return row

    third = allocated[2]
    # Skip over 'energy' only when a 'node' entry actually follows it.
    if third['type'] == 'energy' and len(allocated) > 3 and allocated[3]['type'] == 'node':
        third = allocated[3]

    # Slicing builds a new list, so the original list object is never extended.
    row['allocated'] = allocated[:2] + [third]
    return row

# Run each nested column through its matching normaliser, one row value at a time.
for column, fixer in (
    ('steps', fix_steps_tres_allocated_order),
    ('tres', fix_tres_allocated_order),
):
    df_raw[column] = df_raw[column].apply(fixer)

df_raw.head(5)

Unnamed: 0,allocation_nodes,constraints,time,flags,group,nodes,partition,priority,qos,required,reservation,state,steps,tres,user,wckey,working_directory
0,1,,"{'elapsed': 9482, 'eligible': 1672509600, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1788,standard,1417,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1788'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
1,1,,"{'elapsed': 9932, 'eligible': 1672531200, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e2281,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e2281'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
2,1,,"{'elapsed': 10601, 'eligible': 1672552800, 'en...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1901,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1901'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
3,1,,"{'elapsed': 9408, 'eligible': 1672574400, 'end...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1403,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1403'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...
4,1,,"{'elapsed': 10631, 'eligible': 1672617600, 'en...","[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1893,standard,1434,normal,"{'CPUs': 24, 'memory': 2048}","{'id': 0, 'name': 0}","{'current': 'COMPLETED', 'reason': 'BeginTime'}","[{'nodes': {'list': ['e1893'], 'count': 1, 'ra...","{'allocated': [{'type': 'cpu', 'name': None, '...",kulka,"{'wckey': '', 'flags': []}",/mnt/storage_2/scratch/grant_68/kulka/test/dev...


Transform steps column

In [6]:
# Inspect the nested 'steps' value of the row labelled 0.
df_raw.loc[0, 'steps']

[{'nodes': {'list': ['e1788'], 'count': 1, 'range': 'e1788'},
  'tres': {'requested': {'max': [{'type': 'cpu',
      'name': None,
      'id': 1,
      'count': 150077410,
      'task': 0,
      'node': 'e1788'},
     {'type': 'mem',
      'name': None,
      'id': 2,
      'count': 8399224832,
      'task': 0,
      'node': 'e1788'},
     {'type': 'energy',
      'name': None,
      'id': 3,
      'count': 0,
      'task': 0,
      'node': 'e1788'},
     {'type': 'fs',
      'name': 'disk',
      'id': 6,
      'count': 229099898492,
      'task': 0,
      'node': 'e1788'},
     {'type': 'vmem',
      'name': None,
      'id': 7,
      'count': 303169536,
      'task': 0,
      'node': 'e1788'},
     {'type': 'pages',
      'name': None,
      'id': 8,
      'count': 0,
      'task': 0,
      'node': 'e1788'}],
    'min': [{'type': 'cpu',
      'name': None,
      'id': 1,
      'count': 150077410,
      'task': 0,
      'node': 'e1788'},
     {'type': 'mem',
      'name': None,
     

In [7]:
# -- if step 1 ---
# steps_series = df_raw['steps'].apply(lambda x: x[1] if type(x) is list and len(x) > 0 else 'lol')
def _first_step_or_placeholder(steps):
    '''Return the first step of a job, or the 'lol' placeholder when there is no usable step list.'''
    if type(steps) is list and len(steps) > 0:
        return steps[0]
    # NOTE(review): how the flattening helper treats this placeholder is not visible here.
    return 'lol'

steps_series = df_raw['steps'].apply(_first_step_or_placeholder)

# The column mapping is built from the first row's step only.
steps_mapping_list = mapping_functions.get_mapping_for_dict(steps_series.iloc[0], 'steps')
print('Mapping done')

steps_df = mapping_functions.flatten_series_by_mapping(steps_series, steps_mapping_list)
print(steps_df.columns.to_numpy())

steps_df

Mapping done
['steps-nodes-list' 'steps-nodes-count' 'steps-nodes-range'
 'steps-tres-requested-max-#0-type' 'steps-tres-requested-max-#0-name'
 'steps-tres-requested-max-#0-id' 'steps-tres-requested-max-#0-count'
 'steps-tres-requested-max-#0-task' 'steps-tres-requested-max-#0-node'
 'steps-tres-requested-max-#1-type' 'steps-tres-requested-max-#1-name'
 'steps-tres-requested-max-#1-id' 'steps-tres-requested-max-#1-count'
 'steps-tres-requested-max-#1-task' 'steps-tres-requested-max-#1-node'
 'steps-tres-requested-max-#2-type' 'steps-tres-requested-max-#2-name'
 'steps-tres-requested-max-#2-id' 'steps-tres-requested-max-#2-count'
 'steps-tres-requested-max-#2-task' 'steps-tres-requested-max-#2-node'
 'steps-tres-requested-max-#3-type' 'steps-tres-requested-max-#3-name'
 'steps-tres-requested-max-#3-id' 'steps-tres-requested-max-#3-count'
 'steps-tres-requested-max-#3-task' 'steps-tres-requested-max-#3-node'
 'steps-tres-requested-max-#4-type' 'steps-tres-requested-max-#4-name'
 'steps-

Unnamed: 0,steps-nodes-list,steps-nodes-count,steps-nodes-range,steps-tres-requested-max-#0-type,steps-tres-requested-max-#0-name,steps-tres-requested-max-#0-id,steps-tres-requested-max-#0-count,steps-tres-requested-max-#0-task,steps-tres-requested-max-#0-node,steps-tres-requested-max-#1-type,...,steps-CPU-governor,steps-kill_request_user,steps-state,steps-statistics-CPU-actual_frequency,steps-statistics-energy-consumed,steps-step-job_id,steps-step-het-component,steps-step-id,steps-step-name,steps-task-distribution
0,[e1788],1,e1788,cpu,,1,150077410,0,e1788,mem,...,[],,COMPLETED,0,0,25977766,,batch,batch,Unknown
1,[e2281],1,e2281,cpu,,1,155892610,0,e2281,mem,...,[],,COMPLETED,0,0,26081263,,batch,batch,Unknown
2,[e1901],1,e1901,cpu,,1,180730980,0,e1901,mem,...,[],,COMPLETED,0,0,26081264,,batch,batch,Unknown
3,[e1403],1,e1403,cpu,,1,153234590,0,e1403,mem,...,[],,COMPLETED,0,0,26081265,,batch,batch,Unknown
4,[e1893],1,e1893,cpu,,1,181754220,0,e1893,mem,...,[],,COMPLETED,0,0,26081267,,batch,batch,Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12818,[e1427],1,e1427,cpu,,1,510,0,e1427,mem,...,[],,COMPLETED,0,575766,27427892,,batch,batch,Unknown
12819,[e1329],1,e1329,cpu,,1,480,0,e1329,mem,...,[],,COMPLETED,0,177408,27428386,,batch,batch,Unknown
12820,[e1427],1,e1427,cpu,,1,3663190,0,e1427,mem,...,[],,COMPLETED,0,48800,27428399,,batch,batch,Unknown
12821,[e1427],1,e1427,cpu,,1,75308460,0,e1427,mem,...,[],,COMPLETED,0,1384508,27428511,,batch,batch,Unknown


Transform TRES column

In [8]:
# Flatten the nested job-level 'tres' column into a separate DataFrame.
tres_series = df_raw['tres']
# The mapping is built from the first row only.
# NOTE(review): presumably rows with a different structure would not match it — confirm in mapping_functions.
tres_mapping_list = mapping_functions.get_mapping_for_dict(tres_series.iloc[0], 'tres')
tres_df = mapping_functions.flatten_series_by_mapping(tres_series, tres_mapping_list)
tres_df

Unnamed: 0,tres-allocated-#0-type,tres-allocated-#0-name,tres-allocated-#0-id,tres-allocated-#0-count,tres-allocated-#1-type,tres-allocated-#1-name,tres-allocated-#1-id,tres-allocated-#1-count,tres-allocated-#2-type,tres-allocated-#2-name,...,tres-requested-#1-id,tres-requested-#1-count,tres-requested-#2-type,tres-requested-#2-name,tres-requested-#2-id,tres-requested-#2-count,tres-requested-#3-type,tres-requested-#3-name,tres-requested-#3-id,tres-requested-#3-count
0,cpu,,1,24,mem,,2,49152,node,,...,2,49152,node,,4,1,billing,,5,24
1,cpu,,1,24,mem,,2,49152,node,,...,2,49152,node,,4,1,billing,,5,24
2,cpu,,1,24,mem,,2,49152,node,,...,2,49152,node,,4,1,billing,,5,24
3,cpu,,1,24,mem,,2,49152,node,,...,2,49152,node,,4,1,billing,,5,24
4,cpu,,1,24,mem,,2,49152,node,,...,2,49152,node,,4,1,billing,,5,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12818,cpu,,1,96,mem,,2,327680,node,,...,2,327680,node,,4,4,billing,,5,96
12819,cpu,,1,96,mem,,2,327680,node,,...,2,327680,node,,4,4,billing,,5,96
12820,cpu,,1,96,mem,,2,327680,node,,...,2,327680,node,,4,4,billing,,5,96
12821,cpu,,1,96,mem,,2,327680,node,,...,2,327680,node,,4,4,billing,,5,96


Transform time column

In [9]:
# Flatten the nested 'time' column into a separate DataFrame.
time_series = df_raw['time']
# The mapping is built from the first row only.
# NOTE(review): presumably rows with a different structure would not match it — confirm in mapping_functions.
time_mapping_list = mapping_functions.get_mapping_for_dict(time_series.iloc[0], 'time')
time_df = mapping_functions.flatten_series_by_mapping(time_series, time_mapping_list)
time_df

Unnamed: 0,time-elapsed,time-eligible,time-end,time-start,time-submission,time-suspended,time-system-seconds,time-system-microseconds,time-limit,time-total-seconds,time-total-microseconds,time-user-seconds,time-user-microseconds
0,9482,1672509600,1672529962,1672509600,1669893940,0,0,0,1380,0,0,0,0
1,9932,1672531200,1672555653,1672531200,1671039773,0,0,0,1380,0,0,0,0
2,10601,1672552800,1672595012,1672552800,1671039773,0,0,0,1380,0,0,0,0
3,9408,1672574400,1672604330,1672574400,1671039773,0,0,0,1380,0,0,0,0
4,10631,1672617600,1672629296,1672617600,1671039774,0,0,0,1380,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12818,1150,1674827652,1674828802,1674827652,1674827652,0,0,0,2880,0,0,0,0
12819,549,1674828526,1674829078,1674828526,1674828526,0,0,0,2880,0,0,0,0
12820,137,1674829182,1674829319,1674829182,1674829182,0,0,0,2880,0,0,0,0
12821,2695,1674829551,1674832246,1674829551,1674829551,0,0,0,2880,0,0,0,0


Transform required column

In [10]:
# Flatten the nested 'required' column into a separate DataFrame.
required_series = df_raw['required']
# The mapping is built from the first row only.
# NOTE(review): presumably rows with a different structure would not match it — confirm in mapping_functions.
required_mapping_list = mapping_functions.get_mapping_for_dict(required_series.iloc[0], 'required')
required_df = mapping_functions.flatten_series_by_mapping(required_series, required_mapping_list)
required_df

Unnamed: 0,required-CPUs,required-memory
0,24,2048
1,24,2048
2,24,2048
3,24,2048
4,24,2048
...,...,...
12818,96,81920
12819,96,81920
12820,96,81920
12821,96,81920


Transform state column

In [11]:
# Flatten the nested 'state' column into a separate DataFrame.
state_series = df_raw['state']
# The mapping is built from the first row only.
# NOTE(review): presumably rows with a different structure would not match it — confirm in mapping_functions.
state_mapping_list = mapping_functions.get_mapping_for_dict(state_series.iloc[0], 'state')
state_df = mapping_functions.flatten_series_by_mapping(state_series, state_mapping_list)
state_df

Unnamed: 0,state-current,state-reason
0,COMPLETED,BeginTime
1,COMPLETED,BeginTime
2,COMPLETED,BeginTime
3,COMPLETED,BeginTime
4,COMPLETED,BeginTime
...,...,...
12818,COMPLETED,
12819,COMPLETED,
12820,COMPLETED,
12821,COMPLETED,


Transform wckey column

In [12]:
# Flatten the nested 'wckey' column into a separate DataFrame.
wckey_series = df_raw['wckey']
# The mapping is built from the first row only.
# NOTE(review): presumably rows with a different structure would not match it — confirm in mapping_functions.
wckey_mapping_list = mapping_functions.get_mapping_for_dict(wckey_series.iloc[0], 'wckey')
wckey_df = mapping_functions.flatten_series_by_mapping(wckey_series, wckey_mapping_list)
wckey_df

Unnamed: 0,wckey-wckey,wckey-flags
0,,[]
1,,[]
2,,[]
3,,[]
4,,[]
...,...,...
12818,,[]
12819,,[]
12820,,[]
12821,,[]


Merge all DataFrames

In [13]:
to_be_merged = ['steps', 'tres', 'state', 'time', 'required', 'wckey']

# Swap the nested columns for their flattened counterparts: keep every other
# column of df_raw and append the per-column frames side by side.
flattened_frames = [required_df, state_df, time_df, wckey_df, tres_df, steps_df]
df_flat = pd.concat(
    [df_raw.drop(columns=to_be_merged), *flattened_frames],
    axis='columns',
)

# concat aligns on the index, so a changed row count signals mismatched indices.
assert len(df_flat) == len(df_raw), 'Shapes of DataFrame before and after merge should be the same!'

df_flat.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12823 entries, 0 to 12822
Columns: 259 entries, allocation_nodes to steps-task-distribution
dtypes: int64(135), object(124)
memory usage: 25.3+ MB


In [14]:
# Write the flattened frame to CSV; index=False leaves the row index out of the file.
df_flat.to_csv(f'{data_dir}jobs_flattened.csv', index=False)

In [21]:
# -- IF STEP 1 --
# In step 1, column 'steps-tres-allocated-#2-type' is 'energy' for most rows (~11.7k) but 'node' for ~100 rows. Rows whose type is not 'energy' are discarded.
# mask_inconsistent_jobs_on_step1 = df_flat['steps-tres-allocated-#2-type'] == 'energy'
# df_flat = df_flat[mask_inconsistent_jobs_on_step1]

In [17]:
# Feed df_flat through the two project helpers in sequence:
# first the TRES merge, then the removal of index elements from column names.
df_flat_merged = (
    df_flat
    .pipe(mapping_functions.merge_all_tres_possible)
    .pipe(mapping_functions.remove_index_element_from_column_names)
)

# Save the cleaned frame (row index left out) and preview the first rows.
df_flat_merged.to_csv(f'{data_dir}jobs_flattened_cleaned.csv', index=False)
df_flat_merged.head(5)

Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-max-#2-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-max-#3-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-max-#4-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-max-#5-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-min-#2-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-min-#3-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-min-#4-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-min-#5-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-average-#2-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-average-#3-type"
Colpath does not exist! Make sure that it is okay: "steps-tres-consumed-average-#4-type"
Colpath does not exist! Make sure that it is okay: "s

Unnamed: 0,allocation_nodes,constraints,flags,group,nodes,partition,priority,qos,reservation,user,...,steps-tres-allocated-cpu,steps-tres-allocated-mem,steps-tres-allocated-node,tres-allocated-cpu,tres-allocated-mem,tres-allocated-node,tres-requested-cpu,tres-requested-mem,tres-requested-node,tres-requested-billing
0,1,,"[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1788,standard,1417,normal,"{'id': 0, 'name': 0}",kulka,...,24,49152,1,24,49152,1,24,49152,1,24
1,1,,"[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e2281,standard,1434,normal,"{'id': 0, 'name': 0}",kulka,...,24,49152,1,24,49152,1,24,49152,1,24
2,1,,"[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1901,standard,1434,normal,"{'id': 0, 'name': 0}",kulka,...,24,49152,1,24,49152,1,24,49152,1,24
3,1,,"[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1403,standard,1434,normal,"{'id': 0, 'name': 0}",kulka,...,24,49152,1,24,49152,1,24,49152,1,24
4,1,,"[CLEAR_SCHEDULING, STARTED_ON_SCHEDULE]",staff,e1893,standard,1434,normal,"{'id': 0, 'name': 0}",kulka,...,24,49152,1,24,49152,1,24,49152,1,24


In [16]:
# List every column name of the cleaned frame as a NumPy array.
print(df_flat_merged.columns.to_numpy())

['allocation_nodes' 'constraints' 'flags' 'group' 'nodes' 'partition'
 'priority' 'qos' 'reservation' 'user' 'working_directory' 'required-CPUs'
 'required-memory' 'state-current' 'state-reason' 'time-elapsed'
 'time-eligible' 'time-end' 'time-start' 'time-submission'
 'time-suspended' 'time-system-seconds' 'time-system-microseconds'
 'time-limit' 'time-total-seconds' 'time-total-microseconds'
 'time-user-seconds' 'time-user-microseconds' 'wckey-wckey' 'wckey-flags'
 'steps-nodes-list' 'steps-nodes-count' 'steps-nodes-range'
 'steps-time-elapsed' 'steps-time-end' 'steps-time-start'
 'steps-time-suspended' 'steps-time-system-seconds'
 'steps-time-system-microseconds' 'steps-time-total-seconds'
 'steps-time-total-microseconds' 'steps-time-user-seconds'
 'steps-time-user-microseconds' 'steps-exit_code-status'
 'steps-exit_code-return_code' 'steps-tasks-count' 'steps-pid'
 'steps-CPU-requested_frequency-min' 'steps-CPU-requested_frequency-max'
 'steps-CPU-governor' 'steps-kill_request_user

In [24]:
# Per-TRES 'node' columns of the step-level requested max/min values.
check = [
    'steps-tres-requested-max-cpu-node',
    'steps-tres-requested-max-mem-node',
    'steps-tres-requested-max-energy-node',
    'steps-tres-requested-max-fs-node',
    'steps-tres-requested-max-vmem-node',
    'steps-tres-requested-max-pages-node',
    'steps-tres-requested-min-cpu-node',
    'steps-tres-requested-min-mem-node',
    'steps-tres-requested-min-energy-node',
    'steps-tres-requested-min-fs-node',
    'steps-tres-requested-min-vmem-node',
    'steps-tres-requested-min-pages-node',
]

# Compare every column with one reference column and report the result next to
# the column name, so a mismatch can be attributed without counting output lines.
# NOTE(review): presumably all of these columns should hold the same node value — confirm.
reference_col = 'steps-tres-requested-min-pages-node'
reference_values = df_flat_merged[reference_col]

for col in check:
    is_identical = reference_values.equals(df_flat_merged[col])
    print(f'{col}: {is_identical}')

True
True
False
True
True
True
True
True
True
True
True
True
