In [1]:
import re
import csv
import datetime

In [2]:
# Path pieces locating one simulation batch. Each keeps its trailing slash
# because the cells below join them by plain string concatenation.
result_dir = "results/slurmsimcont/"  # root directory of the results
test_name = "SAF_1641078000/"         # test sub-directory
batch_name = "dtstart_50_1/"          # batch sub-directory inside the test

In [3]:
# Parse slurmctld.log into log_data: JobId -> {'InitPrio': ..., 'SCI': ...}.
# Both values are kept as the strings captured from the log. 'InitPrio' is None
# when only an "actual sci" line was seen for the job, and 'SCI' is None until
# such a line is seen. A submit line always (re)creates the job's entry.
jobid_pattern = re.compile(r'JobId=(\d+)')
initprio_pattern = re.compile(r'InitPrio=(\d+)')
sci_pattern = re.compile(r'actual sci is (\d+\.\d+)')

log_data = {}
log_path = result_dir + test_name + batch_name + 'slurmctld.log'
with open(log_path, 'r') as log_file:
    for log_line in log_file:
        # A line is classified as a submit line first; only otherwise can it
        # count as an "actual sci" line.
        is_submit_line = '_slurm_rpc_submit_batch_job' in log_line
        if not is_submit_line and 'actual sci' not in log_line:
            continue

        job_match = jobid_pattern.search(log_line)
        if job_match is None:
            continue
        job_key = job_match.group(1)

        if is_submit_line:
            prio_match = initprio_pattern.search(log_line)
            if prio_match:
                # Record the initial priority; the SCI is filled in later.
                log_data[job_key] = {'InitPrio': prio_match.group(1), 'SCI': None}
        else:
            value_match = sci_pattern.search(log_line)
            if value_match:
                # Create a placeholder entry if no submit line was seen for this job.
                job_record = log_data.setdefault(job_key, {'InitPrio': None, 'SCI': None})
                job_record['SCI'] = value_match.group(1)

In [4]:
# Display the parsed per-job log entries (JobId -> InitPrio / SCI) for inspection.
log_data

{'1001': {'InitPrio': '50000', 'SCI': '1441.758702'},
 '1002': {'InitPrio': '50000', 'SCI': '120.135559'},
 '1003': {'InitPrio': '50000', 'SCI': '120.135559'},
 '1004': {'InitPrio': '50000', 'SCI': '1441.758702'},
 '1005': {'InitPrio': '7142', 'SCI': '1441.758702'},
 '1006': {'InitPrio': '50000', 'SCI': '1441.758702'},
 '1007': {'InitPrio': '14285', 'SCI': '2883.506702'},
 '1008': {'InitPrio': '14285', 'SCI': '1441.758702'},
 '1009': {'InitPrio': '50000', 'SCI': '11533.994702'},
 '1010': {'InitPrio': '7142', 'SCI': '1441.758702'},
 '1011': {'InitPrio': '14285', 'SCI': '120.167450'},
 '1012': {'InitPrio': '7142', 'SCI': '120.135559'},
 '1013': {'InitPrio': '50000', 'SCI': '120.135559'},
 '1014': {'InitPrio': '7142', 'SCI': '1441.758702'},
 '1015': {'InitPrio': '21428', 'SCI': '720.872018'},
 '1016': {'InitPrio': '21428', 'SCI': '240.349566'},
 '1017': {'InitPrio': '21428', 'SCI': '1441.758702'},
 '1018': {'InitPrio': '7142', 'SCI': '1441.758702'},
 '1019': {'InitPrio': '35714', 'SCI': N

In [5]:
# Read slurm_acct.out: a '|'-delimited table whose first line supplies the
# column names. Each row becomes a dict keyed by column name; values stay strings.
# newline='' is what the csv module asks for when it is given a file object,
# so the reader does its own line-ending handling (the write cell does the same).
acct_path = result_dir + test_name + batch_name + 'slurm_acct.out'
with open(acct_path, 'r', newline='') as file:
    slurm_data = list(csv.DictReader(file, delimiter='|'))

In [6]:
# Display the accounting rows read from slurm_acct.out for inspection.
slurm_data

[{'JobID': '1001',
  'JobIDRaw': '1001',
  'Cluster': 'micro',
  'Partition': 'normal',
  'Account': 'account2',
  'Group': 'slurm',
  'GID': '1000',
  'User': 'user5',
  'UID': '1005',
  'Submit': '2022-01-01T23:01:05',
  'Eligible': '2022-01-01T23:01:05',
  'Start': '2022-01-01T23:01:05',
  'End': '2022-01-01T23:01:20',
  'Elapsed': '00:00:15',
  'ExitCode': '0:0',
  'State': 'COMPLETED',
  'NNodes': '1',
  'NCPUS': '12',
  'ReqCPUS': '12',
  'ReqMem': '33600M',
  'ReqTRES': 'billing=12,cpu=12,mem=33600M,node=1',
  'Timelimit': '00:07:00',
  'QOS': 'normal',
  'NodeList': 'b1',
  'JobName': 'jobid_1001',
  'NTasks': ''},
 {'JobID': '1002',
  'JobIDRaw': '1002',
  'Cluster': 'micro',
  'Partition': 'normal',
  'Account': 'account1',
  'Group': 'slurm',
  'GID': '1000',
  'User': 'user1',
  'UID': '1001',
  'Submit': '2022-01-01T23:01:54',
  'Eligible': '2022-01-01T23:01:54',
  'Start': '2022-01-01T23:01:54',
  'End': '2022-01-01T23:21:55',
  'Elapsed': '00:20:01',
  'ExitCode': '0:0',

In [7]:
# Attach the log-derived fields to every accounting row.
# Each merged row is a new dict, so the rows in slurm_data are left untouched
# and re-running this cell always starts from the same input.
# Both added keys are always present: they are None when the job has no entry
# in log_data or when the corresponding value was never found in the log.
merged_data = []
for entry in slurm_data:
    log_entry = log_data.get(entry['JobID'], {})
    merged_data.append({
        **entry,
        'actual_sci': log_entry.get('SCI'),
        'priority': log_entry.get('InitPrio'),
    })


In [8]:
# Display the accounting rows with the log-derived fields attached.
merged_data

[{'JobID': '1001',
  'JobIDRaw': '1001',
  'Cluster': 'micro',
  'Partition': 'normal',
  'Account': 'account2',
  'Group': 'slurm',
  'GID': '1000',
  'User': 'user5',
  'UID': '1005',
  'Submit': '2022-01-01T23:01:05',
  'Eligible': '2022-01-01T23:01:05',
  'Start': '2022-01-01T23:01:05',
  'End': '2022-01-01T23:01:20',
  'Elapsed': '00:00:15',
  'ExitCode': '0:0',
  'State': 'COMPLETED',
  'NNodes': '1',
  'NCPUS': '12',
  'ReqCPUS': '12',
  'ReqMem': '33600M',
  'ReqTRES': 'billing=12,cpu=12,mem=33600M,node=1',
  'Timelimit': '00:07:00',
  'QOS': 'normal',
  'NodeList': 'b1',
  'JobName': 'jobid_1001',
  'NTasks': '',
  'actual_sci': '1441.758702',
  'priority': '50000'},
 {'JobID': '1002',
  'JobIDRaw': '1002',
  'Cluster': 'micro',
  'Partition': 'normal',
  'Account': 'account1',
  'Group': 'slurm',
  'GID': '1000',
  'User': 'user1',
  'UID': '1001',
  'Submit': '2022-01-01T23:01:54',
  'Eligible': '2022-01-01T23:01:54',
  'Start': '2022-01-01T23:01:54',
  'End': '2022-01-01T23

In [9]:
time_format = "%Y-%m-%dT%H:%M:%S"


def _epoch_seconds(stamp):
    """Parse a ``time_format`` string and return whole seconds since the epoch.

    The parsed datetime carries no timezone, so ``timestamp()`` interprets it
    as local time on the machine running the notebook.
    Raises ValueError when ``stamp`` does not match ``time_format``.
    """
    parsed = datetime.datetime.strptime(stamp, time_format)
    return int(parsed.timestamp())


# Keep only the job id, the three time columns (as text and as epoch seconds)
# and the SCI value for each merged row.
filtered_data = [
    {
        'JobID': job['JobID'],
        'Submit': job['Submit'],
        'SubmitTime': _epoch_seconds(job['Submit']),
        'Start': job['Start'],
        'StartTime': _epoch_seconds(job['Start']),
        'End': job['End'],
        'EndTime': _epoch_seconds(job['End']),
        'SCI': job['actual_sci'],
    }
    for job in merged_data
]

In [10]:
# Write the filtered rows to new_result.out as a '|'-delimited table with a header line.
# The column list is spelled out instead of being read from filtered_data[0],
# so an empty filtered_data produces a header-only file rather than an IndexError.
# A row carrying a key outside this list makes DictWriter raise ValueError.
output_fields = [
    'JobID',
    'Submit', 'SubmitTime',
    'Start', 'StartTime',
    'End', 'EndTime',
    'SCI',
]
with open(result_dir+test_name+batch_name+'new_result.out', 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=output_fields, delimiter='|')
    writer.writeheader()
    writer.writerows(filtered_data)