# Ad-hoc analysis of log files using pandas

Load the required modules.

In [23]:
from vsc.pbs.log import PbsLogParser
from vsc.pbs.job_analysis import jobs_to_dataframes
import json
from datetime import datetime, timedelta
import pandas as pd

Load configuration file, and add entry for log directory.

In [2]:
json_file_name = '../conf/config.json'
with open(json_file_name, 'r') as json_file:
    config = json.load(json_file)
config['log_dir'] = '../tests/test/data'

Set start and end date for analysis, this will be used to load the appropriate PBS torque log files.

In [3]:
start_date = '20160607'
end_date = '20160608'

Create a parser, and parse the PBS torque log files.

In [4]:
log_parser = PbsLogParser(config)
log_parser.parse(start_date, end_date)

Determine the number of log entries, i.e., jobs that either ended, or started withing the given time period, or, put differently, the number of running jobs.

In [5]:
len(log_parser._jobs)

902

Find first job that has an end event.

In [6]:
for job_id, job in log_parser.jobs.iteritems():
    if len(job.events) >= 4:
        print job_id
        break

20319759.hpc-p-svcs-10.icts.hpc.kuleuven.be


Print all events for this job.

In [7]:
for event in job.events:
    print event

Q: 2016-06-08 11:40:08
  queue: qdef
Q: 2016-06-08 11:40:08
  queue: q24h
S: 2016-06-08 11:40:38
  Resource_List.partition: thinking
  Resource_List.vmem: 62914560000
  account: lp_3E110810
  group: vsc30777
  ctime: 1465378808
  Resource_List.neednodes: 2:ppn=24:haswell
  Resource_List.mem: 67108864000
  qtime: 1465378808
  Resource_List.nodes: 2:ppn=24:haswell
  Resource_List.feature: mem128
  jobname: alpha-o_E_166
  queue: q24h
  start: 1465378838
  Resource_List.walltime: 72000
  user: vsc30777
  Resource_List.nodect: 2
  owner: vsc30777@hpcblade1-hev7.icts.hpc.kuleuven.be
  Resource_List.pmem: 2621440000
  etime: 1465378808
  exec_host: {'r12i1n16': '0-23', 'r12i1n15': '0-23'}
E: 2016-06-08 14:59:28
  total_execution_slots: 48
  qtime: 1465378808
  Resource_List.feature: mem128
  session: 1619
  owner: vsc30777@hpcblade1-hev7.icts.hpc.kuleuven.be
  group: vsc30777
  Exit_status: 113
  Resource_List.mem: 67108864000
  etime: 1465378808
  resources_used.cput: 23523
  Resource_List.

Print some job information.

In [8]:
print job.name
print job.user
print job.resources_used
print job.exec_host
print 'nodes={0}:ppn={1}'.format(job.resource_spec('nodes')[0]['nodes'], job.resource_spec('nodes')[0]['ppn'])
print job.exit_status

alpha-o_E_166
vsc30777
{'mem': 28601126912, 'vmem': 48246874112, 'cput': 23523, 'energy_used': '0', 'walltime': 11924}
{'r12i1n16': '0-23', 'r12i1n15': '0-23'}
nodes=2:ppn=24
113


In [22]:
print 'resource specs'
for key, value in job._resource_specs.iteritems():
    print '  {0}: {1}'.format(key, value)
print 'resources used'
for key, value in job._resource_specs.iteritems():
    print '  {0}: {1}'.format(key, value)

resource specs
  qos: normal
  features: ['mem128']
  mem: 67108864000
  neednodes: 2:ppn=24:haswell
  partition: thinking
  vmem: 62914560000
  nodect: 2
  nodes: [{'nodes': 2, 'properties': ['haswell'], 'ppn': 24}]
  pmem: 2621440000
  walltime: 72000
resources used
  qos: normal
  features: ['mem128']
  mem: 67108864000
  neednodes: 2:ppn=24:haswell
  partition: thinking
  vmem: 62914560000
  nodect: 2
  nodes: [{'nodes': 2, 'properties': ['haswell'], 'ppn': 24}]
  pmem: 2621440000
  walltime: 72000


Compute data frames for jobs and hosts.

In [11]:
df_jobs, df_hosts = jobs_to_dataframes(log_parser.jobs)

Show the jobs data frame.

In [12]:
df_jobs

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status
0,2016-06-07 00:02:30,20319236.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31448,E,thinking,,,24:00:00,1,20,r2i2n6,255
1,2016-06-07 00:09:30,20318872.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30777,E,thinking,8.1,14:00:22,14:00:00,4,24,r4i2n14 r4i2n13 r4i2n12 r4i2n10,-11
2,2016-06-07 00:10:01,20319237.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31448,E,thinking,,00:00:42,24:00:00,1,20,r2i2n6,0
3,2016-06-07 00:12:52,20318874.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30777,E,thinking,0.0,14:00:09,14:00:00,4,24,r4i2n15 r5i2n5 r5i2n3 r5i2n1,-11
4,2016-06-07 00:19:43,20319238.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31448,E,thinking,0.5,00:05:38,24:00:00,1,20,r2i2n6,0
5,2016-06-07 00:24:43,20318841.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30714,E,thinking,11.0,15:26:35,24:00:00,1,20,r1i0n13,0
6,2016-06-07 00:26:14,20319239.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30703,E,thinking,,,01:00:00,2,20,,0
7,2016-06-07 00:28:39,20319240.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30703,E,thinking,,00:00:03,00:10:00,2,20,r1i0n13 r2i2n6,0
8,2016-06-07 00:29:14,20319241.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31448,E,thinking,0.0,00:00:10,24:00:00,1,20,r2i2n7,143
9,2016-06-07 00:31:32,20319242.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30703,E,thinking,,00:00:01,00:10:00,2,20,r1i0n13 r2i2n6,0


Filter for jobs that were running from 12:00 till 15:00 on Jun 7.

In [13]:
df_jobs[('2016-06-07 12:00:00' <= df_jobs['time']) & (df_jobs['time'] <= '2016-06-07 15:00:00')]

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status
176,2016-06-07 12:07:05,20319391.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31173,E,thinking,15.5,00:45:37,40:00:00,1,20,r1i0n8,1
177,2016-06-07 12:14:14,20319381.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30579,E,thinking,2.7,01:03:05,20:00:00,3,20,r1i1n7 r1i0n15 r1i0n7,0
178,2016-06-07 12:15:40,20319399.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31366,E,thinking,3.4,00:36:25,02:00:00,5,20,r2i2n11 r2i2n10 r2i2n7 r2i2n9 r2i2n8,0
179,2016-06-07 12:27:32,20318876.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30507,E,thinking,19.9,26:13:02,72:00:00,1,24,r5i2n8,0
180,2016-06-07 12:30:10,20319347.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30517,E,thinking,3.9,02:19:23,168:00:00,1,20,r1i2n3,0
181,2016-06-07 12:30:33,20319408.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31461,E,thinking,0.0,00:06:05,03:00:00,1,1,r1i0n7,0
182,2016-06-07 12:40:47,20317312.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30737,E,thinking,1.0,121:03:14,300:00:00,1,2,r1i1n3,0
183,2016-06-07 12:46:03,20319411.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31159,E,thinking,,,10:00:00,1,1,r1i2n10,-1
184,2016-06-07 12:49:37,20319412.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31159,E,thinking,,,10:00:00,1,1,r1i2n10,-1
185,2016-06-07 12:49:50,20319352.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31568,E,thinking,0.9,02:34:56,72:00:00,1,20,r2i1n1,0


How many jobs were running in the GPU partition on June 7 and 8?

In [14]:
len(df_jobs[df_jobs['partition'] == 'gpu'])

25

How many distinct users where active on June 7 and 8?

In [15]:
len(set(df_jobs['user']))

67

List jobs for a particular user.

In [16]:
df_jobs[df_jobs['user'] == 'vsc41730']

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status
372,2016-06-07 22:17:58,20319616.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc41730,E,thinking,6.3,00:00:24,72:00:00,2,10,r1i1n14,271


Show the host data frame.

In [17]:
df_hosts

Unnamed: 0,job_id,host,cores
0,20319208.hpc-p-svcs-10.icts.hpc.kuleuven.be,r3i0n11,0-19
1,20319953.hpc-p-svcs-10.icts.hpc.kuleuven.be,r11i2n16,0-23
2,20319953.hpc-p-svcs-10.icts.hpc.kuleuven.be,r12i0n4,0-23
3,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i1n15,0-19
4,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i1n16,0-19
5,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i2n2,0-19
6,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i2n3,0-19
7,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i2n4,0-19
8,20319234.hpc-p-svcs-10.icts.hpc.kuleuven.be,r2i2n5,0-19
9,20319759.hpc-p-svcs-10.icts.hpc.kuleuven.be,r12i1n15,0-23


Which jobs ran on node `r3i1n14`?

In [26]:
pd.merge(df_jobs, df_hosts[df_hosts['host'] == 'r3i1n14'],
         how='inner', left_on='job_id', right_on='job_id')

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status,host,cores
0,2016-06-08 12:05:18,20319463.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31004,E,thinking,39.3,21:18:12,72:00:00,1,20,r3i1n14,0,r3i1n14,0-19
1,2016-06-08 12:19:21,20319805.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30772,E,thinking,1.4,00:01:40,72:00:00,1,20,r3i1n14,0,r3i1n14,0-19
2,2016-06-08 18:20:05,20319982.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,1.8,00:00:27,24:00:00,16,20,r3i2n16 r3i2n15 r4i1n4 r4i1n10 r4i1n5 r4i1n8 r...,271,r3i1n14,0-19
3,2016-06-08 18:21:43,20319989.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,,00:00:44,24:00:00,16,20,r3i2n16 r3i2n15 r4i1n4 r4i1n10 r4i1n5 r4i1n8 r...,0,r3i1n14,0-19
4,2016-06-08 18:22:37,20319991.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,1.0,00:00:47,24:00:00,16,20,r3i2n16 r3i2n15 r4i1n4 r4i1n10 r4i1n5 r4i1n8 r...,0,r3i1n14,0-19
5,2016-06-08 18:23:53,20319993.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,0.3,00:00:42,24:00:00,16,20,r3i2n16 r3i2n15 r4i1n4 r4i1n10 r4i1n5 r4i1n8 r...,0,r3i1n14,0-19
6,2016-06-08 18:25:15,20319995.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,1.1,00:00:43,24:00:00,16,20,r3i2n16 r4i1n8 r3i2n15 r4i1n4 r4i1n5 r4i1n3 r4...,0,r3i1n14,0-19
7,2016-06-08 22:27:05,20319999.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,E,thinking,3.4,03:47:18,24:00:00,16,20,r3i2n16 r3i2n15 r4i1n4 r2i0n16 r3i1n8 r4i1n5 r...,0,r3i1n14,0-19
8,2016-06-08 22:27:34,20320003.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30627,S,thinking,,,24:00:00,16,20,r3i2n16 r3i2n15 r3i1n6 r4i1n4 r2i0n16 r3i1n8 r...,-1024,r3i1n14,0-19
