# Ad-hoc analysis of log files using pandas

Load the required modules.

In [1]:
from vsc.pbs.job_analysis import PbsLogAnalysis
from datetime import datetime, timedelta
import pandas as pd

Load configuration file, and add entry for log directory.

In [2]:
config_file = '../conf/analysis.json'

Set start and end date for analysis, this will be used to load the appropriate PBS torque log files.

In [3]:
start_date = '20160615'
end_date = '20160707'

Create an analysis, and prepare it for use.

In [4]:
analysis = PbsLogAnalysis(config_file)
analysis.prepare(start_date, end_date)

no log file '/data/leuven/sys/u0065575/pbs_logs/20160707'


Determine the number of log entries, i.e., jobs that either ended, or started withing the given time period, or, put differently, the number of running jobs.

In [5]:
len(analysis.jobs)

7136

Find first job that has an end event.

In [7]:
for job_id, job in analysis.jobs.iteritems():
    if len(job.events) >= 4:
        print job_id
        break

20327734.hpc-p-svcs-10.icts.hpc.kuleuven.be


Print all events for this job.

In [8]:
for event in job.events:
    print event

Q: 2016-07-01 10:10:00
  queue: qdef
Q: 2016-07-01 10:10:00
  queue: q21d
S: 2016-07-01 10:10:35
  Resource_List.partition: thinking
  account: lp_hiv_networks
  group: vsc31147
  ctime: 1467360600
  Resource_List.neednodes: 1:ppn=2:ivybridge
  Resource_List.nodes: 1:ppn=2:ivybridge
  qtime: 1467360600
  Resource_List.walltime: 720000
  jobname: datasetD_run1_010716.pbs
  queue: q21d
  start: 1467360635
  user: vsc31147
  Resource_List.nodect: 1
  owner: vsc31147@hpcblade1-hev6.icts.hpc.kuleuven.be
  Resource_List.pmem: 2621440000
  etime: 1467360600
  exec_host: {'r1i0n4': '4-5'}
E: 2016-07-06 04:22:53
  total_execution_slots: 2
  qtime: 1467360600
  session: 54955
  owner: vsc31147@hpcblade1-hev6.icts.hpc.kuleuven.be
  group: vsc31147
  Exit_status: 0
  start: 1467360635
  etime: 1467360600
  resources_used.cput: 572890
  Resource_List.nodes: 1:ppn=2:ivybridge
  unique_node_count: 1
  exec_host: {'r1i0n4': '4-5'}
  resources_used.mem: 4980457472
  Resource_List.walltime: 720000
  job

Print some job information.

In [9]:
print job.name
print job.user
print job.resources_used
print job.exec_host
print 'nodes={0}:ppn={1}'.format(job.resource_spec('nodes')[0]['nodes'], job.resource_spec('nodes')[0]['ppn'])
print job.exit_status

datasetD_run1_010716.pbs
vsc31147
{'mem': 4980457472, 'vmem': 20068106240, 'cput': 572890, 'energy_used': '0', 'walltime': 411136}
{'r1i0n4': '4-5'}
nodes=1:ppn=2
0


In [10]:
print 'resource specs'
for key, value in job._resource_specs.iteritems():
    print '  {0}: {1}'.format(key, value)
print 'resources used'
for key, value in job._resource_specs.iteritems():
    print '  {0}: {1}'.format(key, value)

resource specs
  qos: normal
  features: []
  neednodes: 1:ppn=2:ivybridge
  partition: thinking
  nodect: 1
  nodes: [{'nodes': 1, 'properties': ['ivybridge'], 'ppn': 2}]
  pmem: 2621440000
  walltime: 720000
resources used
  qos: normal
  features: []
  neednodes: 1:ppn=2:ivybridge
  partition: thinking
  nodect: 1
  nodes: [{'nodes': 1, 'properties': ['ivybridge'], 'ppn': 2}]
  pmem: 2621440000
  walltime: 720000


Compute data frames for jobs and hosts.

In [16]:
jobs = analysis.jobs_df

Show the jobs data frame.

In [5]:
running_jobs_1 = analysis.running_jobs('2016-07-02 20:00:00', '2016-07-02 20:15:00')

In [6]:
len(running_jobs_1)

101

In [8]:
users_1 = set(running_jobs_1.user)

In [9]:
running_jobs_2 = analysis.running_jobs('2016-07-05 16:00:00', '2016-07-02 16:15:00')

In [10]:
users_2 = set(running_jobs_2.user)

In [11]:
users_1.intersection(users_2)

{'vsc30517', 'vsc30767', 'vsc30957', 'vsc31147', 'vsc31514', 'vsc31562'}

In [13]:
running_jobs_1[running_jobs_1.user == 'vsc30767'][['start', 'end', 'nodes', 'ppn', 'exit_status']]

Unnamed: 0,start,end,nodes,ppn,exit_status
19,2016-06-30 11:43:54,2016-07-06 14:58:09,16,24,0
70,2016-07-02 18:57:11,2016-07-03 18:57:24,10,20,-11


In [14]:
running_jobs_2[running_jobs_2.user == 'vsc30767'][['start', 'end', 'nodes', 'ppn', 'exit_status']]

Unnamed: 0,start,end,nodes,ppn,exit_status
5,2016-06-30 11:43:54,2016-07-06 14:58:09,16,24,0


How many jobs were running in the GPU partition on June 7 and 8?

In [17]:
len(jobs[jobs['partition'] == 'gpu'])

122

How many distinct users where active on June 7 and 8?

In [18]:
len(set(jobs['user']))

167

List jobs for a particular user.

In [19]:
jobs[jobs['user'] == 'vsc41730']

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status
6627,2016-06-23 23:47:33,20325962.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc41730,E,thinking,,,24:00:00,2,10,,0
6628,2016-06-23 23:47:33,20325962.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc41730,S,thinking,,,24:00:00,2,10,,0


Show the host data frame.

In [20]:
hosts = analysis.hosts_df

Which jobs ran on node `r3i1n14`?

In [21]:
pd.merge(jobs, hosts[hosts['host'] == 'r3i1n14'],
         how='inner', left_on='job_id', right_on='job_id')

Unnamed: 0,time,job_id,user,state,partition,used_mem,used_walltime,spec_walltime,nodes,ppn,hosts,exit_status,host,cores
0,2016-06-17 11:10:02,20322769.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30002,S,thinking,240.6,01:00:10,01:00:00,50,20,r3i0n14 r3i0n15 r3i0n10 r3i0n11 r3i0n12 r3i0n1...,-11,r3i1n14,0-19
1,2016-06-17 12:11:51,20322769.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30002,E,thinking,240.6,01:00:10,01:00:00,50,20,r3i0n14 r3i0n15 r3i0n10 r3i0n11 r3i0n12 r3i0n1...,-11,r3i1n14,0-19
2,2016-06-17 14:11:27,20322778.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30002,S,thinking,233.0,01:00:12,01:00:00,47,20,r3i1n8 r3i0n14 r3i0n15 r3i0n10 r3i0n11 r3i0n12...,-11,r3i1n14,0-19
3,2016-06-17 15:13:15,20322778.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc30002,E,thinking,233.0,01:00:12,01:00:00,47,20,r3i1n8 r3i0n14 r3i0n15 r3i0n10 r3i0n11 r3i0n12...,-11,r3i1n14,0-19
4,2016-06-17 16:04:23,20322576.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31461,S,thinking,0.0,00:00:16,00:55:00,1,20,r3i1n14,0,r3i1n14,0-19
5,2016-06-17 16:04:41,20322576.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31461,E,thinking,0.0,00:00:16,00:55:00,1,20,r3i1n14,0,r3i1n14,0-19
6,2016-06-19 12:03:10,20323130.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31004,S,thinking,6.1,05:37:29,72:00:00,1,20,r3i1n14,0,r3i1n14,0-19
7,2016-06-19 17:40:41,20323130.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31004,E,thinking,6.1,05:37:29,72:00:00,1,20,r3i1n14,0,r3i1n14,0-19
8,2016-06-19 19:45:24,20323217.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31081,S,thinking,1.6,01:14:49,03:00:00,1,20,r3i1n14,0,r3i1n14,0-19
9,2016-06-19 21:00:14,20323217.hpc-p-svcs-10.icts.hpc.kuleuven.be,vsc31081,E,thinking,1.6,01:14:49,03:00:00,1,20,r3i1n14,0,r3i1n14,0-19
