# Performance Data Lookup

In [1]:
# System dependencies
from os import listdir
import time as clock
from datetime import timedelta
from IPython.display import clear_output

In [2]:
# Data manipulation dependencies
import pandas as pd
import numpy as np
import datetime as dt

In [3]:
# Custom data handling methods
import prep_IO

In [4]:
# Directory to save to
# (relative path: resolved against the kernel's current working directory)
save_dir = '../data/continued/'

# Directory of previous job scans
# (the path suggests saved job-ID labels -- TODO confirm what is stored here)
id_dir = '../data/labels/IDs/'

In [5]:
# Directory of recent saved comet jobs
# Absolute path: it must exist and be readable on the machine running the kernel.
# The trailing '/' matters -- later cells build paths with `source_dir + name`.
source_dir = '/oasis/projects/nsf/sys200/stats/xsede_stats/'

In [6]:
# List the top-level entries of source_dir; the bare name on the last line displays them.
contents = listdir(source_dir)
contents

['archive_of_archive',
 'gordon_hostfile_logs',
 'gordon_pickles',
 'comet_accounting',
 'gordon_accounting',
 'comet_pickles',
 'archive',
 '.htaccess',
 'comet_hostfile_logs']

In [7]:
# Probe every entry directly under source_dir and print the ones that cannot be
# listed (plain files such as '.htaccess', or -- presumably -- directories this
# user is not allowed to read).
possible = [ source_dir+file_name for file_name in listdir(source_dir) ]

for item in possible:
    try:
        listdir(item)
    except OSError:
        # NotADirectoryError, PermissionError and FileNotFoundError are all
        # subclasses of OSError. Catching only OSError (instead of a bare
        # `except:`) lets KeyboardInterrupt and genuine bugs propagate.
        print(item)

/oasis/projects/nsf/sys200/stats/xsede_stats/gordon_hostfile_logs
/oasis/projects/nsf/sys200/stats/xsede_stats/gordon_pickles
/oasis/projects/nsf/sys200/stats/xsede_stats/gordon_accounting
/oasis/projects/nsf/sys200/stats/xsede_stats/comet_pickles
/oasis/projects/nsf/sys200/stats/xsede_stats/.htaccess
/oasis/projects/nsf/sys200/stats/xsede_stats/comet_hostfile_logs


In [8]:
# Sub-directories of source_dir used by the cells below, keyed by a short label.
# Entries currently switched off:
#   'host_info':   source_dir+'comet_hostfile_logs'
#   'old_pickles': source_dir+'comet_pickles'
locs = dict(
    aofa=source_dir+'archive_of_archive',
    job_info=source_dir+'comet_accounting',
    arc=source_dir+'archive',
)

In [9]:
# Show every configured location; the dict keys are not needed for this,
# so iterate over the values directly.
for loc in locs.values():
    print(loc)

/oasis/projects/nsf/sys200/stats/xsede_stats/archive_of_archive
/oasis/projects/nsf/sys200/stats/xsede_stats/comet_accounting
/oasis/projects/nsf/sys200/stats/xsede_stats/archive


In [10]:
def _list_host_files(root):
    """Return the path of every entry found inside each sub-directory of ``root``.

    Parameters
    ----------
    root : str
        Directory whose immediate children are themselves directories
        (one per host, judging by the previewed paths).

    Returns
    -------
    list of str
        ``root/host_dir/stamp`` for every ``stamp`` inside every ``host_dir``,
        in the order ``listdir`` reports them.

    Raises
    ------
    OSError
        Propagated from ``listdir`` if ``root`` or one of its children cannot
        be listed (e.g. it is not a directory or is not readable).
    """
    return [ root+'/'+host_dir+'/'+stamp
             for host_dir in listdir(root)
             for stamp in listdir(root+'/'+host_dir) ]

# Same walk for both archive trees; one helper replaces two copies of the comprehension.
arc_data = _list_host_files(locs['arc'])

aofa_data = _list_host_files(locs['aofa'])

In [11]:
# Preview: first path collected under the 'arc' location
arc_data[0]

'/oasis/projects/nsf/sys200/stats/xsede_stats/archive/comet-10-14.sdsc.edu/1587951665.gz'

In [12]:
# Full path of every entry in the accounting directory.
acct_info_locs = [ '/'.join((locs['job_info'], stamp))
                   for stamp in listdir(locs['job_info']) ]

# Slice [-14:-4] keeps the 10 characters that precede the final 4 characters of
# each path, i.e. '2019-09-19' for '.../2019-09-19.txt'.
# NOTE(review): this assumes every file is named 'YYYY-MM-DD.txt' -- confirm.
dates = [ path[-14:-4] for path in acct_info_locs ]

In [13]:
# Preview: first accounting-file path
acct_info_locs[0]

'/oasis/projects/nsf/sys200/stats/xsede_stats/comet_accounting/2019-09-19.txt'

In [14]:
# Number of paths collected under the 'arc' location
len(arc_data)

299014

In [15]:
# Number of paths collected under the 'aofa' location
len(aofa_data)

1809956

In [16]:
# Number of accounting files found
len(acct_info_locs)

1196

### Opt: System Data

In [17]:
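### Process available system data ###
# Disabled (optional step). The helper names below are not defined under these
# bare names in the cells above -- presumably they live in prep_IO; confirm and
# qualify them before re-enabling.
#arc_hosts = [ host_to_info_dict( host_file ) for host_file in arc_data ]
#aofa_hosts = [ host_to_info_dict( host_file ) for host_file in aofa_data ]
#acct_dates = job_to_info_dict( acct_info_locs )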

# Target(s)

In [18]:
### Process user compiled list of target samples ###
# One 4-tuple per job: (node list, timestamp, timestamp, job ID).
# NOTE(review): the two timestamps are presumably job start and end -- confirm.
chosen = [
    ('comet-14-72,comet-19-19,comet-28-55', '2020-05-13T02:11:38', '2020-05-13T05:07:34', '33301074'),
    ('comet-30-10', '2020-05-13T10:50:08', '2020-05-13T10:50:12', '33321014'),
    ('comet-06-46,comet-12-52,comet-22-[39,64]', '2020-05-12T06:40:43', '2020-05-13T14:27:44', '33283100'),
    ('comet-21-07', '2020-05-27T08:49:26', '2020-05-28T01:27:49', '33637231'),
    ('comet-22-48', '2020-05-27T09:32:35', '2020-05-28T00:25:06', '33637422'),
]

In [26]:
# Job IDs are the fourth field (index 3) of every tuple in `chosen`.
chosen_ids = [ entry[3] for entry in chosen ]
chosen_ids

['33301074', '33321014', '33283100', '33637231', '33637422']

In [19]:
# Look up the chosen samples. The displayed result is a dict with the keys
# 'Found', 'Not Found' and 'Unable to Search', each holding a list.
# NOTE(review): mode='l' presumably selects "search from a list" -- confirm in prep_IO.
out = prep_IO.search(
    from_list=chosen,
    mode='l',
)
out

{'Found': ['/oasis/projects/nsf/sys200/stats/xsede_stats/comet_accounting/2020-05-13.txt',
  '/oasis/projects/nsf/sys200/stats/xsede_stats/comet_accounting/2020-05-12.txt'],
 'Not Found': [('comet-14-72', 1589361098, 1589371654, '33301074'),
  ('comet-19-19', 1589361098, 1589371654, '33301074'),
  ('comet-28-55', 1589361098, 1589371654, '33301074'),
  ('comet-06-46', 1589290843, 1589405264, '33283100'),
  ('comet-12-52', 1589290843, 1589405264, '33283100'),
  ('comet-22-39', 1589290843, 1589405264, '33283100'),
  ('comet-22-64', 1589290843, 1589405264, '33283100')],
 'Unable to Search': [('comet-30-10',
   '2020-05-13T10:50:08',
   '2020-05-13T10:50:12',
   '33321014'),
  ('comet-21-07', '2020-05-27T08:49:26', '2020-05-28T01:27:49', '33637231'),
  ('comet-22-48', '2020-05-27T09:32:35', '2020-05-28T00:25:06', '33637422')]}

# Accessing Info

In [27]:
# Print the accounting record of every chosen job from each file that the
# search reported under 'Found'.
# NOTE: this rebinds `possible`, which an earlier cell used for directory paths.
for f in out["Found"]:
    possible = prep_IO.open_txt( f )
    
    for jobid in chosen_ids:
        for chunk in possible:
            # Records are '|'-delimited text with the job ID as the first field
            # (see the printed output). Compare that field exactly: a plain
            # substring test (`jobid in chunk`) would also match a longer job ID
            # or the same digits appearing in another field such as the job name.
            # Assumes open_txt yields str records -- TODO confirm.
            if chunk.split('|', 1)[0].strip() == jobid:
                print(chunk)

33301074|haqi|uic304|2020-05-13T02:11:38|2020-05-13T05:07:34|2020-05-12T02:50:25|compute|1-00:00:00|eq-5MN6-01_sim2|FAILED|3|6|comet-14-72,comet-19-19,comet-28-55

33321014|pschoenh|mia152|2020-05-13T10:50:08|2020-05-13T10:50:12|2020-05-13T10:21:04|gpu-shared|1-00:00:00|Gyroid_unitc/fa98bb55/all/0000/a542ce7d51e6cf29b8f0115a3b9882c8|FAILED|1|6|comet-30-10

33283100|nyporko|jsu101|2020-05-12T06:40:43|2020-05-13T14:27:44|2020-05-11T10:12:17|compute|1-23:59:00|umbrella5|FAILED|4|96|comet-06-46,comet-12-52,comet-22-[39,64]

