In [60]:
import pandas as pd
import os

import subprocess  # local to this cell; only needed for the sacct call below

# Work from the home directory so that test.txt is written where it was before.
os.chdir('/global/u1/j/joeschm')

# Same sacct query as before, passed as an argument list so no shell is involved.
sacct_cmd = [
    "sacct", "-u", "joeschm", "-X",
    "--format=jobid,jobname,state,start,end,timelimit,elapsed",
    "-S", "7/01/21-00:00:00",
    "-E", "7/25/21-00:00:00",
    "-p",
]

# check=True raises CalledProcessError if sacct exits non-zero, instead of
# silently leaving an empty/partial test.txt for the parsing cell to consume.
with open("test.txt", "w") as sacct_out:
    sacct_status = subprocess.run(sacct_cmd, stdout=sacct_out, check=True).returncode

sacct_status  # exit status of sacct, shown as the cell output

0

## Add Job Info to Dataframe

In [61]:
# Parse the '|'-delimited job listing in test.txt into jobs_df (one row per job).
with open('test.txt', 'r') as sacct_file:    # context manager closes the handle
    # Keep only non-blank lines, so a missing or extra trailing newline
    # neither drops the last job nor produces an empty row.
    chunks = [line for line in sacct_file.read().splitlines() if line.strip()]

# Each line ends with a trailing '|', so the last split field is '' and is dropped.
header = chunks[0].split('|')[:-1] if chunks else []   #header column for dataframe

# Collect every row first and build the frame once: DataFrame.append in a loop
# copies the whole frame per row and no longer exists in pandas >= 2.0.
rows = [line.split('|')[:-1] for line in chunks[1:]]
jobs_df = pd.DataFrame(rows, columns = header)

print(jobs_df)

       JobID JobName      State                Start                  End  \
0   43746296    GENE  COMPLETED  2021-07-01T02:36:53  2021-07-01T06:58:33   
1   43746309    GENE    TIMEOUT  2021-07-01T02:36:53  2021-07-01T12:37:07   
2   43747959    GENE  COMPLETED  2021-07-01T03:08:05  2021-07-01T03:17:29   
3   43747982    GENE  COMPLETED  2021-07-01T03:08:05  2021-07-01T03:31:54   
4   43748055    GENE  COMPLETED  2021-07-01T03:08:28  2021-07-01T03:43:35   
..       ...     ...        ...                  ...                  ...   
66  44498118    GENE  COMPLETED  2021-07-21T12:03:12  2021-07-21T12:09:09   
67  44499189    GENE  COMPLETED  2021-07-21T12:22:33  2021-07-21T12:27:29   
68  44499843    GENE  COMPLETED  2021-07-21T12:40:27  2021-07-21T12:55:47   
69  44500672    GENE  COMPLETED  2021-07-21T13:11:02  2021-07-21T13:25:46   
70  44502193    GENE  COMPLETED  2021-07-21T13:50:28  2021-07-21T14:04:07   

   Timelimit   Elapsed  
0   10:00:00  04:21:40  
1   10:00:00  10:00:14  


## Associate JobID with Output Filepath & Problem

In [62]:
GENE_path = '/global/homes/j/joeschm/GENE'
os.chdir(GENE_path)   # cwd stays here; the prob_* directories are addressed by full path below

col = ["JobID","Path","Problem"]


def _find_scandir(out_file):
    """Return the SCANDIR value found in a GENE.<jobID>.out file, or False.

    Only the first line containing 'SCANDIR' is considered. Spaces are removed
    from that line and everything after its first '=' is returned. If no line
    contains 'SCANDIR', or that line has no '=', the result is False.
    """
    with open(out_file, 'r') as handle:        # closed automatically
        for line in handle:
            line = line.rstrip('\n').replace(" ", "")   #remove spaces from within lines
            if 'SCANDIR' in line:
                _, found_equals, value = line.partition('=')
                return value if found_equals else False
    return False


# One [JobID, Path, Problem] record per GENE.<jobID>.out file found in any prob_* directory.
records = []
for filename in os.listdir(GENE_path):
    prob_path = os.path.join(GENE_path, filename)
    if not (filename.startswith("prob_") and os.path.isdir(prob_path)):
        continue   # only GENE problem directories are scanned

    for GENEfile in os.listdir(prob_path):
        if not (GENEfile.startswith("GENE.") and GENEfile.endswith(".out")):
            continue

        jobID = GENEfile[len("GENE."):-len(".out")]   #get XXXX jobID in GENE.XXXX.out file
        if not jobID:
            continue   # e.g. a file literally named 'GENE.out' carries no job id

        # The path is looked up fresh for every file, so a file without a SCANDIR
        # line gets False rather than the value left over from the previous file.
        data_path = _find_scandir(os.path.join(prob_path, GENEfile))
        records.append([jobID, data_path, filename])

# Build the frame once; dtype=object keeps Path as a mixed str/False column
# even when every entry happens to be False.
output_path_df = pd.DataFrame(records, columns = col, dtype = object)
                
                

## Add Path and Problem to Each Job, Drop Jobs Without a JobName, and Mark Missing Paths/Problems as False

In [63]:
# Merge the output path / problem name of each job into the job table, aligning rows on JobID.
# combine_first takes values from output_path_df where it has them and fills the
# remaining cells/columns from the de-duplicated jobs_df.
jobs_df = (
    output_path_df.set_index('JobID')
    .combine_first(jobs_df.drop_duplicates().set_index('JobID'))
    .reset_index()
)

# Rows with no JobName come only from output files whose JobID is absent from the
# job table; drop them, then replace missing Path / Problem entries with False.
# fillna with a per-column dict returns a new frame, avoiding the in-place call on
# a column selection, which may act on a temporary copy and leave jobs_df unchanged.
jobs_df = (
    jobs_df
    .dropna(subset=['JobName'])
    .fillna({"Path": False, "Problem": False})
)

In [64]:
# Switch to the JobID_Generator notebook directory and write the job table
# there as test.csv, leaving out the row index.
csv_dir = '/global/homes/j/joeschm/Notebooks/JobID_Generator'
os.chdir(csv_dir)
jobs_df.to_csv('test.csv', index=False)


## Read Parameter Files and Get Important Parameters

In [65]:
def get_pars(filename):
    """Read the parameters file that shares its suffix with ``filename``.

    The last four characters of ``filename`` are taken as the suffix, and the
    file 'parameters_<suffix>' (relative to the current working directory) is
    read through ``Parameters.Read_Pars``.

    Returns a tuple ``(pars, suffix)``: the reader's ``pardict`` attribute
    and the four-character suffix.
    """
    suffix = filename[-4:]                    # e.g. the 00xx run suffix

    reader = Parameters()
    reader.Read_Pars('parameters_' + suffix)  # populates reader.pardict

    return reader.pardict, suffix

In [66]:
import sys
sys.path.insert(1, '/global/homes/j/joeschm/IFS_scripts')
from genetools import Parameters

col = ['nz0', 'kymin', 'n0_global', 'istep_field', 'timelim'] #parameters to look for and append

# For every job path, print the path and then one list of the `col` parameters
# per parameters_* file found in that directory.
for path in jobs_df["Path"]:
    print(path)

    # Jobs without a recorded path carry False in the Path column.
    if path == False:
        print('No filepath found')
        continue

    # A path was recorded but it is not present on disk.
    if not os.path.exists(path):
        print('Filepath is empty!')
        continue

    os.chdir(path)   # get_pars reads parameters_<suffix> relative to the cwd

    for filename in os.listdir(os.getcwd()):
        if not filename.startswith("parameters_"):
            continue

        pars, suffix = get_pars(filename)

        # Value of each parameter of interest, or False when the file lacks it.
        param_list = [pars[key] if key in pars else False for key in col]

        print(param_list)

        
#https://stackoverflow.com/questions/68231398/create-and-fill-duplicate-dataframe-values-with-lists/68238253#68238253

False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
False
No filepath found
/global/cscratch1/sd/joeschm/ND_scans/scanfiles0011
[512, 90.0, 20643, 10, 86000]
[512, 9.0, 2064, 10, 86000]
[512, 90.0, 20643, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 9.0, 2064, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 90.0, 20643, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 45.0, 10322, 10, 86000]
[512, 9.0, 2064, 10, 86000]
[512, 90.0, 20643, 10, 86000]
[512, 