## This notebooks records how often each module was used based on the job scripts run

Script looks over all the job scripts submitted in `/global/home/groups/scs/slurm/jobscripts` and looks at the module load list.

In [1]:
import os
import sys
import re
import json
import pandas as pd

In [2]:
username = 'sera4415'

### We only need to run the following cells once to load the information and once with each new month

In [3]:
# Helper function that parses through each script to find the loaded module
# Input: text string
# Output: list of strings (modules)
def find_modules(text):
    modules = re.findall(r'\module load[^\n;]*', text) # Returns list ie ['module load python', 'module load vim']
    modules = [m[12:] for m in modules] # Parses out 'module load' -> ['python', 'vim']
    out = []
    for m in modules: 
        out += m.split(" ")
    return out

# Helper function that parses through each script to find the username 
# Input: text string
# Output: string username
def find_users(text):
    user = re.findall(r'\#SBATCH --account=[^\n;]*', text) # Returns list ie ['module load python', 'module load vim']
    if len(user) == 0: 
        return 'None'
    return user[0][18:].strip()

In [4]:
# Maps each month to another dictionary of modules : popularity 
month_modules_dict = {}

In [9]:
scripts_loc = '/global/home/groups/scs/slurm/jobscripts/'

os.chdir(scripts_loc)

year_folders = os.listdir() 

# Creates mappings for each year/month, telling us which days were used     
month_dict = {}
    
for year in year_folders:
    os.chdir(scripts_loc + year)
    
    day_folders = os.listdir()
    for day in day_folders: 
        month = day[:7]
        if month in month_dict.keys(): 
            month_dict[month].append(day)
        else:
            month_dict[month] = [day]

Processes each months worth of scripts and saves it to users scratch file

We will be saving each month in a yyyy-mm.json file, in the format of `{"module" : {"user" : {"num_uses" : #}}}`

### Month by month

In [13]:
errors = []
user_errors = []

error_dest = '/global/scratch/' + username + '/errors.txt'
user_dest = '/global/scratch/' + username + '/user_errors.txt'
   
# Creates error log files if they don't exist
if not os.path.isfile(error_dest):
    !touch $error_dest
    
if not os.path.isfile(user_dest):
    !touch $user_dest
    
month_dict_keys = list(month_dict.keys())
month_dict_keys.sort()

for month in month_dict_keys: 
    # Prints day so we can make sure it's going through all the folders
    print("******", month, "******")
    save_dest = '/global/scratch/' + username + '/' + month + '.json'
    # If the month has already been loaded skips 
    if os.path.isfile(save_dest):
        continue
    day_files = month_dict[month]
    day_files.sort()
    index = 0
    module_dict = {}
    # Goes through each day
    for day in day_files:
        print(day+" ", end="", flush=True)
        index = 0
        dest = scripts_loc + day[:4] + "/" + day
        os.chdir(dest)
        script_files = os.listdir()
        # Then goes through each script per day
        for script in script_files: 
            if index % 100 == 0: 
                print(".", end="", flush=True)
            index += 1
            # Goes through all the scripts in each day 
            try:
                file = open(script, 'r')    
                script_text = file.read()

                # Finds list of all modules and adds them to our dictionay 
                modules = find_modules(script_text)
                user = find_users(script_text)
                if user == 'None':
                    user_errors.append(day + "/" + script)
                for module in modules: 
                    if module in module_dict.keys(): 
                        users_dict = module_dict[module]
                        if user in users_dict.keys():
                            users_dict[user]["num_uses"] += 1
                        else:
                            users_dict[user] = {"num_uses" : 1}
                    else:
                        users_dict = {}
                        users_dict[user] = {"num_uses" : 1}
                        module_dict[module] = users_dict 
            except UnicodeDecodeError:
                print("Unable to load: " + day + "/" + script)
                errors.append(day + "/" + script)
                
        print()
        os.chdir('..')
             
    !touch $save_dest
    with open(save_dest, 'w') as fp:
        json.dump( module_dict, fp)
        
    with open(error_dest, 'a+') as f: 
        f.write(' '.join(errors))
    errors = []
    with open(error_dest, 'a+') as f: 
        f.write(' '.join(user_errors))
    user_errors = []
        
    month_modules_dict[month] = module_dict

****** 2016-09 ******
****** 2016-10 ******
****** 2016-11 ******
****** 2016-12 ******
****** 2017-01 ******
****** 2017-02 ******
****** 2017-03 ******
****** 2017-04 ******
****** 2017-05 ******
****** 2017-06 ******
****** 2017-07 ******
****** 2017-08 ******
****** 2017-09 ******
****** 2017-10 ******
****** 2017-11 ******
****** 2017-12 ******
****** 2018-01 ******
****** 2018-02 ******
2018-02-01 ..

  """


................
2018-02-02 .........................
2018-02-03 ....................................
2018-02-04 .......................................................................
2018-02-05 ...................
2018-02-06 ..............................
2018-02-07 ..................
2018-02-08 ........................
2018-02-09 ..........................
2018-02-10 ............
2018-02-11 .....
2018-02-12 ............
2018-02-13 ...............
2018-02-14 ...........
2018-02-15 ....Unable to load: 2018-02-15/job2162833
.
2018-02-16 .......
2018-02-17 ......
2018-02-18 ....
2018-02-19 .......
2018-02-20 ..........
2018-02-21 ..........
2018-02-22 ................................
2018-02-23 .............
2018-02-24 ....
2018-02-25 ........
2018-02-26 ....................
2018-02-27 ...........................
2018-02-28 ..................
****** 2018-03 ******
2018-03-01 .........................
2018-03-02 ...............
2018-03-03 .......
2018-03-04 ...
2018-03-05 .........
2018-

### All months

In [11]:
errors = []
user_errors = []

error_dest = '/global/scratch/' + username + '/total_errors.txt'
user_dest = '/global/scratch/' + username + '/total_user_errors.txt'
save_dest = '/global/scratch/' + username + '/total.json'   

month_dict_keys = list(month_dict.keys())
month_dict_keys.sort()

module_dict = {}

for month in month_dict_keys: 
    # Prints day so we can make sure it's going through all the folders
    print("******", month, "******")
    day_files = month_dict[month]
    day_files.sort()
    index = 0
    # Goes through each day
    for day in day_files:
        print(day+" ", end="", flush=True)
        index = 0
        os.chdir(day)
        script_files = os.listdir()
        # Then goes through each script per day
        for script in script_files: 
            if index % 100 == 0: 
                print(".", end="", flush=True)
            index += 1
            # Goes through all the scripts in each day 
            try:
                file = open(script, 'r')    
                script_text = file.read()

                # Finds list of all modules and adds them to our dictionay 
                modules = find_modules(script_text)
                user = find_users(script_text)
                if user == 'None':
                    user_errors.append(day + "/" + script)
                for module in modules: 
                    if module in module_dict.keys(): 
                        users_dict = module_dict[module]
                        if user in users_dict.keys():
                            users_dict[user]["num_uses"] += 1
                        else:
                            users_dict[user] = {"num_uses" : 1}
                    else:
                        users_dict = {}
                        users_dict[user] = {"num_uses" : 1}
                        module_dict[module] = users_dict 
            except UnicodeDecodeError:
                print("Unable to load: " + day + "/" + script)
                errors.append(day + "/" + script)
                
        print()
        os.chdir('..')
      
    
    
!touch $save_dest
with open(save_dest, 'w') as fp:
    json.dump( module_dict, fp)

# Creates error log files if they don't exist
if not os.path.isfile(error_dest):
    !touch $error_dest
    with open(error_dest, 'a+') as f: 
        f.write(' '.join(errors))
        
if not os.path.isfile(user_dest):
    !touch $user_dest
    with open(user_dest, 'a+') as f: 
        f.write(' '.join(user_errors))


****** 2016-09 ******
2016-09-15 .
2016-09-21 ....

  """


..............
2016-09-22 ...............................
2016-09-23 .............
2016-09-24 ..
2016-09-25 ...
2016-09-26 .........................
2016-09-27 .........
2016-09-28 .............
2016-09-29 ........
2016-09-30 ...
****** 2016-10 ******
2016-10-01 ..
2016-10-02 ..
2016-10-03 .....................
2016-10-04 ..............
2016-10-05 ....................
2016-10-06 ........
2016-10-07 .......
2016-10-08 ..
2016-10-09 ...
2016-10-10 .........
2016-10-11 ..............
2016-10-12 .........
2016-10-13 ........................................
2016-10-14 .........
2016-10-15 ........
2016-10-16 .....................................................
2016-10-17 .....................
2016-10-18 .....
2016-10-19 ....
2016-10-20 .......
2016-10-21 ........
2016-10-22 ....
2016-10-23 ....
2016-10-24 ................
2016-10-25 .......................................
2016-10-26 ............................
2016-10-27 .......
2016-10-28 ..................................
2016-10-29 ...

2017-11-01 .......................................................
2017-11-02 ....................................
2017-11-03 ................
2017-11-04 ..............
2017-11-05 ........
2017-11-06 ..........................................................
2017-11-07 .........................................................
2017-11-08 ........................Unable to load: 2017-11-08/job1907450
...............
2017-11-09 ..................................
2017-11-10 ...........................................
2017-11-11 ....................................
2017-11-12 .................................
2017-11-13 .....................................
2017-11-14 .............................................
2017-11-15 ..............................................
2017-11-16 .........................................
2017-11-17 .............................
2017-11-18 .................................
2017-11-19 ..................................
2017-11-20 ...........................
2

### Loads saved information and makes visuals from them

In [13]:
# Insert month you want displayed here
month = 'total'
path = '/global/scratch/' + username + '/' + month + '.json'
with open(path, 'r') as fp:
    data = json.load(fp)
    df = pd.DataFrame.from_dict({(i,j): data[i][j] 
                       for i in data.keys() 
                       for j in data[i].keys()},
                   orient='index')
df
    # loop through all modules and graph its user per user 

Unnamed: 0,Unnamed: 1,num_uses
,,29059
,ac_adesnik,224
,ac_cdcal,6
,ac_neander,11
,ac_scsguest,614
,aiolos_savio_normal,2
,co_aiolos,1307
,co_astro,129
,co_bachtrog,19
,co_lsdi\t\t#Repository to be charged to: co_lsdi,5
