In [1]:
import os
import pandas as pd
from itables import init_notebook_mode
import yaml
import time
from datetime import datetime


from IPython.display import display, clear_output



# Root folder holding one sub-folder of config files per status.
STATUS_DIR = "/home/hpinkard_waller/GitRepos/microscoBayes/experiments/config_files/"
SAVING_DIR_ROOT = "/home/hpinkard_waller/models/"

names = []

# List the contents of each status sub-folder (same order as the status names).
complete, training, pending, staging, abandoned = (
    os.listdir(STATUS_DIR + status_name)
    for status_name in ('complete', 'training', 'pending', 'staging', 'abandoned')
)

# Flatten into two parallel lists: every config file name, and the status of
# the folder it was found in.
config_files = []
statuses = []
for status_name, status_files in (
    ("complete", complete),
    ("training", training),
    ("pending", pending),
    ("staging", staging),
    ("abandoned", abandoned),
):
    config_files.extend(status_files)
    statuses.extend([status_name] * len(status_files))


# Per-config columns, filled in the same order as `config_files`.
tensorboard_dirs = []
config_paths = []
dates = []
elapsed_times = []
attempts = []

# hyperparameters
all_marker_training = []
arch = []
batch_size = []
density_output = []
learning_rate = []
max_epochs = []
num_mixture_components = []
overshoot_epochs = []
single_marker_early_stopping = []
single_marker_training = []


for config_file, status in zip(config_files, statuses):
    config_file_path = STATUS_DIR + status + '/' + config_file
    with open(config_file_path, "r") as stream:
        config = yaml.safe_load(stream)
    # Modification time is the fallback date when the config records no start date.
    m_time = os.path.getmtime(config_file_path)

    # The 'training' section is optional. Treat a missing (or empty) section as
    # an empty mapping so every lookup below falls back to its default instead
    # of raising KeyError, whatever the status of the config.
    has_training = 'training' in config
    training_info = config.get('training') or {}

    if 'start_date' in training_info:
        dates.append(training_info['start_date'])
    else:
        dates.append(datetime.fromtimestamp(m_time).strftime("%Y-%m-%d"))

    if 'attempt_number' in training_info:
        attempts.append(int(training_info['attempt_number']))
    else:
        attempts.append('NA')

    # 'pending' marks configs with no training section at all; 'NA' marks a
    # training section that does not name a tensorboard directory.
    if has_training:
        tensorboard_dirs.append(training_info.get('tensorboard_dir', 'NA'))
    else:
        tensorboard_dirs.append('pending')

    # Elapsed time is None when absent; otherwise it is rendered as H:MM:SS,
    # treating the stored number as seconds. Hours are not wrapped at 24.
    elapsed = training_info.get('elapsed')
    if elapsed is not None:
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        elapsed = '{}:{:02d}:{:02d}'.format(int(hours), int(minutes), int(seconds))
    elapsed_times.append(elapsed)

    # hyperparameters (every key is required; a missing one raises KeyError)
    hyperparameters = config['hyperparameters']
    all_marker_training.append(hyperparameters['all_marker_training'])
    arch.append(hyperparameters['arch'])
    batch_size.append(hyperparameters['batch_size'])
    density_output.append(hyperparameters['density_output'])
    learning_rate.append(hyperparameters['learning_rate'])
    max_epochs.append(hyperparameters['max_epochs'])
    num_mixture_components.append(hyperparameters['num_mixture_components'])
    overshoot_epochs.append(hyperparameters['overshoot_epochs'])
    single_marker_early_stopping.append(hyperparameters['single_marker_early_stopping'])
    single_marker_training.append(hyperparameters['single_marker_training'])

    # Config path with the user's home directory prefix stripped.
    config_paths.append(STATUS_DIR.replace(os.path.expanduser('~'), '') + status + '/' + config_file)


# Columns of the summary table, in display order.
d = {
    "date": dates,
    # File names with underscores shown as spaces.
    "name": [file_name.replace('_', " ") for file_name in config_files],

    # Disabled columns; uncomment to show them in the table.
    # "all_marker_training": all_marker_training,
    # "arch": arch,
    # "batch size": batch_size,
    # "density output": density_output,
    # "learning rate": learning_rate,
    # "max epochs": max_epochs,
    # "num mixture components": num_mixture_components,
    # "overshoot_epochs": overshoot_epochs,
    # "single marker early stopping": single_marker_early_stopping,
    # "single marker training": single_marker_training,
    #  "config": config_paths,

    "status": statuses,
    "attempts": attempts,
    "elapsed_time": elapsed_times,
    "tensorboard": tensorboard_dirs,
}

pd.set_option("display.min_rows", 100)

df = (
    pd.DataFrame(data=d)
    .assign(
        # Categorical with an explicit category order, so sorting by status
        # follows this order rather than alphabetical order.
        status=lambda frame: pd.Categorical(
            frame['status'],
            ["complete", "training", "pending", "staging", "abandoned"],
        ),
        # Parse to datetime, then keep only the calendar date (no time part).
        date=lambda frame: pd.to_datetime(frame['date']).dt.date,
    )
    .sort_values(by=['status', 'date'])
)

def stylize(x):
    """Return the CSS colour rule for a cell value that names a status.

    Args:
        x: A single cell value of any type.

    Returns:
        A CSS declaration string such as ``'color: green'`` when ``x`` equals
        one of the known status names, otherwise ``None`` (no styling).
    """
    status_styles = (
        ('complete', 'color: green'),
        ('training', 'color: orange'),
        ('pending', 'color: blue'),
        ('staging', 'color: lightblue'),
        ('abandoned', 'color: gray'),
    )
    # Compare with `==` rather than hashing, so any cell value type is accepted.
    for status_name, css_rule in status_styles:
        if x == status_name:
            return css_rule
    return None
    

# Colour every cell whose value is a status name (see `stylize`), then render.
# `Styler.applymap` was renamed to `Styler.map` in pandas 2.1 and the old name
# is deprecated, so use the new name when present and fall back on older pandas.
styler = df.style
colour_cells = styler.map if hasattr(styler, 'map') else styler.applymap
df = colour_cells(stylize)  # NOTE: from here on `df` is a Styler, not a DataFrame

display(df)


Unnamed: 0,date,name,status,attempts,elapsed_time,tensorboard
0,2023-03-01,marginals 1d scalar scalar hparams CD3 finetune.yaml,training,4.0,8:30:24,/home/hpinkard_waller/models/marginals_1d_scalar_scalar_hparams_CD3_finetune/tensorboard/
1,2023-03-01,marginals 1d scalar scalar hparams CD3 only.yaml,training,2.0,12:55:08,/home/hpinkard_waller/models/marginals_1d_scalar_scalar_hparams_CD3_only/tensorboard/
2,2023-03-01,marginals 1d scalar density hparams CD3 finetune.yaml,training,4.0,8:24:32,/home/hpinkard_waller/models/marginals_1d_scalar_density_hparams_CD3_finetune/tensorboard/
3,2023-03-01,marginals 1d scalar scalar hparams CD3 early stopping.yaml,training,4.0,17:50:32,/home/hpinkard_waller/models/marginals_1d_scalar_scalar_hparams_CD3_early_stopping/tensorboard/
4,2023-03-01,marginals 1d scalar scalar hparams all markers.yaml,pending,,,pending
5,2023-03-01,marginals 1d scalar density hparams CD3 early stopping.yaml,pending,,,pending
6,2023-03-01,marginals 1d scalar density hparams CD3 only.yaml,pending,,,pending
7,2023-03-01,Different input contrasts DF 50 Bottom trial2.yaml,staging,,,pending
8,2023-03-01,Different input contrasts All channels trial2.yaml,staging,,,pending
9,2023-03-01,Different input contrasts DPC raw and solved trial4.yaml,staging,,,pending


In [14]:
# NOTE(review): `log` is not defined in any cell visible here, and the recorded
# output of this cell is a NameError. TODO: define `log` before this cell or
# delete the cell so the notebook survives Restart & Run All.
log

NameError: name 'log' is not defined