In [2]:
import os
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


In [15]:

# Root directory that is scanned recursively for run configs and event files
directory = "/net/projects/cmap/model-outputs/"

# Names of the variables read out of every dsi.py config file
DSI_PARAM_NAMES = (
    'MODEL', 'BACKBONE', 'WEIGHTS', 'BATCH_SIZE', 'PATCH_SIZE',
    'NUM_CLASSES', 'LR', 'NUM_WORKERS', 'EPOCHS', 'IGNORE_INDEX',
)


def read_dsi_params(dsi_file_path):
    """Execute a dsi.py config file and return the parameters it defines.

    The file is executed in a fresh, private namespace rather than in the
    notebook's globals. This way a config that omits a variable cannot
    inherit the value left behind by a previously scanned config, and a
    config cannot overwrite notebook names such as ``data`` or ``directory``.

    Args:
        dsi_file_path: Path of the dsi.py file to execute.

    Returns:
        dict mapping every name in ``DSI_PARAM_NAMES`` to the value defined by
        the file, or to ``None`` when the file does not define that name.
    """
    with open(dsi_file_path, 'r') as dsi_file:
        dsi_content = dsi_file.read()

    # NOTE(review): exec runs arbitrary code from the scanned directory; only
    # point this at directories whose contents are trusted.
    namespace = {'__file__': dsi_file_path}
    exec(dsi_content, namespace)
    return {name: namespace.get(name) for name in DSI_PARAM_NAMES}


def read_scalar_events(events_file_path):
    """Return one record per scalar event stored in a TensorBoard event file.

    Args:
        events_file_path: Path of an ``events.out.tfevents`` file.

    Returns:
        list of dicts with the keys ``'Tag'``, ``'Step'`` and ``'Value'``.
    """
    event_acc = EventAccumulator(events_file_path)
    event_acc.Reload()
    return [
        {'Tag': tag, 'Step': event.step, 'Value': event.value}
        for tag in event_acc.Tags()['scalars']
        for event in event_acc.Scalars(tag)
    ]


def collect_run_records(directory):
    """Walk ``directory`` and gather config and scalar records for every run.

    Each dsi.py found contributes one record holding its parameters; each
    file whose name contains ``events.out.tfevents`` contributes one record
    per scalar event. Every record carries ``'Folder_Title'``, the base name
    of the folder that contains the file.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        list of record dicts, in the order the files were visited.
    """
    records = []
    for root, _dirs, files in os.walk(directory):
        folder_title = os.path.basename(root)
        for file in files:
            if file == "dsi.py":
                params = read_dsi_params(os.path.join(root, file))
                records.append({'Folder_Title': folder_title, **params})

            if "events.out.tfevents" in file:
                for scalar in read_scalar_events(os.path.join(root, file)):
                    records.append({'Folder_Title': folder_title, **scalar})
    return records


# Gather all records, then build the DataFrame once from the list of dicts
data = collect_run_records(directory)
df = pd.DataFrame(data)

# Display DataFrame
print(df)


        Folder_Title MODEL  BACKBONE WEIGHTS  BATCH_SIZE  PATCH_SIZE  \
0      VectorDataset  unet  resnet50    True        32.0       512.0   
1    20240218-172022  unet  resnet50    True        32.0       512.0   
2    20240218-172022   NaN       NaN     NaN         NaN         NaN   
3    20240218-172022   NaN       NaN     NaN         NaN         NaN   
4    20240218-172022   NaN       NaN     NaN         NaN         NaN   
..               ...   ...       ...     ...         ...         ...   
766  20240218-154842  unet  resnet50   False        32.0       512.0   
767  20240213-111329   NaN       NaN     NaN         NaN         NaN   
768  20240213-111329   NaN       NaN     NaN         NaN         NaN   
769  20240213-111329   NaN       NaN     NaN         NaN         NaN   
770  20240213-111329   NaN       NaN     NaN         NaN         NaN   

     NUM_CLASSES     LR  NUM_WORKERS  EPOCHS  IGNORE_INDEX           Tag  \
0            5.0  0.001          8.0    11.0           0.0 

In [21]:
import pandas as pd

# `df` is the DataFrame built from the scanned records.
# Hyper-parameter columns that are only populated on the dsi.py rows.
param_columns = [
    'MODEL', 'BACKBONE', 'WEIGHTS', 'BATCH_SIZE', 'PATCH_SIZE',
    'NUM_CLASSES', 'LR', 'NUM_WORKERS', 'EPOCHS', 'IGNORE_INDEX',
]

# Within each Folder_Title group, broadcast the group's first value of every
# hyper-parameter column onto all rows of that group.
filled_params = df.groupby('Folder_Title')[param_columns].transform('first')
df[param_columns] = filled_params

# Keep a single copy of any rows that are now exact duplicates.
df = df.drop_duplicates()

# Show the resulting DataFrame.
df


Unnamed: 0,Folder_Title,MODEL,BACKBONE,WEIGHTS,BATCH_SIZE,PATCH_SIZE,NUM_CLASSES,LR,NUM_WORKERS,EPOCHS,IGNORE_INDEX,Tag,Step,Value
0,VectorDataset,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,11.0,0.0,,,
1,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,,,
2,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,1.0,0.314062
3,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,2.0,0.302922
4,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,3.0,0.307505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
766,20240218-154842,unet,resnet50,False,32.0,512.0,5.0,0.001,8.0,2.0,0.0,,,
767,20240213-111329,,,,,,,,,,,Loss/train,1.0,0.324335
768,20240213-111329,,,,,,,,,,,Metric/train,1.0,0.236456
769,20240213-111329,,,,,,,,,,,Loss/test,1.0,0.553813


In [22]:
# Preview the first 20 rows of the filled DataFrame.
df.head(20)

Unnamed: 0,Folder_Title,MODEL,BACKBONE,WEIGHTS,BATCH_SIZE,PATCH_SIZE,NUM_CLASSES,LR,NUM_WORKERS,EPOCHS,IGNORE_INDEX,Tag,Step,Value
0,VectorDataset,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,11.0,0.0,,,
1,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,,,
2,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,1.0,0.314062
3,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,2.0,0.302922
4,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,3.0,0.307505
5,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,Loss/train,4.0,0.289852
6,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,IoU/train,1.0,0.285215
7,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,IoU/train,2.0,0.362658
8,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,IoU/train,3.0,0.377329
9,20240218-172022,unet,resnet50,True,32.0,512.0,5.0,0.001,8.0,5.0,0.0,IoU/train,4.0,0.359183


In [30]:
# For every (Folder_Title, Tag) pair take the last value of each column,
# then list the pairs from the highest EPOCHS setting downwards.
last_per_tag = df.groupby(["Folder_Title", "Tag"]).last()
last_per_tag.sort_values("EPOCHS", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,MODEL,BACKBONE,WEIGHTS,BATCH_SIZE,PATCH_SIZE,NUM_CLASSES,LR,NUM_WORKERS,EPOCHS,IGNORE_INDEX,Step,Value
Folder_Title,Tag,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
sjne-20240215-0,Loss/test,deeplabv3+,resnet50,True,32.0,512.0,5.0,0.001,8.0,50.0,0.0,50.0,0.492339
sjne-20240215-0,Loss/train,deeplabv3+,resnet50,True,32.0,512.0,5.0,0.001,8.0,50.0,0.0,50.0,0.269171
sjne-20240215-0,Jaccard/test,deeplabv3+,resnet50,True,32.0,512.0,5.0,0.001,8.0,50.0,0.0,50.0,0.398847
sjne-20240215-0,Jaccard/train,deeplabv3+,resnet50,True,32.0,512.0,5.0,0.001,8.0,50.0,0.0,50.0,0.525160
patchsize1024,Loss/test,unet,resnet50,True,8.0,1024.0,5.0,0.001,8.0,30.0,0.0,30.0,0.490615
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240215-130242,Loss/train,,,,,,,,,,,30.0,0.247669
20240215-132952,Jaccard/test,,,,,,,,,,,6.0,0.053661
20240215-132952,Jaccard/train,,,,,,,,,,,6.0,0.348194
20240215-132952,Loss/test,,,,,,,,,,,6.0,0.464661
