In [1]:
import os
import json
import pandas as pd
from datetime import datetime

# Directory where all runs are stored
wandb_dir = 'wandb/'

# Filter timestamp range (adjust as necessary)
start_time = datetime.strptime("20240911_163739", "%Y%m%d_%H%M%S")
end_time = datetime.strptime("20250913_030736", "%Y%m%d_%H%M%S")

# List to store data
data = []

# Loop through all subdirectories
for subdir in os.listdir(wandb_dir):
    subdir_path = os.path.join(wandb_dir, subdir)

    if os.path.isdir(subdir_path) and subdir.startswith('run-'):
        # Extract timestamp from the directory name
        timestamp_str = subdir.split('-')[1]
        timestamp = datetime.strptime(timestamp_str[:15], "%Y%m%d_%H%M%S")

        # Filter by the given timestamp range
        if start_time <= timestamp <= end_time:
            # Load the wandb-summary.json file
            summary_path = os.path.join(subdir_path, 'files', 'wandb-summary.json')
            metadata_path = os.path.join(subdir_path, 'files', 'wandb-metadata.json')

            try:
                with open(summary_path, 'r') as f_summary, open(metadata_path, 'r') as f_metadata:
                    summary_data = json.load(f_summary)
                    metadata_data = json.load(f_metadata)
                    
                    # Extract necessary data
                    run_id = subdir.split('-')[-1]  # Extract id from folder name
                    args = metadata_data.get('args', [])
                    
                    # Convert args to a dictionary format
                    args_dict = {args[i]: args[i+1] for i in range(0, len(args), 2)}

                    # Compile data
                    row = {
                        'timestamp': timestamp,
                        'id': run_id,
                        **summary_data,   # Include all keys from wandb-summary.json
                        **args_dict       # Include all args as columns
                    }
                    
                    # Append row to data list
                    data.append(row)

            except FileNotFoundError as e:
                print(f"Error loading files for {subdir}: {e}")

# Create DataFrame from data
df = pd.DataFrame(data)

# Display or save the table
# df.to_csv('wandb_runs_summary.csv', index=False)
df.head()


Unnamed: 0,timestamp,id,Noise Mean,Noise Std,Noise Skewness,Noise Kurtosis,Noise Range,Noise Entropy,Monobit Frequency Test,Block Frequency Test,...,_wandb,--project_wandb,--noise_type,--noise_std,--noise_mean,--noise_alpha,--noise_beta,--noise_lambda,--noise_min,--noise_max
0,2024-09-15 23:40:38,u9ovesdl,0.368981,0.037066,0.231794,0.011892,0.267568,-710.956648,1.0,,...,{'runtime': 1501},gan-noise-investigation-10,lognormal,0.1,-1.0,,,,,
1,2024-09-14 10:55:34,tbu5gtf8,0.011035,0.014249,4.747998,37.085775,0.201247,-1782.186695,1.0,,...,{'runtime': 3433},gan-noise-investigation-4,lognormal,1.0,-5.0,,,,,
2,2024-09-12 21:00:40,rqvypwf4,5.183114,10.068073,-0.024752,0.103477,70.254623,5.285577,1.0,,...,{'runtime': 130},,normal,10.0,5.0,,,,,
3,2024-09-14 00:53:20,fvlxthel,5.115795,5.027834,0.046577,-0.051536,37.914513,7.968917,1.0,,...,{'runtime': 236},gan-noise-investigation-6,normal,5.0,5.0,,,,,
4,2024-09-15 01:53:13,dade2kbb,3.049962,1.767428,1.153877,1.779179,12.999602,14.23963,1.0,,...,{'runtime': 314},gan-noise-investigation-4,gamma,,,3.0,1.0,,,


In [4]:
# Filter based on criteria for 'D(x)' and 'D(G(z))'
relevant_columns = ['--noise_type',	'--noise_std',	'--noise_mean',	'--noise_alpha',	'--noise_beta',	'--noise_min',	'--noise_max',	'--noise_lambda']
filtered_df_1 =df.loc[(df['D(x)'] <= 0.98) & (df['D(G(z))']>=0.02), relevant_columns].fillna('NA').value_counts().reset_index()

filtered_df_1

Unnamed: 0,--noise_type,--noise_std,--noise_mean,--noise_alpha,--noise_beta,--noise_min,--noise_max,--noise_lambda,count
0,normal,0.5,0.0,,,,,,10
1,normal,1.0,0.0,,,,,,10
2,normal,2.0,0.0,,,,,,10
3,normal,5.0,0.0,,,,,,10
4,normal,5.0,1.0,,,,,,10
5,uniform,,,,,-1.0,1.0,,10
6,normal,10.0,1.0,,,,,,9
7,normal,10.0,-1.0,,,,,,8
8,normal,5.0,-1.0,,,,,,8
9,lognormal,5.0,-5.0,,,,,,7


In [6]:
# ensures that trunning with this parameter set has converged N times
filtered_df = filtered_df_1[filtered_df_1['count']>=7]

# Prepare the output list
output_list = []

# Iterate through the filtered rows
for _, row in filtered_df.iterrows():
    noise_dict = {}
    
    # Check for different noise types and add corresponding parameters
    # Check for different noise types and add corresponding parameters
    if row.get('--noise_type') == 'uniform':
        noise_dict['noise_type'] = 'uniform'
        noise_dict['noise_max'] = float(row.get('--noise_max', 1.0))  # Default if not present
        noise_dict['noise_min'] = float(row.get('--noise_min', -1.0))  # Default if not present

    elif row.get('--noise_type') == 'normal':
        noise_dict['noise_type'] = 'normal'
        noise_dict['noise_mean'] = float(row.get('--noise_mean', 0.0))  # Default if not present
        noise_dict['noise_std'] = float(row.get('--noise_std', 1.0))  # Default if not present

    elif row.get('--noise_type') == 'lognormal':
        noise_dict['noise_type'] = 'lognormal'
        noise_dict['noise_mean'] = float(row.get('--noise_mean', 0.0))  # Default if not present
        noise_dict['noise_std'] = float(row.get('--noise_std', 1.0))  # Default if not present

    elif row.get('--noise_type') == 'exponential':
        noise_dict['noise_type'] = 'exponential'
        noise_dict['noise_lambda'] = float(row.get('--noise_lambda', 1.0))  # Default if not present

    elif row.get('--noise_type') == 'gamma':
        noise_dict['noise_type'] = 'gamma'
        noise_dict['noise_alpha'] = float(row.get('--noise_alpha', 1.0))  # Default if not present
        noise_dict['noise_beta'] = float(row.get('--noise_beta', 1.0))  # Default if not present

    elif row.get('--noise_type') == 'poisson':
        noise_dict['noise_type'] = 'poisson'
        noise_dict['noise_lambda'] = float(row.get('--noise_lambda', 1.0)) # Default if not present

    elif row.get('--noise_type') == 'random_binary':
        noise_dict['noise_type'] = 'random_binary'
        # No additional parameters for random_binary

    # Append the dictionary to the output list
    output_list.append(noise_dict)

print(len(output_list))
# Print or return the formatted list
output_list

10


[{'noise_type': 'normal', 'noise_mean': 0.0, 'noise_std': 0.5},
 {'noise_type': 'normal', 'noise_mean': 0.0, 'noise_std': 1.0},
 {'noise_type': 'normal', 'noise_mean': 0.0, 'noise_std': 2.0},
 {'noise_type': 'normal', 'noise_mean': 0.0, 'noise_std': 5.0},
 {'noise_type': 'normal', 'noise_mean': 1.0, 'noise_std': 5.0},
 {'noise_type': 'uniform', 'noise_max': 1.0, 'noise_min': -1.0},
 {'noise_type': 'normal', 'noise_mean': 1.0, 'noise_std': 10.0},
 {'noise_type': 'normal', 'noise_mean': -1.0, 'noise_std': 10.0},
 {'noise_type': 'normal', 'noise_mean': -1.0, 'noise_std': 5.0},
 {'noise_type': 'lognormal', 'noise_mean': -5.0, 'noise_std': 5.0}]