In [25]:
import pandas as pd
import numpy as np
from glob import glob
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from collections import defaultdict

def recursivedict():
    """Return a ``defaultdict`` whose missing keys create further nested ``recursivedict``s.

    This allows arbitrarily deep assignment such as ``d[a][b][c] = value``
    without creating the intermediate levels by hand.

    Defined with ``def`` rather than as a lambda bound to a name so the factory
    carries a real ``__name__`` (a lambda's is ``<lambda>``) and can be
    referenced by name, e.g. when pickling.
    """
    return defaultdict(recursivedict)


def to_regular_dict(d):
    """Recursively convert nested ``defaultdict`` objects into plain ``dict`` objects.

    Anything that is not a ``defaultdict`` (including a plain ``dict``) is
    returned unchanged, so the recursion stops at the first non-``defaultdict``
    value on each branch.
    """
    if not isinstance(d, defaultdict):
        return d
    return {key: to_regular_dict(value) for key, value in d.items()}


def merge_dicts(dict1, dict2):
    """Return a new dict combining ``dict1`` and ``dict2``, merging nested dicts recursively.

    The result starts as a shallow copy of ``dict1``. For each key in
    ``dict2``: if both sides hold a ``dict`` under that key, the two are merged
    recursively; otherwise the value from ``dict2`` replaces whatever was there.
    """
    result = dict1.copy()
    for k, incoming in dict2.items():
        # Overwrite unless both sides hold a dict under this key.
        if (
            k not in result
            or not isinstance(result[k], dict)
            or not isinstance(incoming, dict)
        ):
            result[k] = incoming
            continue
        result[k] = merge_dicts(result[k], incoming)
    return result


def myround(n, i=0):
    """Round ``n`` to ``i`` decimal places, with ties rounded up (toward +infinity).

    Unlike the built-in ``round`` (which rounds ties to the nearest even
    digit), ``myround(2.5)`` gives ``3.0``.

    Parameters
    ----------
    n : scalar number to round.
    i : number of decimal places to keep (default 0).

    Returns
    -------
    float
        The rounded value.
    """
    # Local import: the notebook's import cell does not bring in ``math``.
    import math

    scale = 10**i
    # floor(x + 0.5) rounds to nearest with ties going up. ``int()`` would
    # truncate toward zero instead, which mis-rounds negative values
    # (e.g. -1.6 would become -1.0 rather than -2.0).
    return math.floor(n * scale + 0.5) / scale


# Root directory of the experiment results: per-run pickle files are read from
# beneath it, and the aggregated outputs are written to its `formated_data` subfolder.
prefix = "../results"

In [3]:
# Aggregate the per-run result pickles into, for every (experiment, suffix) pair:
#   * one feather file of per-sample predictions,
#   * one feather file of per-epoch training history (when any run recorded one),
#   * one pickle holding the remaining per-run metadata as nested plain dicts.

# Sweep dimensions. The progress-bar total is derived from these so the two
# cannot drift apart when a dimension is edited.
sweep_exps = ["ratio", "height"]
sweep_suffixes = ['', '_more']
sweep_models = ["VGG", 'ResNet']
sweep_ctypes = [1, 2, 3, 4, 5]
sweep_levels = [1, 2, 4, 8, 16]
sweep_methods = ["IID", "COV", "ADV", "OOD", "IID_FEATURE", "ADV_FEATURE", "OOD_FEATURE"]
n_runs = 5

n_total_samples = 94  # total number of samples; denominator of TrainCover

# Keys removed from each loaded result before it is stored in `stats`.
drop_keys = [
    'test_data', 'y_test', 'y_pred', 'MAE', 'MLAE', 'y_min', 'y_max',
    'loss', 'val_loss', 'mse', 'val_mse', 'mae', 'val_mae',
]
category_cols = ['Model', 'Method', 'Level', 'Type', 'Run']
out_dir = f'{prefix}/formated_data'

pbar = tqdm(
    total=len(sweep_exps) * len(sweep_suffixes) * len(sweep_models)
    * len(sweep_ctypes) * len(sweep_levels) * len(sweep_methods) * n_runs
)
try:
    for exp in sweep_exps:
        for mt in sweep_suffixes:

            stats = recursivedict()
            dfs = []
            histories = []
            for model in sweep_models:
                for ctype in sweep_ctypes:
                    for level in sweep_levels:
                        for method in sweep_methods:
                            for run in range(n_runs):
                                # Tick once per combination, including skipped ones,
                                # so the bar always reaches its total.
                                pbar.update(1)

                                # Combinations that are deliberately not loaded.
                                if (
                                    (level == 1 and method != 'IID')
                                    or (level == 16 and ctype == 5)
                                    or (level == 16 and "_FEATURE" in method)
                                ):
                                    continue

                                stat_path = f'{prefix}/{model}/cnn_{exp}/{method}/{level}/type{ctype}/{run}{mt}.p'
                                # Skip combinations whose result file does not exist.
                                if not os.path.exists(stat_path):
                                    continue

                                # NOTE: pickle.load can execute arbitrary code; only
                                # load result files from a trusted source.
                                with open(stat_path, 'rb') as f:
                                    stat = pickle.load(f)

                                # One row per prediction.
                                n = len(stat['y_pred'])
                                test_data = stat['test_data']
                                dft = pd.DataFrame({
                                    'Model': [model] * n, 'Run': [run] * n, 'Type': [ctype] * n,
                                    'Method': [method] * n, 'Level': [level] * n,
                                    # Row-wise min / max of test_data.
                                    'h': np.min(test_data, axis=1), 'H': np.max(test_data, axis=1),
                                    'Pred': stat['y_pred'].flatten(),
                                    # Width of the [min, max] span of train_samples
                                    # as a fraction of n_total_samples.
                                    'TrainCover': (
                                        max(stat['train_samples']) - min(stat['train_samples']) + 1
                                    ) / n_total_samples,
                                })
                                dfs.append(dft)

                                # Training history is optional; one row per epoch.
                                if 'loss' in stat.keys():
                                    n = len(stat['loss'])
                                    history = pd.DataFrame({
                                        'Model': [model] * n, 'Run': [run] * n, 'Type': [ctype] * n,
                                        'Method': [method] * n, 'Level': [level] * n,
                                        'Epoch': np.arange(n) + 1,
                                        'train_mse': stat['loss'], 'train_mae': stat['mae'],
                                        'val_mse': stat['val_loss'], 'val_mae': stat['val_mae'],
                                    })
                                    histories.append(history)

                                # Drop the entries listed in drop_keys; keep the rest.
                                for k in drop_keys:
                                    stat.pop(k, None)

                                stats[model][ctype][method][level][run] = stat

            if len(dfs) > 0:
                # Writing fails if the target directory is missing.
                os.makedirs(out_dir, exist_ok=True)

                df = pd.concat(dfs, ignore_index=True)
                for k in category_cols:
                    df[k] = df[k].astype('category')
                for k in ['H', 'h']:
                    df[k] = df[k].astype(int)
                df.to_feather(f'{out_dir}/cnn_{exp}{mt}.feather')

                # pd.concat raises ValueError on an empty list, which happens when
                # no loaded result had a 'loss' entry. In that case no history file
                # is written and `df_history` is left untouched.
                if len(histories) > 0:
                    df_history = pd.concat(histories, ignore_index=True)
                    for k in category_cols:
                        df_history[k] = df_history[k].astype('category')
                    df_history.to_feather(f'{out_dir}/cnn_{exp}{mt}.history.feather')

                # Convert the nested defaultdicts to plain dicts before pickling.
                stats = to_regular_dict(stats)
                with open(f'{out_dir}/cnn_{exp}{mt}.p', 'wb') as f:
                    pickle.dump(stats, f)
finally:
    # Close the progress bar even if loading or writing raised.
    pbar.close()

  0%|          | 0/12000 [00:00<?, ?it/s]