# Results

This notebook gathers results from the evaluation JSON files in an experiments folder and prints the best mean reward for each task, agent, and number of training demos.

### Setup

- Set the root folder environment variable with `export CLIPORT_ROOT=<cliport_root>`
- Train and evaluate agents by following the [README guide](https://github.com/cliport/cliport#single-task-training--evaluation).

In [1]:
import os
import sys
import json

from cliport import agents
from cliport import tasks

2023-10-12 16:40:49.452175: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /apps/software/standard/compiler/gcc/9.2.0/nodejs/12.14.1/lib:/apps/software/standard/core/gcc/9.2.0/lib/gcc/x86_64-pc-linux-gnu/9.2.0:/apps/software/standard/core/gcc/9.2.0/lib64:/apps/software/standard/core/gcc/9.2.0/lib:/opt/slurm/current/lib:/share/rci_apps/common/lib64
2023-10-12 16:40:49.452234: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from .autonotebook import tqdm as notebook_tqdm


### Settings

In [2]:
# Respect a CLIPORT_ROOT that was exported in the shell (see Setup above) and
# only fall back to this cluster-specific path when the variable is unset.
# `os` is already imported in the first cell.
os.environ.setdefault('CLIPORT_ROOT', '/sfs/weka/scratch/ys5hd/cliport')

# Root of the cliport checkout, and the experiments folder scanned for results.
root_folder = os.environ['CLIPORT_ROOT']
exp_folder = os.path.join(root_folder, 'cliport_quickstart')  # replace 'cliport_quickstart' with your exps folder

### Gather JSON Results

In [3]:
# Search space: every task and agent name registered in cliport, crossed with
# the numbers of training demos to look for.
tasks_list = list(tasks.names.keys())
agents_list = list(agents.names.keys())
demos_list = [1, 10, 100, 1000]

# Maps '<task>-<agent>-n<demos>-<single|multi>-<val|test>' to the parsed
# contents of the corresponding evaluation JSON file.
results = {}
for t in tasks_list:
    for a in agents_list:
        for d in demos_list:
            task_folder = f'{t}-{a}-n{d}-train'
            task_folder_path = os.path.join(exp_folder, task_folder, 'checkpoints')

            # Combinations without a checkpoints directory are skipped.
            # isdir (rather than exists) avoids calling listdir on a plain file.
            if not os.path.isdir(task_folder_path):
                continue

            # Match on the file extension, not on a '.json' substring, and sort
            # because os.listdir returns entries in arbitrary order: if two files
            # map to the same key, the one kept is then deterministic.
            jsons = sorted(f for f in os.listdir(task_folder_path) if f.endswith('.json'))
            for j in jsons:
                # Model and evaluation type are inferred from substrings of the
                # JSON filename; anything without 'multi' / 'val' is treated as
                # 'single' / 'test' respectively.
                model_type = 'multi' if 'multi' in j else 'single'
                eval_type = 'val' if 'val' in j else 'test'

                with open(os.path.join(task_folder_path, j)) as f:
                    res = json.load(f)

                results[f'{t}-{a}-n{d}-{model_type}-{eval_type}'] = res

In [10]:
# Show which '<task>-<agent>-n<demos>-<model_type>-<eval_type>' keys were gathered above.
results.keys()

dict_keys(['stack-block-pyramid-seq-seen-colors-cliport-n1000-multi-test', 'stack-block-pyramid-seq-seen-colors-cliport-n1000-multi-val'])

In [34]:
# Spot-check the raw JSON structure: the entry at index 10 under 'episodes' for one checkpoint
# of the n1000 multi-task test results.
# NOTE(review): the result key and checkpoint name are hardcoded to one specific run — adjust for other experiments.
results['stack-block-pyramid-seq-seen-colors-cliport-n1000-multi-test']['steps=400000-val_loss=0.00014655.ckpt']['episodes'][10]

[0.9999999999999999,
 {'5': [[0.5843750000000001, 0.125, 0.0025],
   [0.0, 0.0, 0.9830140325787236, 0.18353041097680883],
   [0.05, 0.05, 0.005]],
  '6': [[0.6020428591072722, 0.17428179138594824, 0.02499001815950449],
   [-9.580712303505592e-07,
    7.362469967571153e-07,
    -0.19084821078731384,
    0.9816195599303332],
   [0.04, 0.04, 0.04]],
  '7': [[0.5875747033351035, 0.12578922976810689, 0.024989973787982976],
   [2.2069486014429927e-06,
    -2.0821136550253833e-07,
    0.5461998675216455,
    0.8376548840151431],
   [0.04, 0.04, 0.04]],
  '8': [[0.56939774839501, 0.07805020953598153, 0.024989760536879298],
   [3.2709270734519392e-06,
    -4.512937264569032e-06,
    0.5405700888255509,
    0.8412989831421827],
   [0.04, 0.04, 0.04]],
  '9': [[0.5944270013957793, 0.1440924389947819, 0.06497878183527833],
   [2.6694128470635445e-05,
    -5.631892994313575e-06,
    0.5654753612891443,
    0.8247651878144573],
   [0.04, 0.04, 0.04]],
  '10': [[0.5811886040666713, 0.1000471496745445

### Print Results

In [16]:
print(f'Experiments folder: {exp_folder}\n')

# For each evaluation split, walk the same task/agent/demos grid used when
# gathering and report the best mean reward found for every key that exists.
for eval_type in ['val', 'test']:
    print(f'----- {eval_type.upper()} -----\n')
    for t in tasks_list:
        for a in agents_list:
            for d in demos_list:
                for model_type in ['single', 'multi']:
                    eval_key = f'{t}-{a}-n{d}-{model_type}-{eval_type}'

                    # Skip combinations that were never gathered, and result
                    # files with no checkpoint entries: max() on an empty
                    # mapping would raise ValueError.
                    res = results.get(eval_key)
                    if not res:
                        continue

                    print(f'{t} | Train Demos: {d}')

                    # Pick the checkpoint with the highest mean reward; ties are
                    # broken by checkpoint name, as with max() over (score, name) pairs.
                    # TODO: test that this works for full results folder
                    best_ckpt = max(res, key=lambda ckpt: (res[ckpt]['mean_reward'], ckpt))
                    best_score = res[best_ckpt]['mean_reward']

                    # Mean reward is printed as a percentage with one decimal.
                    print(f'\t{best_score*100:1.1f} : {a} | {model_type}\n')

Experiments folder: /sfs/weka/scratch/ys5hd/cliport/cliport_quickstart

----- VAL -----

stack-block-pyramid-seq-seen-colors | Train Demos: 1000
	97.3 : cliport | multi

----- TEST -----

stack-block-pyramid-seq-seen-colors | Train Demos: 1000
	96.5 : cliport | multi



In [18]:
# NOTE(review): `eval_key` is left over from the printing loop above, so it holds the last
# task/agent/demos/'multi'/'test' combination that loop built. That key need not exist in
# `results`, in which case this lookup raises KeyError. Looks like leftover debugging —
# consider `results.get(eval_key)` or removing the cell.
results[eval_key]

KeyError: 'towers-of-hanoi-seq-full-two_stream_clip_film_lingunet_lat_transporter-n1000-multi-test'