# Analysis: Visualizing Model Training <a class="tocSkip">

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-&amp;-Config" data-toc-modified-id="Imports-&amp;-Config-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports &amp; Config</a></span></li><li><span><a href="#Visualize-Training" data-toc-modified-id="Visualize-Training-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Visualize Training</a></span></li></ul></div>

## Imports \& Config

In [None]:
# Set editor width to something sane
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

%load_ext autoreload
%autoreload 2
%matplotlib inline

from pathlib import Path
import sys
# get root repository path
a = !pwd
repo_root = a[0].rsplit('ProGraML', maxsplit=1,)[0] + 'ProGraML'
print(repo_root)
#insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, repo_root)
repo_root = Path(repo_root)

import pandas as pd

import pickle, json
from matplotlib import pyplot as plt
import numpy as np
import tqdm
import torch
from torch_geometric.data import Data, DataLoader

## Visualize Training

In [None]:
# Directory inside the repository that get_all_runs() scans for '*_log.json' files
log_dir = repo_root.joinpath('deeplearning', 'ml4pl', 'poj104', 'classifyapp_logs')

In [None]:
# List every entry of the log directory, hidden files included (shell `ls -a`).
!ls -a {log_dir}

In [None]:
def get_all_runs(log_dir, subfolders=False, exclude=['test_only']):
    assert subfolders == False, 'not implemented'
    logs = {}
    hyps = {}
    for file in log_dir.glob('*_log.json'):
        with open(file, 'r') as f:
            run_name = file.name.rsplit('_log.json')[0]
            # load hyps
            hyp_file = file.parent / (run_name + '_params.json')
            try:
                with open(hyp_file, 'r') as f:
                    hyp = json.load(f)
            except FileNotFoundError:
                hyp
            
            hyps[run_name] = hyp
            
            # skip weird files
            if run_name[:2] == '._':
                continue

            print(run_name)
            
            log = pd.read_json(f, orient='records')
            
            # handle 'test_only' epochs later!
            if log['epoch'].values[0] == 'test_only':
                continue
            
            # flatten dataframe
            valid = pd.DataFrame(log.valid_results.tolist(), columns=['valid_loss', 'valid_acc', 'valid_speed'])
            train = pd.DataFrame(log.train_results.tolist(), columns=['train_loss', 'train_acc', 'train_speed'])
            if hasattr(log, 'test_results'):
                test = pd.DataFrame(log.test_results.tolist(), columns=['test_loss', 'test_acc', 'test_speed'])
                df = pd.concat([log.epoch, log.time, train, valid, test], axis=1)
            else:
                df = pd.concat([log.epoch, log.time, train, valid], axis=1)

            logs[run_name] = df
    return logs, hyps

In [None]:
# Read all runs of `log_dir`: per-run metric frames and per-run hyperparameters
logs, hyps = get_all_runs(log_dir=log_dir)

In [None]:
# Pick the third run (index 2 in the insertion order of `logs`) for inspection.
# NOTE(review): raises IndexError when fewer than three runs were loaded.
log = logs[list(logs)[2]]

In [None]:
# Plot per-epoch accuracy of every loaded run on one figure: validation dashed,
# training dotted and (when the run logged it) test solid, one colour per run.
fig, ax = plt.subplots(figsize=(24, 12))
palette = plt.rcParams['axes.prop_cycle'].by_key()['color']
# `run_log` (not `log`) so the loop does not clobber a `log` selected elsewhere.
for run_idx, (name, run_log) in enumerate(logs.items()):
    # Wrap around the palette so having more runs than colours cannot exhaust it.
    c = palette[run_idx % len(palette)]
    ax.plot(run_log['epoch'], run_log['valid_acc'], label='valid' + name, ls='dashed', c=c)
    ax.plot(run_log['epoch'], run_log['train_acc'], label='train' + name, linestyle='dotted', c=c)
    if 'test_acc' in run_log.columns:
        ax.plot(run_log['epoch'], run_log['test_acc'], label='test' + name, c=c)
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
# Fine-grained y ticks (every 0.03) to make small accuracy differences readable.
ax.set_yticks(np.arange(0, 1, step=0.03))
ax.grid(which='both', linestyle='-')
ax.legend(loc='best')

plt.show()