In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# Enlarge matplotlib's default text sizes so labels stay readable on the
# (12, 8) figures drawn in the cells below.
parameters = {'axes.labelsize': 25, 'axes.titlesize': 35}
# tick labels and legend entries all share one (smaller) size
parameters.update({key: 20 for key in ('xtick.labelsize',
                                       'ytick.labelsize',
                                       'legend.fontsize')})
plt.rcParams.update(parameters)

# Presumably the location of a dune-nd-lar-reco checkout.
# NOTE(review): no cell visible in this notebook reads this value -- confirm it is still needed.
DUNE_ND_LAR_RECO_DIR = "/gpfs/slac/staas/fs1/g/neutrino/jwolcott/app/dune-nd-lar-reco"
# Directory (joined with SAMPLE) that is listed for the training-log .csv files.
TRAIN_DIR = "/scratch/jwolcott/train"
# Root of the validation logs: .csv files are searched for recursively
# under <VALID_DIR>/<SAMPLE>/log_inference.
VALID_DIR = "/scratch/jwolcott/valid"
# Root of the plot output: the loss figure is saved under <PLOT_DIR>/<SAMPLE>.
PLOT_DIR = "/scratch/jwolcott/plots"

# SAMPLE is joined onto TRAIN_DIR, VALID_DIR and PLOT_DIR in the cells below.
# If you keep several training runs as subdirectories of those directories,
# set it to the name of the run you want to inspect (examples kept below).
# Leaving it as the empty string uses the directories above directly.
#SAMPLE = "uresnet+ppn-380Kevs-25Kits-batch32"
#SAMPLE = "uresnet+ppn-380Kevs-50Kits-batch32"
#SAMPLE = "track+showergnn-380Kevs-15Kits-batch32"
#SAMPLE = "track+showergnn-380Kevs-15Kits-batch16-attempt2"
#SAMPLE = "track+intergnn-1400evs-1000Kits-batch8"
#SAMPLE = "tests"
SAMPLE = ""

In [None]:
# Load every training-log CSV found directly inside the selected sample
# directory and stitch them together into one frame, `df`, ordered by the
# first iteration number each file contains.
target_dir = os.path.join(TRAIN_DIR, SAMPLE)

# os.listdir returns entries in arbitrary order; sorting keeps the listing
# (and the tie-breaking of the ordering below) reproducible between runs.
csvs = sorted(os.path.join(target_dir, f) for f in os.listdir(target_dir) if f.endswith('.csv'))
if not csvs:
    # fail here with a clear message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError("no training .csv logs found in: %s" % target_dir)
dfs = [pd.read_csv(f) for f in csvs]

# Work out the file order once and reuse it for both the report and the concat.
# Bracket access is used for the column so it can never be confused with
# a DataFrame attribute.
order = np.argsort([frame["iter"].min() for frame in dfs], kind="stable")
for idx in order:
    print(csvs[idx], dfs[idx]["iter"].min(), '=>', dfs[idx]["iter"].max())

# ignore_index gives the combined frame one unique, increasing row index;
# otherwise every file's rows would restart at 0 and anything plotted
# against the index would overlap itself.
df = pd.concat([dfs[idx] for idx in order], ignore_index=True)
#print(sorted(df.keys()))
print("losses:", sorted(k for k in df.keys() if "loss" in k))

In [None]:
import pathlib
import re

# Load the validation ("inference") logs for the selected sample into
# `df_valid`, or leave `df_valid` as None when no matching file is found.
valid_dir = os.path.join(VALID_DIR, SAMPLE, "log_inference")
print("validation file dir:", valid_dir)

# The iteration number is parsed from the "log-<digits>" part of each path.
# Raw string: a bare '\d' in a normal string literal is an invalid escape
# sequence, which newer Python versions warn about.
filepattern = re.compile(r'.*log-(\d+).*')

dfs_valid = []
for f in pathlib.Path(valid_dir).glob("**/*.csv"):
    f = str(f)
    matches = filepattern.match(f)
    if not matches:
        # no "log-<digits>" in the path, so there is no iteration to attach
        continue

    valid_frame = pd.read_csv(f)
    # tag every row of this file with the iteration parsed from its path
    valid_frame['iter'] = int(matches.group(1))
    dfs_valid.append(valid_frame)

df_valid = None
if len(dfs_valid) > 0:
    # concatenate in order of increasing iteration
    valid_order = np.argsort([frame['iter'].min() for frame in dfs_valid])
    df_valid = pd.concat([dfs_valid[idx] for idx in valid_order])

In [None]:
# Plot the selected training losses (and, when available, the matching
# validation losses) against iteration, then save the figure as PDF and PNG.
plotdir = os.path.join(PLOT_DIR, SAMPLE)
# makedirs also creates missing parent directories and, with exist_ok,
# does nothing when the directory is already there
os.makedirs(plotdir, exist_ok=True)

# column name in the log CSVs -> label used in the legend;
# comment entries in or out to choose what is drawn
loss_types = {
#      "ppn_loss": "PPN loss",
#      "seg_loss": "SS loss",
    "uresnet_loss": "SS loss",
    "loss_ppn1": "PPN1 loss",
    "loss_ppn2": "PPN2 loss",
#    "shower_edge_loss": "Shower GNN edge loss",
#    "shower_node_loss": "Shower GNN node loss",
#    "track_edge_loss": "Track GNN edge loss",
#    "inter_edge_loss": "Interaction GNN edge loss"
    "loss": "Total loss",
}

fig, ax = plt.subplots(figsize=(12,8), facecolor='w')
sdf = df

# `df_valid` is either None or a DataFrame.  A DataFrame has no single truth
# value (`if df_valid:` raises ValueError), so compare against None explicitly.
have_valid = df_valid is not None

colors = {}  # legend title -> color of the training curve
test = {}    # legend title -> Line2D of the validation curve
for loss_name, loss_title in loss_types.items():
    print("considering loss:", loss_name)
    if loss_name in sdf:
        p = ax.plot(sdf.iter, sdf[loss_name], label=loss_title + " (train)", alpha=0.75)
        if loss_title not in colors:
            colors[loss_title] = p[-1].get_color()

    if have_valid and loss_name in df_valid:
        test[loss_title] = ax.plot(df_valid.iter, df_valid[loss_name], label=loss_title + " (test)", marker='o')[0]

# go back and give each "test" curve the color of its training counterpart
for title, color in colors.items():
    if title in test:
        test[title].set_color(color)

ax.set_yscale('log')
#ax.set_ylim(1e-10,1)
ax.set_xlabel("Iteration")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
plt.show()

for ext in ("pdf", "png"):
    fig.savefig(os.path.join(plotdir, "loss." + ext))

In [None]:
# Plot every accuracy column present in the training log against iteration.
print("accuracies:", sorted(k for k in df.keys() if "accuracy" in k))

import sys
# only extend the module search path once, even when this cell is re-run
larcv_python_dir = "/gpfs/slac/staas/fs1/g/neutrino/jwolcott/app/larcv2/python"
if larcv_python_dir not in sys.path:
    sys.path.append(larcv_python_dir)
from larcv import larcv

# value of each larcv kShape<Name> constant -> readable class name
classes = {
    getattr(larcv, 'kShape%s' % name): name
    for name in ['Michel','Track','Shower','LEScatter','Delta', 'Ghost', 'Unknown']
}
print(classes)

# column name in the log CSVs -> label used in the legend
accuracy_types = {
    "shower_node_accuracy":  "Shower node acc.",
    "shower_edge_accuracy":  "Shower edge acc.",
    "track_edge_accuracy":   "Track edge acc.",
    "accuracy":              "Total accuracy",
}
for class_value, class_name in classes.items():
    accuracy_types["accuracy_class_%d" % class_value] = "%s class acc." % class_name

fig, ax = plt.subplots(figsize=(12,8), facecolor='w')
# ax.plot(df["fraction_positives_ppn1"], label="positives PPN1")
# ax.plot(df["fraction_positives_ppn2"], label="positives PPN2")
for acc_name, acc_title in accuracy_types.items():
    if acc_name not in df:
        continue
    # Plot against the "iter" column, not the DataFrame index: the x axis is
    # labelled "Iteration", and the index of a frame concatenated from
    # several CSVs does not track the iteration number.
    ax.plot(df["iter"], df[acc_name], label=acc_title)
ax.set_xlabel("Iteration")
ax.set_ylabel("Accuracy")
# ax.set_ylabel("Fraction")
ax.legend()

In [None]:
import re

# Plot the "titer" column of the training log against iteration.
fig, ax = plt.subplots(figsize=(12,8), facecolor='w')
# Use the "iter" column for x (the axis is labelled "Iteration"; the
# DataFrame index is only a row counter), and label the curve so that the
# legend below has an entry to show.
ax.plot(df["iter"], df["titer"], label="Train time")
ax.set_xlabel("Iteration")
ax.set_ylabel("Train time (s)")

ax.legend()