In [None]:
%matplotlib inline
#%pylab
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mpc
from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

import scipy
from scipy import interpolate
import pandas as pd
import itertools

import os
import commands

# Bookkeeping constants used by the plotting cells to turn an iteration
# number into an epoch: epoch = iteration * IMAGES_PER_BATCH / IMAGES_PER_EPOCH.
# Both are floats so that this conversion is always a true division.
IMAGES_PER_BATCH = 50.0
IMAGES_PER_EPOCH = 50000.0

def round(val,digit):
    """Round ``val`` to ``digit`` decimal places.

    This deliberately shadows the builtin ``round`` for the rest of the
    notebook, so the implementation delegates to ``np.round`` rather than
    calling ``round`` recursively.

    The previous implementation truncated with ``int()``, which dropped a unit
    in the last kept digit whenever the scaled value landed just below an
    integer (e.g. 0.29 * 100 == 28.999999999999996 -> 0.28), and it raised for
    negative ``digit`` because ``np.power(10, digit)`` has an integer base.

    Parameters
    ----------
    val : float or array-like
        Value(s) to round.
    digit : int
        Number of decimal places to keep; negative values round to the left
        of the decimal point.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The rounded value(s). Ties follow NumPy's round-half-to-even rule.
    """
    return np.round(val, digit)

In [None]:
# Collect the per-iteration CSV files from the working directory (skipping the
# 'score*' files) and index them as files_m[flavor][iteration] -> file name.
# A file name is split as '<flavor>_iter_<...>.csv'; the iteration number is
# the integer after the last underscore.
files = [
    fname for fname in os.listdir('.')
    if fname.endswith('.csv') and '_iter_' in fname and not fname.startswith('score')
]
files_m = {}
for fname in files:
    flavor = fname.split('_iter_')[0]
    per_flavor = files_m.setdefault(flavor, {})
    per_flavor[int(fname.replace('.csv', '').split('_')[-1])] = fname

In [None]:
# Per-flavor training curves: for every flavor in files_m, draw the mean
# per-event error (1 - correct pixels / labelled pixels) versus epoch for all
# pixels, shower pixels and track pixels.
#
# Columns read from each CSV: entry, total_npx, correct_npx,
# label_shower_npx, correct_shower_npx, label_track_npx, correct_track_npx.

# When > 0, only rows with entry < nevents are used and files that hold fewer
# rows than that are skipped; 0 disables the cut.
nevents=0
for flavor,flavor_files in files_m.items():
    print(flavor)
    iterations = np.array(sorted(flavor_files.keys())).astype(np.int32)

    ydata_all = []
    ydata_shower = []
    ydata_track = []
    xdata = []

    fig,ax = plt.subplots(figsize=(12,8),facecolor='w')
    for iteration in iterations:
        df = pd.read_csv(flavor_files[iteration]).query('total_npx>0')
        if nevents > 0:
            df = df.query('entry<%d' % nevents)
            if df.index.size < nevents:
                continue
        shower_df = df.query('label_shower_npx>0')
        track_df = df.query('label_track_npx>0')
        # A non-empty shower/track selection implies df itself is non-empty,
        # so no separate check on df is needed.
        if shower_df.index.size < 1 or track_df.index.size < 1:
            print('Corrupted iteration: %s' % iteration)
            continue

        # Cast the numerators to float: if the pixel-count columns are parsed
        # as integers, '/' on two integer arrays floor-divides under Python 2
        # and every per-event fraction would collapse to 0 or 1.
        shower_frac = shower_df.correct_shower_npx.values.astype(np.float64) / shower_df.label_shower_npx.values
        track_frac = track_df.correct_track_npx.values.astype(np.float64) / track_df.label_track_npx.values
        all_frac = df.correct_npx.values.astype(np.float64) / df.total_npx.values

        ydata_shower.append(1. - shower_frac.mean())
        ydata_track.append(1. - track_frac.mean())
        ydata_all.append(1. - all_frac.mean())
        xdata.append(iteration)
        # One summary line per iteration: combined / shower / track error in percent.
        print('%s --- %s ... %s ... %s' % (iteration,
                                           round(ydata_all[-1],4) * 100,
                                           round(ydata_shower[-1],4) * 100,
                                           round(ydata_track[-1],4) * 100))

    # Convert iteration numbers to epochs.
    xdata = np.array(xdata) * IMAGES_PER_BATCH / IMAGES_PER_EPOCH

    ax.plot(xdata, ydata_all, marker='o', color='black', label='Combined')
    ax.plot(xdata, ydata_shower, marker='o', color='blue', label='Shower')
    ax.plot(xdata, ydata_track, marker='o', color='red', label='Track')

    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.tick_params(labelsize=20)
    ax.set_ylim(0,0.4)
    ax.grid()
    ax.set_xlabel('Epoch',fontsize=20,fontweight='bold',fontname='Georgia')
    # The curves are 1 - accuracy, so the axis shows the error, not the accuracy.
    ax.set_ylabel('Error',fontsize=20,fontweight='bold',fontname='Georgia')
    leg = ax.legend(fontsize=16,loc=4)
    leg.get_frame().set_facecolor('white')
    plt.show()

In [None]:
# Overlay the combined per-event error (1 - correct_npx / total_npx, averaged
# over events with total_npx > 0) versus epoch for every flavor in files_m.
fig,ax = plt.subplots(figsize=(12,8),facecolor='w')

# Legend text per flavor; flavors missing here fall back to their raw name.
label_m = {'segmentation': 'class-wise loss',
           'pretrain_segmentation': 'class-wise loss + pre-training',
           'pretrain_segmentation_spweights': 'spatial weight map + pre-training',
           'segmentation_spweights': 'spatial weight map'}

for flavor,flavor_files in files_m.items():
    print(flavor)
    iterations = np.array(sorted(flavor_files.keys())).astype(np.int32)

    xdata = []
    ydata = []

    for iteration in iterations:
        df = pd.read_csv(flavor_files[iteration]).query('total_npx>0')
        # Float cast guards against Python 2 floor division when both
        # pixel-count columns are parsed as integers.
        correct_frac = df.correct_npx.values.astype(np.float64) / df.total_npx.values
        ydata.append(1. - correct_frac.mean())
        xdata.append(iteration)

    # Convert iteration numbers to epochs.
    xdata = np.array(xdata) * IMAGES_PER_BATCH / IMAGES_PER_EPOCH

    # .get() keeps the plot working when a new flavor appears on disk that has
    # no entry in label_m (a plain lookup raised KeyError).
    ax.plot(xdata, ydata, marker='o', label=label_m.get(flavor, flavor))

ax.get_yaxis().get_major_formatter().set_useOffset(False)
ax.tick_params(labelsize=20)
ax.set_ylim(0.0,0.2)
ax.grid()
ax.set_xlabel('Epoch',fontsize=20,fontweight='bold',fontname='Georgia')
ax.set_ylabel('Error',fontsize=20,fontweight='bold',fontname='Georgia')
leg = ax.legend(fontsize=16,loc=1)
leg.get_frame().set_facecolor('white')
plt.show()

In [None]:
# Per-event distributions for a single flavor / iteration:
#   1. fraction of correctly classified pixels (shower, track and all pixels),
#   2. occupied-pixel fraction of the image.
# Every histogram is normalised so that its bin contents sum to 1.

# Flavor to inspect; the alternatives are the other keys used in this notebook:
#   'segmentation', 'pretrain_segmentation', 'segmentation_spweights'
name  = 'pretrain_segmentation_spweights'
iteration = 6000


def _event_weights(n):
    """Return ``n`` equal weights summing to 1 (an empty list when ``n`` is 0)."""
    # The guard avoids ZeroDivisionError when a selection has no events.
    return [1. / n] * n if n else []


iters = sorted(files_m[name].keys())
print(iters)
df = pd.read_csv(files_m[name][iteration]).query('total_npx>0')
df1 = df.query('label_shower_npx>0')
df2 = df.query('label_track_npx>0')

# Cast the numerators to float: if the pixel-count columns are parsed as
# integers, '/' on two integer arrays floor-divides under Python 2 and the
# fractions would collapse to 0 or 1.
all_frac = df.correct_npx.values.astype(np.float64) / df.total_npx.values
shower_frac = df1.correct_shower_npx.values.astype(np.float64) / df1.label_shower_npx.values
track_frac = df2.correct_track_npx.values.astype(np.float64) / df2.label_track_npx.values

totdata, bin_edges = np.histogram(all_frac,
                                  weights=_event_weights(df.index.size),
                                  bins=40,
                                  range=(0.,1.))
# Take the bin centres from the edges so they always match the binning above.
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

print(len(totdata))

fig,ax = plt.subplots(figsize=(12,8),facecolor='w')

ax.hist(shower_frac,
        weights=_event_weights(df1.index.size),
        bins=40, range=(0.,1.), color='blue', alpha=0.5, label='Shower')
ax.hist(track_frac,
        weights=_event_weights(df2.index.size),
        bins=40, range=(0.,1.), color='red',  alpha=0.5, label='Track')

ax.plot(bin_centers, totdata, color='black', marker='o', label='All')

ax.get_yaxis().get_major_formatter().set_useOffset(False)
ax.tick_params(labelsize=20)
ax.grid()
ax.set_xlabel('Correct Fraction',fontsize=20,fontweight='bold',fontname='Georgia')
ax.set_ylabel('Event Fraction',fontsize=20,fontweight='bold',fontname='Georgia')
leg = ax.legend(fontsize=20,loc=2)
leg.get_frame().set_facecolor('white')

plt.show()

# Second figure: labelled-pixel count as a percentage of 512*512
# (presumably the image size in pixels -- TODO confirm).
fig,ax = plt.subplots(figsize=(12,8),facecolor='w')

ax.hist(df.total_npx.values / (512*512.) * 100.,
        weights=_event_weights(df.index.size),
        bins=30, range=(0.,3), color='blue')
ax.get_yaxis().get_major_formatter().set_useOffset(False)
ax.tick_params(labelsize=20)
ax.grid()
ax.set_xlabel('Occupied Pixel Fraction [%]',fontsize=20,fontweight='bold',fontname='Georgia')
ax.set_ylabel('Event Fraction',fontsize=20,fontweight='bold',fontname='Georgia')
plt.show()



In [None]:
# Shower-score distribution, drawn separately for rows with type==1 (legend:
# 'Shower Pixels') and type==2 (legend: 'Track Pixels'). Each curve is a
# 50-bin histogram over [0, 1] normalised to unit sum, on a log y-axis.
df = pd.read_csv('score_pretrain_segmentation_spweights_iter_26000.csv')
shower_df = df.query('type==1')
track_df = df.query('type==2')

# Equal per-row weights so that each histogram sums to 1.
n_shower = shower_df.index.size
n_track = track_df.index.size
shower_weights = [1. / n_shower] * n_shower
track_weights = [1. / n_track] * n_track

fig, ax = plt.subplots(figsize=(12, 8), facecolor='w')
shower_data, _ = np.histogram(shower_df.score_shower.values,
                              weights=shower_weights,
                              bins=50, range=(0., 1.))
track_data, _ = np.histogram(track_df.score_shower.values,
                             weights=track_weights,
                             bins=50, range=(0., 1.))

# Centres of the 50 bins of width 0.02.
bin_centers = np.arange(0, 1, 0.02) + 0.01
ax.plot(bin_centers, shower_data, marker='o', label='Shower Pixels', color='blue')
ax.plot(bin_centers, track_data, marker='o', label='Track Pixels', color='red')

ax.yaxis.get_major_formatter().set_useOffset(False)
ax.tick_params(labelsize=20)
ax.set_xlabel('Shower Score', fontsize=20, fontweight='bold', fontname='Georgia')
ax.set_ylabel('Fraction of Pixels', fontsize=20, fontweight='bold', fontname='Georgia')
leg = ax.legend(fontsize=24, loc=9)
leg_frame = leg.get_frame()
leg_frame.set_facecolor('white')

ax.set_yscale('log')
ax.grid(True, which='both')
plt.show()

In [None]:
# Cumulative shower-score curves on a log y-axis:
#   - rows with type==1 ('Shower Pixels'): fraction with score in bins 0..x,
#   - rows with type==2 ('Track Pixels'):  fraction with score in bins x..last.
df=pd.read_csv('score_pretrain_segmentation_spweights_iter_26000.csv')
shower_df=df.query('type==1')
track_df=df.query('type==2')

# 50-bin histograms over [0, 1], each normalised to unit sum.
shower_data,_ = np.histogram(shower_df.score_shower.values,weights=[1./shower_df.index.size]*shower_df.index.size,bins=50,range=(0.,1.))
track_data,_  = np.histogram(track_df.score_shower.values,weights=[1./track_df.index.size]*track_df.index.size,bins=50,range=(0.,1.))

print(shower_data.sum())

# Running sum from the left for showers and from the right for tracks.
# The right-hand sum must include the last bin: the earlier slice [x:-1]
# dropped it from every point and left the final point as an empty sum.
shower_data_sum = np.cumsum(shower_data)
track_data_sum = np.cumsum(track_data[::-1])[::-1]

fig,ax=plt.subplots(figsize=(12,8),facecolor='w')
# Centres of the 50 bins of width 0.02.
bin_centers = np.arange(0,1,0.02) + 0.01
ax.plot(bin_centers,shower_data_sum,marker='o',label='Shower Pixels',color='blue')
ax.plot(bin_centers,track_data_sum,marker='o',label='Track Pixels',color='red')

ax.get_yaxis().get_major_formatter().set_useOffset(False)
ax.tick_params(labelsize=20)
ax.set_xlabel('Shower Score',fontsize=20,fontweight='bold',fontname='Georgia')
ax.set_ylabel('Fraction of Pixels',fontsize=20,fontweight='bold',fontname='Georgia')
leg=ax.legend(fontsize=24,loc=9)
leg_frame=leg.get_frame()
leg_frame.set_facecolor('white')
ax.set_ylim(0.005,1.2)

ax.set_yscale('log')
ax.grid(True,which='both')
plt.show()