In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import analyze # takes some time since inits hax
from channel_dict import channel_dict
from tqdm import tqdm
import datetime
import pickle
from scipy.stats import norm
import shutil

%matplotlib inline

# Job Submission

In [None]:
from make_runlist_new import write_spe_lists

# Dry run: called with write=False, so presumably nothing is written to ./runlists
# (confirm in make_runlist_new). The real write happens in the next cell.
write_spe_lists(write=False)

In [None]:
# Same call as the dry run but with write=True. The return value is kept because
# a later cell iterates over it and hands each element to submit_job.
written=write_spe_lists(write=True)
print(written)

### This will take some time to run, as job submission takes several hours. Alternatively, from a terminal, run `./submit_jobs.sh ./runlists/[runlist name]` for a single runlist, or `./large_submission.sh` to submit jobs for all runlists in `./runlists`.

In [None]:
def submit_job(file):
    """Print and run ``./submit_jobs.sh <file>`` through the IPython shell escape.

    file: runlist path passed to submit_jobs.sh as its only argument.

    NOTE(review): `file` is interpolated into the shell command unquoted, so only
    pass trusted paths without spaces or shell metacharacters.
    """
    command = "./submit_jobs.sh %s" % file
    print(command)
    # IPython-only syntax: runs the command string in a subshell
    !{command}

In [None]:
# Submit one job per runlist returned by write_spe_lists(write=True).
for runlist_path in written:
    submit_job(runlist_path)

# Acc Vs Time

### Calculating acceptance and error data

In [None]:
import hax
#hax already initiated when analyze is imported

from spe_acceptance import data_dir_base, rawdata_dir


def data_exists(run_number):
    """Return True if ``run_<5-digit run number>.h5`` exists inside ``data_dir_base``."""
    filename = 'run_%05d.h5' % int(run_number)
    return os.path.exists(os.path.join(data_dir_base, filename))

def all_data_exists(runlist):
    """Return True when data_exists() holds for every run in ``runlist`` (True for an empty list)."""
    # Materialise the checks first so every run is examined, as before.
    checks = list(map(data_exists, runlist))
    return all(checks)

def file_to_list(runlist_file):
    """Extract the run numbers encoded in a runlist file name.

    The name is expected to look like ``<prefix>_<run>_<run>_<run>.<ext>``: the
    part before the first '.' is split on '_' and fields 1-3 are converted to int.
    Callers in this notebook treat the result as [bottom, top-bulk, top-ring].

    runlist_file: file name, optionally with a directory prefix. The directory
        part is stripped first, so './runlists/name_1_2_3.txt' parses the same
        as 'name_1_2_3.txt' (previously the leading './' produced an empty list).

    Returns a list of up to three ints.
    Raises ValueError if one of the fields is not an integer.
    """
    stem = os.path.basename(runlist_file).split('.')[0]
    return [int(run) for run in stem.split('_')[1:4]]

def get_run_time(run):
    """Return the ``start`` value of the first row of ``hax.runs.datasets`` whose ``number`` equals ``run``."""
    datasets = hax.runs.datasets
    matching_rows = datasets[datasets.number == run]
    return matching_rows.start.values[0]
    
def find_file(run):
    """List the files in ``runlists`` whose name encodes ``run`` among its run numbers."""
    matches = []
    for filename in os.listdir('runlists'):
        for number in file_to_list(filename):
            if int(run) == number:
                matches.append(filename)
                break
    return matches

#### Retrieves previously loaded data from pickle file

In [None]:
# Set up the accumulators shared by the rest of the notebook and load the
# per-runlist results that were pickled by a previous session.

#all runlists
runlists = [f for f in os.listdir('./runlists')]
# Per-runlist accumulators: bottom run number and mean on-channel acceptance
bottom_runs = []
accs = []

# Per-channel arrays keyed by runlist file name
ch_acc_dict={}
ch_err_l={}
ch_err_u={}

# Per-runlist mean of the squared lower/upper errors
upper_errs=[]
lower_errs=[]
errors = []

# Bookkeeping for runs without processed data, runlists with few "on" channels,
# and runlists whose jobs need to be submitted again
missing_runs = []
LED_off = []
resubmit_files = []

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files written by this notebook. The open() raises if the file does not exist.
with open('/project/lgrandi/xenon1t/spe_acceptance/ch_data/ch_data.pkl', 'rb') as cd:
    if os.stat('/project/lgrandi/xenon1t/spe_acceptance/ch_data/ch_data.pkl').st_size==0:
        print('No data in pickle file')
        data=[]
    else:
        data=pickle.load(cd)
saved_runlists=[]

# Record which runlists have complete processed data; the others are queued
# for resubmission and their missing runs are noted.
for runlist in runlists:
    runs=file_to_list(runlist)
    if not all_data_exists(runs):
        resubmit_files.append(runlist)
        for r in runs:
            if not data_exists(r):
                missing_runs.append(r)
        continue
    else:
        bottom_runs.append(runs[0])

# Rebuild the accumulators from the pickled records. `cd` is rebound here from
# the file handle above to one record of `data`.
for cd in data:
    # NOTE(review): this check can never be true inside a loop over `data`.
    if len(data)==0:
        continue
        
    saved_runlists.append(cd.runlist)
    
    # These names stay defined after the loop (holding the last record) and are
    # read by later cells.
    acc=cd.acc
    on_acc=cd.on_acc
    acc_errs_l=cd.acc_errs_l
    acc_errs_u=cd.acc_errs_u
    acc_sys=cd.acc_sys
    acc_stat=cd.acc_stat
    occ=cd.occ
    on_occ=cd.on_occ
    occ_sys=cd.occ_sys
    occ_stat=cd.occ_stat
    # Channels with occupancy above 0.05
    on_channels = np.where(occ > 0.05)[0]
        
    # Fewer than 200 such channels: flag the runlist as "LED likely off"
    if len(on_channels) < 200:
        LED_off.append(cd.runlist)
    
    ch_acc_dict[cd.runlist]=acc
    
    #make dicts of runlist with errors for each channel
    ch_err_l[cd.runlist]=acc_errs_l
    ch_err_u[cd.runlist]=acc_errs_u
    
    runlist = file_to_list(cd.runlist)
    
    bottom_run = runlist[0]
    topbulk_run = runlist[1]
    topring_run = runlist[2]
    
    accs.append(np.mean(on_acc))
    #mean to get average per runlist
    # NOTE(review): these are means of *squared* errors (no square root taken),
    # yet they are later used directly as error bars — confirm intended.
    lower_errs.append(np.mean(acc_errs_l**2))
    upper_errs.append(np.mean(acc_errs_u**2))
        
print("Data already exists for: ", saved_runlists)

In [None]:
# Scratch check of analyze.acceptance_fraction for a single run.
check_run = 10064
thresholds = analyze.get_thresholds(analyze.find_regular_run(check_run))

# The first return value is deliberately NOT unpacked into `accs`: that name is
# the per-runlist accumulator list that later cells call .append() on.
check_accs, acc_errs, sys_errs, stat_errs, t = analyze.acceptance_fraction(check_run, thresholds[:248])
print(t)
# Index 1 / 200 select one slice of the statistical errors for inspection
print(*stat_errs[1, 200, 100:130])
print(stat_errs[1, 200, t])

In [None]:
def MC_errors(run_number,thresholds):
    """Estimate per-channel statistical errors on the acceptance curve by Monte Carlo.

    For each of the 248 channels, ``s.acc_MC`` is called with that channel's
    residual and residual uncertainty (from ``s.residual(6, 'amplitude')``) and
    1000 as its last argument (presumably the number of MC curves — confirm in
    analyze). The errors are the 16th / 84th percentile minus the mean.

    run_number: run whose ``run_<5-digit number>.h5`` file in ``data_dir_base`` is analysed.
    thresholds: accepted for interface compatibility; currently unused. The
        previous version only used it for a value that was never read.

    Returns an array ``[sigma_l, sigma_u]``; each is (248, number of bin centers).
    Raises FileNotFoundError if the acceptance file is missing (previously a
    message was printed and the code went on to open the missing file).
    """
    path = os.path.join(data_dir_base, 'run_%05d.h5' % run_number)
    if not os.path.exists(path):
        raise FileNotFoundError("Acceptance data does not exist for run %d" % run_number)
    s = analyze.SPE(path)

    n_channels = 248
    n_bins = len(s.data['bin_centers'])
    sigma_l = np.zeros((n_channels, n_bins))
    sigma_u = np.zeros((n_channels, n_bins))
    res, sigma_res = s.residual(6, 'amplitude')

    for ch in range(n_channels):
        # MC acceptance curves generated from this channel's residual
        acc_curves = s.acc_MC(res[ch], sigma_res[ch], 1000)
        # NOTE(review): acc_curves is indexed with [ch] again although it was built
        # from channel ch's residual only — confirm against the shape acc_MC returns.
        mean_curve = np.mean(acc_curves[ch], axis=0)
        sigma_l[ch, :] = np.percentile(acc_curves[ch], 16, axis=0) - mean_curve
        sigma_u[ch, :] = np.percentile(acc_curves[ch], 84, axis=0) - mean_curve

    return np.array([sigma_l, sigma_u])

In [None]:
# Scratch check: recompute the statistical errors for run 10064 with MC_errors,
# reusing `thresholds` and `t` from the acceptance_fraction cell above.
stat_errs=MC_errors(10064, thresholds)
#print(*stat_errs[1, 200, 100:130])
# Show the upper errors (index 1 of MC_errors' return value) for channel 100 at index `t`
stat_errs[1, 100, t]

#### Calculates acceptance, errors for new runlist

In [None]:
# Process only the newest runlist: the file containing the highest bottom run.
# The calculation runs when that runlist is NOT yet among the pickled results.
# The previous condition was inverted, so it recomputed saved runlists and
# printed "Data already in pickle file" for unsaved ones.

newest_runlist = find_file(sorted(bottom_runs)[-1])[0]
print("Newest Runlist: ", newest_runlist)
runlist = file_to_list(newest_runlist)

if str(newest_runlist) not in saved_runlists:
    if not all_data_exists(runlist):
        # Processed data incomplete: queue for resubmission and skip the
        # calculation (the old misplaced `continue` let it run anyway).
        resubmit_files.append(newest_runlist)
        missing_runs.extend(r for r in runlist if not data_exists(r))
    else:
        bottom_run = runlist[0]
        topbulk_run = runlist[1]
        topring_run = runlist[2]

        thresholds = analyze.get_thresholds(analyze.find_regular_run(bottom_run))

        acc, acc_errs, acc_sys, acc_stat = analyze.acceptance_3runs(bottom_run, topbulk_run, topring_run, thresholds)
        occ, occ_sys, occ_stat = analyze.occupancy_3runs(bottom_run, topbulk_run, topring_run)
        print(acc_errs)

        # Channels with occupancy above 0.05 count as "on"
        on_channels = np.where(occ > 0.05)[0]

        # Fewer than 200 on channels: the LED was probably off
        if len(on_channels) < 200:
            LED_off.append(newest_runlist)

        on_acc = acc[on_channels]
        on_occ = occ[on_channels]

        ch_acc_dict[newest_runlist]=acc

        acc_errs_l = acc_errs[0]
        acc_errs_u = acc_errs[1]

        ch_err_l[newest_runlist]=acc_errs_l
        ch_err_u[newest_runlist]=acc_errs_u
        print(acc_errs_l)

        accs.append(np.mean(on_acc))
        lower_errs.append(np.mean(acc_errs_l**2))
        upper_errs.append(np.mean(acc_errs_u**2))
        # NOTE(review): bottom_run was already added to bottom_runs by the cell
        # that scans ./runlists, so this looks like a duplicate entry — confirm.
        bottom_runs.append(bottom_run)

        # The result is not appended to `data` here; the all-runlists cell
        # below builds the analyze.ch_data records.

else:
    print("Data already in pickle file")

errors=np.array([lower_errs, upper_errs])
print(lower_errs[-1])
# .get() so the cell still finishes when the newest runlist has no data yet
ch_err_l.get(newest_runlist)

In [None]:
# Per-channel acceptance of the newest runlist with its lower/upper errors.
print(accs[-1])
channels=np.arange(248)
plt.figure(figsize=(12,8))
# Read the acceptances from ch_acc_dict: acc_df (built from this same dict) is
# only created in a later cell, so using it here failed on Restart & Run All.
plt.errorbar(channels, ch_acc_dict[newest_runlist], yerr=[acc_errs_l, acc_errs_u], linestyle='None', marker='.' )
plt.xlabel('Channel #')
plt.ylabel('Acceptance Fraction')
#plt.ylim(0.7,1.1)
plt.show()

In [None]:
# Recompute acceptance and occupancy for EVERY runlist in ./runlists that has
# complete processed data, and append one analyze.ch_data record per runlist to `data`.
#just newest runlist

newest_runlist= find_file(sorted(bottom_runs)[-1])[0]
print("Newest Runlist: ", newest_runlist)
#runlist = file_to_list(newest_runlist)

# NOTE(review): the saved_runlists guard below is commented out, so runlists
# that were already loaded from the pickle appear to get a second entry in
# `data`, `accs`, `lower_errs`, `upper_errs` and `bottom_runs` — confirm intended.
#if str(newest_runlist) not in saved_runlists:
for runlist in tqdm(runlists):
    runs=file_to_list(runlist)
    # Incomplete processed data: queue for resubmission, note the missing runs, skip
    if not all_data_exists(runs):
        resubmit_files.append(runlist)
        for r in runs:
            if not data_exists(r):
                missing_runs.append(r)
        continue


    bottom_run = runs[0]
    topbulk_run = runs[1]
    topring_run = runs[2]

    thresholds = analyze.get_thresholds(analyze.find_regular_run(bottom_run))

    acc, acc_errs, acc_sys, acc_stat = analyze.acceptance_3runs(bottom_run, topbulk_run, topring_run, thresholds)
    occ, occ_sys, occ_stat = analyze.occupancy_3runs(bottom_run, topbulk_run, topring_run)

    # Channels with occupancy above 0.05 count as "on"
    on_channels = np.where(occ > 0.05)[0]

    # Fewer than 200 on channels: flag the runlist as "LED likely off"
    if len(on_channels) < 200:
        LED_off.append(runlist)

    on_acc = acc[on_channels]
    on_occ = occ[on_channels]

    ch_acc_dict[runlist]=acc

    acc_errs_l = acc_errs[0]
    acc_errs_u = acc_errs[1]

    # NOTE(review): here the per-channel errors are stored as the mean of the
    # squared errors along axis 1, whereas other cells store acc_errs_l /
    # acc_errs_u unreduced under the same dict — confirm which form is wanted.
    ch_err_l[runlist]=np.mean(acc_errs_l**2, axis=1)
    ch_err_u[runlist]=np.mean(acc_errs_u**2, axis=1)

    accs.append(np.mean(on_acc))
    lower_errs.append(np.mean(acc_errs_l**2))
    upper_errs.append(np.mean(acc_errs_u**2))
    bottom_runs.append(bottom_run)

    cd=analyze.ch_data(runlist, get_run_time(bottom_run), acc, on_acc, acc_errs_l, acc_errs_u, acc_sys, acc_stat, occ, on_occ, occ_sys, occ_stat)
    data.append(cd)

#else:
 #   print("Data already in pickle file")

# After the loop, acc / occ / on_channels / on_acc / on_occ hold the values of
# the LAST runlist iterated, which is not necessarily newest_runlist.
errors=np.array([lower_errs, upper_errs])

#### Dumps all data into pickle file, prints runs with LED off and runs with missing data, deletes raw data for processed runs

In [None]:
from spe_acceptance import change_permissions

# Write to a temporary file first and then swap it into place, so a failed dump
# can no longer leave the project without its previous pickle. The old code
# removed the file before writing and raised if the file did not exist yet.
ch_data_path = '/project/lgrandi/xenon1t/spe_acceptance/ch_data/ch_data.pkl'
ch_data_tmp_path = ch_data_path + '.tmp'
with open(ch_data_tmp_path, 'wb') as pkl_file:
    pickle.dump(data, pkl_file)
os.replace(ch_data_tmp_path, ch_data_path)
change_permissions(ch_data_path)

In [None]:
#Delete raw data after processed data is in the pickle file
from get_name import get_name

# Runs listed in a runlist file whose raw-data directory is not on disk
no_data=[]
for runlist in runlists:
    with open(os.path.join('./runlists', runlist)) as runlist_file:
        for line in runlist_file:
            line = line.strip()
            if not line:
                # tolerate blank lines in the runlist file
                continue
            run = int(line)
            name = get_name(run)
            path = os.path.join(rawdata_dir, name)
            if not os.path.exists(path):
                no_data.append(run)
                continue
            # Only remove raw data once the processed run_XXXXX.h5 file exists.
            # The previous check had an unquoted format string (a syntax error)
            # and formatted the raw text line with %05d.
            if data_exists(run):
                shutil.rmtree(path)
                print("Deleting raw data for: ", run, name)

print("These runs are missing data: ", missing_runs)
print("LED likely off for these files: ", LED_off)

#### Resubmits jobs for runs missing data

In [None]:
# Resubmit jobs for runlists with missing processed data. Several cells append
# to resubmit_files, so the same runlist can be listed more than once; each
# one is submitted only once here.
already_submitted = set()
for f in resubmit_files:
    if f in already_submitted:
        continue
    already_submitted.add(f)
    runs = file_to_list(f)
    # Skip runlists whose bottom run is below 6731 (presumably pre-SR1 — confirm)
    if runs[0] < 6731:
        continue
    submit_job(os.path.join('./runlists', f))

#### Acceptance dataframe with channels as row index, runlists as column index

In [None]:
# One column per runlist, one row per channel (0-247)
acc_df = pd.DataFrame(ch_acc_dict)
channels_df = pd.DataFrame({"channels": list(range(248))})
ch_df = pd.concat([acc_df, channels_df], axis=1).set_index('channels')
ch_df

#### Lower, upper errors dataframes with channels as row index, runlist as column index

In [None]:
# Lower errors: one column per runlist, indexed by channel
lerr_df = pd.DataFrame(ch_err_l)
ch_lerr_df = pd.concat([lerr_df, channels_df], axis=1).set_index('channels')
ch_lerr_df

In [None]:
# Upper errors: one column per runlist, indexed by channel
uerr_df = pd.DataFrame(ch_err_u)
ch_uerr_df = pd.concat([uerr_df, channels_df], axis=1).set_index('channels')
ch_uerr_df

## Newest Runlist

#### All channels for newest runlist

In [None]:
# Acceptance column of the newest runlist (one value per channel)
acc_df.loc[:, newest_runlist]

#### Channels that look off. Note that this includes both channels that are actually off and channels that aren't performing properly.

In [None]:
# List the channels whose occupancy is below 0.05.
# NOTE(review): `occ` is whatever the last acceptance cell left behind, which
# may not belong to newest_runlist — confirm.
low_occupancy = occ < 0.05
my_off = np.where(low_occupancy)[0]
print("Channels that look off for %s: " %newest_runlist , my_off)

#### Acceptance histogram for one runlist

In [None]:
# Histogram of the on-channel acceptances
plt.hist(on_acc, range=(0,1.1), bins=50)
plt.xlim(0, 1.1)
plt.xlabel("SPE Acceptance")
plt.ylabel("Counts")
plt.title("Acceptance for %s " %newest_runlist)
plt.show()

#### Occupancy histogram for one runlist

In [None]:
# Histogram of the on-channel occupancies; binned over (0, 1.1) but shown up to 0.25
plt.hist(on_occ, range=(0,1.1), bins=50)
plt.xlim(0,0.25)
plt.xlabel("Occupancy")
plt.ylabel("Counts")
plt.title("Occupancy for %s " %newest_runlist)
plt.show()

#### Acceptance of all channels for the newest runlist

In [None]:
#plot acceptance of each channel
# A single figure is created; the extra bare plt.figure() call that used to
# precede it only produced an empty figure in the output.
plt.figure(figsize=(12,8))
plt.errorbar(on_channels, on_acc, yerr=[ch_lerr_df.loc[on_channels, newest_runlist], ch_uerr_df.loc[on_channels,newest_runlist]], marker='.', linestyle='none')
plt.ylabel('Acceptance Fraction')
plt.xlabel('Channel #')
plt.title('Acc by Ch for %s' %newest_runlist)
plt.show()

#### PMT Plot for Newest Runlist

In [None]:
# Pass the newest runlist's per-channel acceptances to analyze.plot_acceptances
analyze.plot_acceptances(acc_df.loc[:,newest_runlist])

#### Prints low acceptance channels

In [None]:
#prints list of low acc ch
# Pair each on channel with its acceptance directly. The previous lookup dict
# was keyed by the acceptance value, so channels sharing the same acceptance
# overwrote each other and one channel was reported repeatedly.
new_low_acc_ch = [channel for channel, acceptance in zip(on_channels, on_acc) if acceptance < 0.5]

print('Low acc ch for %s: ' %newest_runlist, new_low_acc_ch)

#### Mean, Median, Errors for runlist

In [None]:
# Summary statistics for the newest runlist.
# NOTE(review): the mean is taken over the on channels only, while the median
# uses every channel — confirm whether both should use on_channels.
newest_acc = ch_acc_dict[newest_runlist]
acc_mean = np.nanmean(newest_acc[on_channels])
acc_median = np.nanmedian(newest_acc)

print("Runlist: ", newest_runlist)
print("Mean Acceptance: ", acc_mean)
print("Median Acceptance: ", acc_median)
print("Errors [l,u]: ", [np.nanmean(ch_err_l[newest_runlist]), np.nanmean(ch_err_u[newest_runlist])])

## Average Acceptance per Runlist

In [None]:
#plot acceptance vs time using full runlists

import matplotlib.dates as mdates
years = mdates.YearLocator()   # every year
months = mdates.MonthLocator()  # every month
fmt = mdates.DateFormatter('%Y-%m')

# Convert the accumulators to arrays so they can be fancy-indexed below.
# NOTE(review): bottom_runs, accs and errors are filled by several earlier
# cells; confirm they have matching length and order before indexing with sr1.
bottom_runs = np.array(bottom_runs)
dates = np.array([get_run_time(run) for run in bottom_runs])
accs = np.array(accs)

# Keep runlists whose bottom run is above 6731 and not listed in noisy_runs
noisy_runs = [12768, 13837]
sr1 = np.where(bottom_runs > 6731)
sr1 = [i for i in sr1[0] if bottom_runs[i] not in noisy_runs]

plot_dates = dates[sr1]
plot_runs = bottom_runs[sr1]
plot_accs = accs[sr1]
# NOTE(review): errors holds means of squared errors (no square root) — confirm
# these are the values intended as error bars.
plot_errors = [errors[0][sr1],errors[1][sr1]]

f, ax = plt.subplots(figsize=(12,8))
ax.errorbar(plot_dates, plot_accs, yerr=plot_errors, linestyle='None', marker='.')
ax.set_ylim(0.75, 1.0)
# Rotate the tick labels; the locator and formatter are replaced right after
plt.xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=20)
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(fmt)
ax.grid()
ax.set_xlabel('date')
ax.set_ylabel('spe acceptance')
plt.title("Acceptance vs Time")
# Saved to the current working directory
plt.savefig('new_moneyplot.png')
plt.show()

#### Mean, Median, Errors for all runlists

In [None]:
# Mean, median, spread and mean errors over the runlists selected for the time plot
#add table of mean/median w errors for time evo, print days that are outliers

evo_mean, evo_median = np.nanmean(plot_accs), np.nanmedian(plot_accs)
evo_std = np.nanstd(plot_accs)
lower_plot_errs, upper_plot_errs = plot_errors[0], plot_errors[1]
evo_errs = [np.nanmean(lower_plot_errs), np.nanmean(upper_plot_errs)]

print("All Runlists")
print("Mean :", evo_mean)
print("Median :", evo_median)
print("Errors [l, u]: ", evo_errs)

## Acceptance for Individual Channels

### One Channel

#### Plots acceptances of one channel for all runlists

In [None]:
# Channel whose acceptance history is plotted
ch=19

#list of lists of runlists
sr1_rl=[find_file(i) for i in bottom_runs[sr1]]

#list of runlists
# NOTE(review): if find_file returns more than one file for a run, sr1_rl gets
# longer than plot_dates and the errorbar call below fails.
sr1_rl=[runlist for elem in sr1_rl for runlist in elem]

# Use this figure's own axes. The previous version reused `ax` from the
# acceptance-vs-time cell, so the month locator and date formatter were applied
# to that old figure and the tick positions here were copied from it.
ch_fig, ch_ax = plt.subplots(figsize=(12,8))
ch_ax.errorbar(plot_dates, ch_df.loc[ch, sr1_rl] , yerr=[ch_lerr_df.loc[ch, sr1_rl], ch_uerr_df.loc[ch, sr1_rl]], linestyle='None', marker='.')
ch_ax.set_ylim(0.75, 1.1)
ch_ax.xaxis.set_major_locator(months)
ch_ax.xaxis.set_major_formatter(fmt)
ch_ax.tick_params(axis='x', labelrotation=20)
ch_ax.grid()
ch_ax.set_title("Channel %s" %ch)
ch_ax.set_xlabel('date')
ch_ax.set_ylabel('spe acceptance')
plt.show()

In [None]:
# Print the runlists where the selected channel's upper error exceeds 0.05
for runlist in sr1_rl:
    upper_err = ch_uerr_df.loc[ch, runlist]
    if upper_err > 0.05:
        print(runlist, upper_err)

#### Prints runlists with low acceptance for one channel

In [None]:
# Runlists in which the selected channel's acceptance is below 0.5
#prints list of low acc runlists

ch_accs = np.array(ch_df.loc[ch])
low_acc = np.where(ch_accs < 0.5)
low_acc_rl = ch_df.columns[low_acc].tolist()

print('Low acc runlists for ch %s: ' %ch, low_acc_rl)

# Further Analysis

### Find outliers, plot the acceptance curve, acceptance histogram, and occupancy histogram

#### Change the index in run=outliers[index] in order to change which outlier gets plotted

In [None]:
#Find outliers, plot acc of one of them

# Runlists whose mean acceptance lies more than 1.5 standard deviations from
# the mean. NOTE(review): evo_mean / evo_std come from the plotted subset only,
# while every entry of `accs` is tested — confirm.
outlier_indices_low = np.array(np.where(accs < (evo_mean-1.5*evo_std))).flatten()
outlier_indices_high= np.array(np.where(accs > (evo_mean+1.5*evo_std))).flatten()


outlier_indices=outlier_indices_low.tolist() + outlier_indices_high.tolist()

outliers = bottom_runs[outlier_indices]
print("Outliers :", outliers)
run = outliers[0] #change index to change which outlier
print('Run: ',run)
runlist = file_to_list(find_file(run)[0])

bot_run=runlist[0]
tb_run=runlist[1]
tr_run=runlist[2]

x, acc, err = analyze.acceptance_curve_3runs(bot_run, tb_run, tr_run)

plt.figure(figsize=(9,6))
# One curve per entry of `acc`. The loop variable is no longer named `ch`, so
# the channel chosen in the "One Channel" section is not overwritten.
for curve in acc:
    plt.plot(x, curve, color='black', linewidth=1, alpha=0.6)

# Title set once instead of on every loop iteration
plt.title("Acceptance curve for: %d" %run)
plt.xlim(-10, 200)
plt.xlabel('threshold')
plt.ylabel('spe acceptance')
plt.show()

In [None]:
# Recompute acceptance and occupancy for the outlier runlist chosen above.
# The results are only kept in memory (nothing is appended to `data` here).
#doesn't save the data
thresholds = analyze.get_thresholds(analyze.find_regular_run(bot_run))
acc, acc_errs, acc_sys, acc_stat = analyze.acceptance_3runs(bot_run, tb_run, tr_run, thresholds)
occ, occ_sys, occ_stat = analyze.occupancy_3runs(bot_run, tb_run, tr_run)

# Channels with occupancy above 0.05 count as "on"
on_channels = np.where(occ > 0.05)[0]

# From here on `acc` and `occ` hold only the on-channel entries
acc=acc[on_channels]
occ=occ[on_channels]

In [None]:
# Occupancy histogram of the outlier runlist's on channels
plt.hist(occ, range=(0,1.1), bins=50)
plt.xlim(0,0.25)
plt.xlabel("Occupancy")
plt.ylabel("Counts")
plt.title("Occupancy for %s " %find_file(run)[0])
plt.show()

In [None]:
# Acceptance histogram of the outlier runlist's on channels
plt.hist(acc, range=(0,1.1), bins=50)
plt.xlim(0, 1.1)
plt.xlabel("SPE Acceptance")
plt.ylabel("Counts")
plt.title("Acceptance for %s " %find_file(run)[0])
plt.show()

In [None]:
# File name of the runlist that contains the selected outlier run
runlist= find_file(run)[0]

#plot acceptance of each channel
# A single figure is created; the extra bare plt.figure() call that used to
# precede it only produced an empty figure in the output.
plt.figure(figsize=(12,8))
plt.errorbar(on_channels, acc, yerr=[ch_lerr_df.loc[on_channels, runlist], ch_uerr_df.loc[on_channels,runlist]], marker='.', linestyle='none')
plt.ylabel('Acceptance Fraction')
plt.xlabel('Channel #')
plt.title('Acc by Ch for %s' %runlist)
plt.show()

In [None]:
# On channels of the outlier runlist with acceptance below 0.5. Each channel is
# paired with its acceptance directly. The previous lookup dict was keyed by
# the acceptance value, so channels sharing the same acceptance overwrote each
# other and one channel was reported repeatedly.
low_acc_ch = [channel for channel, acceptance in zip(on_channels, acc) if acceptance < 0.5]

print('Low acc ch: ', low_acc_ch)