In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd

from scipy.optimize import curve_fit
from scipy.interpolate import interp1d

import gm2
import trfp
import plotting_functions as plt2
import helper_functions as helper

import field_map_config_run1 as map_config
import muon_dist_config_run1 as dist_config

# Per-moment scale factors for the RW/BB drift uncertainty, one entry per moment
# index m = 0..8.  The per-dataset loop combines them with the unit-M time-averaged
# covariance as np.sqrt(sigma_avg*rates[m]).
# Presumably these are the `M` values (Hz^2/sec) expected by cov()/cov2() -- TODO confirm units.
rates = np.array([0.0002440148957033999, 7.174497059079315e-06, 5.597953677893461e-06,
                  2.471179896215746e-06, 2.469063223225018e-05, 4.98031484960896e-07,
                  2.3557763230871967e-06, 1.3267701986068543e-06, 2.733157063334935e-06])
                  ## Provenance: 27 Jan, `sync_offset_studies`; uses station-by-station widths and correlation matrix

def parameterize_beam(beam, beam_x, beam_y):
    """Return the beam-weighted mean of every multipole term over a 2D beam profile.

    For each entry i of ``trfp.matrices._MULTIPOLE_ORDER`` the multipole function
    is evaluated on the grid ``np.meshgrid(beam_x, beam_y)`` and averaged with
    ``beam`` as the weight: ``sum(f * beam) / sum(beam)`` (each sum scaled by the
    mean cell area, which is the same factor in numerator and denominator).

    Parameters
    ----------
    beam : array
        Beam intensity on the grid; it must broadcast against the meshgrid
        output (presumably shape ``(len(beam_y), len(beam_x))`` -- TODO confirm).
    beam_x, beam_y : 1D arrays
        Grid coordinates along x and y.

    Returns
    -------
    numpy.ndarray
        One float per multipole term, in the order of ``_MULTIPOLE_ORDER``.
    """
    grid_x, grid_y = np.meshgrid(beam_x, beam_y)

    # Mean grid spacing along each axis; their product is the area of one grid cell.
    step_x = np.mean(np.diff(beam_x))
    step_y = np.mean(np.diff(beam_y))

    orders = trfp.matrices._MULTIPOLE_ORDER
    skews = trfp.matrices._MULTIPOLE_SKEW
    multipole_fn = trfp.matrices.__multipole

    n_terms = len(orders)
    coeffs = np.zeros(n_terms)

    for term in range(n_terms):
        # Third argument is fixed to 1 (presumably a unit strength -- TODO confirm).
        shape = multipole_fn(orders[term], skews[term], 1, grid_x, grid_y)
        weighted_sum = np.sum(shape*beam*step_x*step_y)
        normalisation = np.sum(beam*step_x*step_y)
        coeffs[term] = weighted_sum/normalisation
    return coeffs

def cov(t1, t2, T, M=1):  # BB, units for M are Hz^2/sec
    """Brownian-bridge covariance between times t1 and t2 on the interval [0, T].

    Computes ``M * min(t1, t2) * (T - max(t1, t2)) / T`` element-wise.

    Parameters
    ----------
    t1, t2 : scalar or array
        The two times; arrays are broadcast against each other, so passing a
        row-wise and a column-wise matrix yields the full covariance matrix.
    T : scalar
        Length of the interval (the bridge is pinned at 0 and at T).  A
        non-finite T gives NaN; use ``cov2`` for the open-ended case.
    M : scalar, optional
        Overall scale (rate), default 1.

    Returns
    -------
    Covariance with the broadcast shape of ``t1`` and ``t2``.
    """
    u = np.minimum(t1, t2)
    v = np.maximum(t1, t2)
    # np.true_divide instead of `/`: this notebook runs on Python 2, where `/`
    # floors the result when every operand is an integer.
    return np.true_divide(M * (T - v) * u, T)

def cov2(t1, t2, T, M=1):  # RW, units for M are Hz^2/sec
    """Random-walk covariance between times t1 and t2: ``M * min(t1, t2)``.

    Parameters
    ----------
    t1, t2 : scalar or array
        The two times; arrays are broadcast against each other.
    T : scalar
        Unused.  Kept so the signature matches ``cov`` and the two functions
        can be called interchangeably.
    M : scalar, optional
        Overall scale (rate), default 1.

    Returns
    -------
    Covariance with the broadcast shape of ``t1`` and ``t2``.
    """
    # Only the earlier of the two times enters; the element-wise maximum that
    # used to be computed here was never read and cost a full extra matrix.
    return M * np.minimum(t1, t2)


Welcome to JupyROOT 6.22/02


In [2]:
# Layout of the summary table: three totals, then for each moment number 1..9
# the four columns k<n>, m<n>, km<n>, BB<n> (in that order).
columns = ['ctags', 'km_total', 'BB_total'] + list(
    prefix + str(moment)
    for moment in range(1, 10)
    for prefix in ('k', 'm', 'km', 'BB'))

# One row per dataset listed in the field-map configuration.
index = map_config.subruns
# For a quick test, restrict to a few datasets instead, e.g.:
# index = ['1a1']#, '1b2']#, '1d4', '1d5', '1d6']

# Every cell starts at 0.0; the per-dataset loop fills the rows in.
run_1_df = pd.DataFrame(0.0, index=index, columns=columns)

# HDF5 file read in the per-dataset loop with key 'run_<dataset>'.
data_file = '/data2/aetb/2021-02-17_hybrid_maps.h5'


In [3]:
%%time

# Fill one row of `run_1_df` per dataset in `index`:
#   m<n>   ctag-weighted, azimuthally averaged 'st<station>,m<n>' values,
#   k<n>   same average of the beam-profile multipole projections,
#   km<n>  same average of the station-by-station products k*m,
#   BB<n>  sqrt(time-averaged unit-rate covariance * rates[n-1]),
# plus the totals ctags, km_total and BB_total.
# Several names assigned here (azi_run_k_df, weight, run_km, ...) stay in the
# notebook namespace with the values of the LAST dataset processed.
for run in index:

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('Starting Run ' + run)

    key = 'run_'+run

    # Time-indexed frame holding the 'st<station>,m<moment>' columns for this dataset.
    vtm_df = pd.read_hdf(data_file, key=key)

    # Per-dataset file names and HDF keys, by position in the config entry.
    config_dict = dist_config.config_dict
    data_run = run

    vtm_file = config_dict[data_run][0]  # not used in this cell
    vtm_key = config_dict[data_run][1]  # not used in this cell
    interp_file = config_dict[data_run][2]
    interp_key_1 = config_dict[data_run][3]
    interp_key_2 = config_dict[data_run][4]
    gold_subruns_file = config_dict[data_run][5]
    beam_files = config_dict[data_run][6]

    # t_start / t_end are the mean index values of the two frames stored under
    # interp_key_1 / interp_key_2 (presumably the opening and closing trolley runs).
    tr_interp_df_1 = pd.read_hdf(interp_file, key=interp_key_1)
    t_start = np.mean(tr_interp_df_1.index.values)
    tr_interp_df_2 = pd.read_hdf(interp_file, key=interp_key_2)
    if data_run == '1d6': t_end = np.inf  # deals with not having a closing trolley run for 1d6
    else: t_end = np.mean(tr_interp_df_2.index.values)

    # Import a gold subrun list and keep only subruns that lie entirely inside [t_start, t_end]

    subrun_df = pd.read_hdf(interp_file, key='subrun_df')
    gold_subruns = np.loadtxt(gold_subruns_file)
    gold_subruns_df = pd.DataFrame(gold_subruns.astype(int), columns=['run', 'subrun']).merge(subrun_df, on=['run', 'subrun'])
    gold_subruns_df = gold_subruns_df[(gold_subruns_df['start_gps']>=t_start) & (gold_subruns_df['end_gps']<=t_end)]

    # Aside: Do the RW/BB calculation here
    ## a vector is number of ctags in each subrun divided by the number of ctags in all used subruns
    a = gold_subruns_df['ctags'].values/float(gold_subruns_df['ctags'].sum())
    ## make vector of subrun times after trolley run (use avg)
    t_subrun = ((gold_subruns_df['start_gps']+gold_subruns_df['end_gps'])/2).values - t_start
    t_period = t_end - t_start
    ## build the covariance matrix sigma
    ## t1, t2 are the two times under consideration, can be matrices
    ## make t1 a matrix of proper size with time by row; t2 is by column; ~10 sec
    t1 = np.outer(t_subrun, np.ones(t_subrun.size))
    t2 = np.outer(np.ones(t_subrun.size), t_subrun)
    ## run the cov function (note, this takes ~1 min)
    ## 1d6 has t_end = inf, so it uses cov2, which does not depend on the period
    if (run == '1d6'):
        sigma = cov2(t1, t2, t_period, M=1)  # use M=1, scale with rates later to save time
    else:
        sigma = cov(t1, t2, t_period, M=1)
    ## ctag-weighted average of the covariance: a^T . sigma . a
    sigma_avg = np.dot(a, np.dot(sigma, a))

    # need to bin into subruns
    # Interpolate the field at every subrun boundary and add those rows to the
    # time series, so that each subrun interval has samples exactly at its edges.

    vtm_interp = interp1d(vtm_df.index, vtm_df.values, axis=0)
    times = gold_subruns_df['start_gps'].append(gold_subruns_df['end_gps'])

    boundary_df = pd.DataFrame(vtm_interp(times), index=times, columns=vtm_df.columns)

    vtm_interp_df = vtm_df.append(boundary_df).sort_index()

    boundary_cut = pd.IntervalIndex.from_arrays(gold_subruns_df['start_gps'], gold_subruns_df['end_gps'], closed='both')
    vtm_cut = pd.cut(vtm_interp_df.index, boundary_cut)

    def avg_technique(bin_):
        """Time average of every column of `bin_`: trapezoid integral over the index divided by the index span."""
        numer = np.trapz(bin_, x=bin_.index.values, axis=0)
        denom = np.max(bin_.index.values)-np.min(bin_.index.values)

        return numer/denom

    avg_field = vtm_interp_df.groupby(vtm_cut).apply(avg_technique)
    avg_df = pd.DataFrame.from_dict(dict(zip(avg_field.index, avg_field.values)), orient='index', columns=vtm_df.columns)
    avg_df['start_gps'] = [interval[0] for interval in avg_df.index.to_tuples().values]
    avg_df['end_gps'] = [interval[1] for interval in avg_df.index.to_tuples().values]

    # Attach the per-subrun averages to the subrun bookkeeping columns.
    output_df = gold_subruns_df[['run', 'subrun', 'start_gps', 'end_gps', 'ctags']].copy()
    output_df = output_df.merge(avg_df, on=['start_gps', 'end_gps'])

    output_columns = ['run', 'subrun', 'start_gps', 'end_gps', 'ctags'] + ['st'+str(st)+',m'+str(m+1) for st in range(72) for m in range(9)]
    output_df = output_df[output_columns]

    ###################################################
    ## make a time varying, azimuthally varying profile
    ###################################################

    # Collect one record per run and build the frame once after the loops;
    # appending to a DataFrame row by row copies the whole frame every time.
    azi_records = []

    for ii in range(len(beam_files)):

        # Each beam_files entry is a (first run, last run) pair naming one beam-spot file.
        beam_filename = 'BeamSpot_'+str(beam_files[ii][0])+'_'+str(beam_files[ii][1])+'.root'

        beam, beam_x, beam_y, phi = gm2.trfp.conv.loadBeamNew(beam_filename, path='/data1/newg2/Run1TrackerData/22Sep2020/', integrated=False)
        phi = phi + 18.35  # NOTE(review): fixed azimuthal offset, origin not shown here -- confirm
        beam_x = beam_x/10  # NOTE(review): presumably a mm -> cm conversion -- confirm
        beam_y = beam_y/10
        for hh in range(len(beam)):
            beam[hh] = np.transpose(beam[hh])

        # Multipole projections of the beam profile at each of the 72 azimuthal slices.
        # 14 must equal the number of terms returned by parameterize_beam.
        k_azi = np.empty([14, 72])
        for ll in range(72):
            k_azi[:,ll] = parameterize_beam(beam[ll], beam_x, beam_y)

        # Tile the profile at phi-360, phi, phi+360 so the cubic interpolation
        # is continuous across the 0/360 boundary.
        k_azi_interp = np.empty([14,72*3])
        k_azi_interp[:,0:72] = k_azi
        k_azi_interp[:,72:144] = k_azi
        k_azi_interp[:,144:216] = k_azi

        phi_interp = np.array([phi-360, phi, phi+360]).flatten()

        k_interp = interp1d(phi_interp, k_azi_interp, axis=1, kind='cubic')

        # Evaluate at the midpoint of each station's azimuthal range.
        # Entry 2 is shifted by 180 (presumably because that station's range
        # wraps through 0/360 -- TODO confirm); its width gets +360 further down.
        station_centers = (trfp.geometry.STATION_BARCODE_EDGES[:-1] + trfp.geometry.STATION_BARCODE_EDGES[1:])/2
        station_centers[2] = station_centers[2] + 180

        k_station = k_interp(station_centers)

        # Every run in the file's inclusive run range gets the same k values
        # (only the first 9 of the 14 terms are kept).
        run_range = np.arange(beam_files[ii][0], beam_files[ii][1]+1)

        for jj in range(len(run_range)):
            run_append_dict = {}
            run_append_dict['run'] = run_range[jj]
            for st in range(72):
                for kk in range(9):
                    run_append_dict['st'+str(st)+',k'+str(kk+1)] = k_station[kk,st]
            azi_records.append(run_append_dict)

    azi_run_k_df = pd.DataFrame(azi_records)

    stk = ['st'+str(st)+',k'+str(k+1) for st in range(72) for k in range(9)]
    azi_run_k_df = azi_run_k_df.set_index(azi_run_k_df['run'].values)
    azi_run_k_df = azi_run_k_df[stk]

    ## need to add in a run 15954 for Run 1a
    ## (filled with the mean of the neighbouring runs 15953 and 15955)

    if run == '1a1':
        append_df = ((azi_run_k_df.loc[15953] + azi_run_k_df.loc[15955])/2)
        append_df.name = 15954
        azi_run_k_df = azi_run_k_df.append(append_df)
        azi_run_k_df = azi_run_k_df.sort_index()

    stkm = ['st'+str(st)+',km'+str(km+1) for st in range(72) for km in range(9)]
    stm = ['st'+str(st)+',m'+str(m+1) for st in range(72) for m in range(9)]

    def subrun_bin(df):
        """Collapse the subruns of one run into a single row: time span, total ctags and ctag-weighted mean of each 'st..,m..' column."""
        stm = ['st'+str(st)+',m'+str(m+1) for st in range(72) for m in range(9)]

        bin_df = pd.Series(index=['start_gps', 'end_gps', 'ctags']+stm)
        bin_df['start_gps'] = df['start_gps'].min()
        bin_df['end_gps'] = df['end_gps'].max()
        bin_df['ctags'] = df['ctags'].sum()
        bin_df[stm] = df['ctags'].dot(df[stm])/df['ctags'].sum()

        return bin_df

    bin_df = output_df.groupby('run').apply(subrun_bin)

    # Station-by-station product k*m per run.  The two frames are aligned on the
    # run index; runs missing from either side become NaN and are dropped.
    run_df = (azi_run_k_df[stk].rename(columns=dict(zip(stk,stm))) * bin_df[stm]).dropna()
    run_df = run_df.rename(columns=dict(zip(stm,stkm)))
    run_df['start_gps'] = bin_df['start_gps']
    run_df['end_gps'] = bin_df['end_gps']
    run_df['ctags'] = bin_df['ctags']
    run_df = run_df[['start_gps', 'end_gps', 'ctags']+stkm]

    # Azimuthal weight of each station: its angular width as a fraction of 360.
    weight = (trfp.STATION_BARCODE_EDGES[1:] - trfp.STATION_BARCODE_EDGES[:-1])
    weight[2] = (weight[2] + 360)
    weight = weight/360

    # ctag-weighted mean over runs, then azimuthal sum per moment.
    run_sum = run_df['ctags'].dot(run_df[stkm])/run_df['ctags'].sum()

    run_km = np.empty(9)
    for km in range(9):
        _stkm = ['st'+str(st)+',km'+str(km+1) for st in range(72)]
        run_km[km] = run_sum[_stkm].multiply(weight).sum()

    # Results are written with a single .loc[row, column] call.  The chained
    # form run_1_df[col].loc[run] = value assigns through an intermediate
    # object and may silently fail to update run_1_df.
    for m in range(9):

        # get azi and time averaged m values
        stm_list = ['st'+str(st)+',m'+str(m+1) for st in np.arange(72)]
        bin_df['m'+str(m+1)] = bin_df[stm_list].multiply(weight).sum(axis=1)
        run_1_df.loc[run, 'm'+str(m+1)] = (bin_df['ctags']*bin_df['m'+str(m+1)]).sum()/bin_df['ctags'].sum()

        # NOTE(review): the product below aligns bin_df and azi_run_k_df on the run
        # index.  A run present in bin_df but absent from azi_run_k_df adds nothing
        # to the numerator yet still counts in the denominator -- confirm every
        # binned run has a beam profile.
        stk_list = ['st'+str(st)+',k'+str(m+1) for st in np.arange(72)]
        azi_run_k_df['k'+str(m+1)] = azi_run_k_df[stk_list].multiply(weight).sum(axis=1)
        run_1_df.loc[run, 'k'+str(m+1)] = (bin_df['ctags']*azi_run_k_df['k'+str(m+1)]).sum()/bin_df['ctags'].sum()

        stkm_list = ['st'+str(st)+',km'+str(m+1) for st in np.arange(72)]
        run_df['km'+str(m+1)] = run_df[stkm_list].multiply(weight).sum(axis=1)
        run_1_df.loc[run, 'km'+str(m+1)] = (run_df['ctags']*run_df['km'+str(m+1)]).sum()/run_df['ctags'].sum()

        # scale RW/BB uncertainties by rate, put in output df
        run_1_df.loc[run, 'BB'+str(m+1)] = np.sqrt(sigma_avg*rates[m])

    run_1_df.loc[run, 'ctags'] = run_df['ctags'].sum()
    run_1_df.loc[run, 'km_total'] = run_1_df[['km'+str(km+1) for km in range(9)]].loc[run].sum()
    # Quadrature sum over moments of (k<n> * BB<n>).
    run_1_df.loc[run, 'BB_total'] = np.sqrt(np.dot(run_1_df[['k'+str(m+1) for m in range(9)]].loc[run].values**2,
                                                   run_1_df[['BB'+str(m+1) for m in range(9)]].loc[run].values**2))

Starting Run 1a1
Starting Run 1b1
Starting Run 1b2
Starting Run 1c1
Starting Run 1c2
Starting Run 1c3
Starting Run 1d2
Starting Run 1d3
Starting Run 1d4
Starting Run 1d5
Starting Run 1d6
CPU times: user 13min 2s, sys: 2min 6s, total: 15min 9s
Wall time: 17min 52s


In [5]:
# Write the per-dataset summary table to CSV (path is relative to the working directory).
# Run this only after the per-dataset loop has filled run_1_df; before that every cell is 0.0.
run_1_df.to_csv('run_1_df_2021-02-17.csv')

In [4]:
# Show the first rows of the summary table as a quick sanity check.
run_1_df.head()

Unnamed: 0,ctags,km_total,BB_total,k1,m1,km1,BB1,k2,m2,km2,...,km7,BB7,k8,m8,km8,BB8,k9,m9,km9,BB9
1a1,485123992.0,51874.69893,2.631486,1.0,51881.842153,51881.842153,2.629353,0.150495,-4.464607,-0.218858,...,0.043441,0.25835,7.3e-05,3.676687,-0.031824,0.193882,-0.000274,-83.72823,0.018608,0.278274
1b1,562216478.0,51944.382182,2.453179,1.0,51952.245993,51952.245993,2.451712,0.1325,-19.407303,-2.040268,...,0.037505,0.240895,-0.000184,4.027598,-0.03298,0.180784,0.000439,-83.073329,-0.038978,0.259474
1b2,114792076.0,51927.567127,2.49899,1.0,51935.942175,51935.942175,2.497538,0.13237,-25.436809,-2.761347,...,0.043319,0.245398,-0.000424,4.045005,-0.032608,0.184163,0.000575,-83.10808,-0.051755,0.264324
1c1,220729523.0,51825.44361,2.539644,1.0,51829.871078,51829.871078,2.537636,0.15048,13.910536,2.522709,...,0.043352,0.249338,-0.000289,4.058795,-0.034339,0.18712,0.000711,-82.878713,-0.070877,0.268568
1c2,280475611.0,51868.435913,2.712771,1.0,51869.872431,51869.872431,2.710388,0.157957,32.279093,5.704643,...,0.066771,0.266312,0.000242,3.070846,-0.034314,0.199858,0.000928,-82.990848,-0.085048,0.28685


In [None]:
# Scratch check on the k3 columns.  Relies on `azi_run_k_df` and `weight` left
# over from the LAST iteration of the per-dataset loop, and rebinds `stk`.
stk = ['st'+str(st)+',k3' for st in range(72)]

# NOTE(review): `weight` is already divided by 360 in the loop, so the extra
# /360 here normalises a second time -- confirm this is intended.
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(np.mean(azi_run_k_df[stk].multiply(weight).sum(axis=1)/360))

In [None]:
# Scratch check: km3 of the last dataset processed (run_km[2]) divided by the mean
# weighted k3 sum, times 16.  Needs `stk` to hold the 72 k3 column names and uses
# `run_km`, `azi_run_k_df`, `weight` left over from the per-dataset loop.
# NOTE(review): the factor 16 and the commented-out scaling are not explained in this notebook -- confirm.
run_km[2]/np.mean(azi_run_k_df[stk].multiply(weight).sum(axis=1)/360)*16  # *0.0222*16/np.sqrt(2)