In [4]:
import tables
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
from scipy.optimize import curve_fit
import matplotlib as mpl
import math
import random
import itertools
from scipy.stats import norm as nm
import matplotlib.mlab as mlab
import MyMultiSimTools as mst
import copy
import os
import h5py

%matplotlib inline

# Global matplotlib style for the notebook.
# Only the effective settings are kept: earlier versions of this cell set the font
# family and `text.usetex` twice with conflicting values, and the later call won.
# NOTE(review): `text.usetex=True` presumably needs a working LaTeX installation on
# the machine that renders the figures - confirm.
mpl.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Helvetica Neue',
    'font.size': 20,
    'text.usetex': True,
})

pi = math.pi


In [3]:
class import_MC(object):
    """Load per-event columns from one or more HDF5 files into numpy attributes.

    Each selected column of the ``keysDict`` table is concatenated over all input
    files and stored on the instance as a numpy array named after the column
    (e.g. ``self.weights``).  For Multisim files the flat ``MultisimAmplitudes``,
    ``MultisimPhases`` and ``MultisimModes`` nodes are additionally cut into one
    row per event.

    Parameters
    ----------
    infiles : sequence of str
        Paths of the HDF5 files.  The text after the last ``_`` and before ``.h5``
        must parse as a number; it is used to rescale the weights of that file.
        A file set is treated as Multisim when ``'Multisim'`` occurs in ``infiles[0]``.
    MC : bool
        When ``MC == False`` and ``desired_keys`` is non-empty, the weights are
        replaced by ones.
    desired_keys : container of str, optional
        Columns to load.  Empty (the default) loads every column.  ``'weights'`` is
        always loaded because the rate calculation needs it.

    Attributes set in addition to the columns: ``charges`` (only if the file has a
    ``charges`` node), ``MultisimAmplitudes``, ``MultisimPhases``, ``MultisimModes``
    and ``rate``.
    """

    # Divisor applied to the number parsed from the end of each file name.
    # NOTE(review): presumably the nominal number of files in a full set - confirm.
    SCALE_REFERENCE = 5000.

    # Length of the slices the flat ``charges`` node is cut into before summing.
    CHARGE_CHUNK = 60

    def __init__(self, infiles, MC, desired_keys=''):
        self.infiles = infiles
        self.MC = MC

        if 'Multisim' in infiles[0]:
            print('Looks like a Multisim file.')
            self.Multisim = True
        else:
            self.Multisim = False

        self.desired_keys = desired_keys
        self.allocate_variables(infiles, desired_keys, self.Multisim)

    @staticmethod
    def _scale_factor(infile):
        """Return the weight divisor encoded at the end of ``infile``'s name."""
        return float(infile.split('_')[-1].split('.h5')[0]) / import_MC.SCALE_REFERENCE

    @staticmethod
    def _chunk_rows(flat_values, row_len):
        """Cut ``flat_values`` into consecutive slices of ``row_len`` entries.

        Returns an empty array when ``row_len`` is not positive (no Multisim data).
        """
        if row_len <= 0:
            return np.asarray([])
        return np.asarray([flat_values[start:start + row_len]
                           for start in range(0, len(flat_values), row_len)])

    def allocate_variables(self, infiles, desired_keys, Multisim):
        """Read all ``infiles`` and populate the instance attributes.

        Parameters mirror the constructor; ``Multisim`` selects whether the three
        Multisim nodes are read.  Prints the summed rate and its uncertainty.
        """
        # PyTables renamed openFile -> open_file; accept whichever is available.
        open_h5 = getattr(tables, 'open_file', None) or tables.openFile

        columns = {}  # column name -> python list accumulated over all files
        amplitudes, phases, modes = [], [], []

        for infile in infiles:
            scale_factor = self._scale_factor(infile)
            f = open_h5(infile)
            try:
                # The charges node is optional: keep the best-effort behaviour but do
                # not swallow KeyboardInterrupt/SystemExit like a bare ``except`` does.
                try:
                    all_charges = f.root.charges.cols.item[:]
                    charge_array = [all_charges[start:start + self.CHARGE_CHUNK]
                                    for start in range(0, len(all_charges), self.CHARGE_CHUNK)]
                    self.charges = np.sum(charge_array, axis=0)
                except Exception:
                    pass

                if Multisim:
                    amplitudes.extend(f.root.MultisimAmplitudes.cols.item[:])
                    phases.extend(f.root.MultisimPhases.cols.item[:])
                    modes.extend(f.root.MultisimModes.cols.item[:])

                # Read the table once instead of once per column.
                table = f.root.keysDict[:]
                for name in f.root.keysDict.coldescrs:
                    if name != 'weights' and desired_keys and name not in desired_keys:
                        continue
                    values = table[name]
                    if name == 'weights':
                        values = values / scale_factor
                    # Going through python lists keeps the dtype promotion of the
                    # original list-based accumulation.
                    columns.setdefault(name, []).extend(values.tolist())
            finally:
                f.close()

        for name, values in columns.items():
            setattr(self, name, np.asarray(values))

        # NOTE(review): unit weights are only applied when desired_keys is given;
        # without desired_keys the file weights are kept even for MC == False.
        if desired_keys and self.MC == False and 'weights' in columns:
            print('Uploading Data')
            self.weights = np.ones(len(columns['weights']))

        # One row per event.  The row length is derived from the amplitudes and
        # reused for phases and modes.  It is 0 for non-Multisim input, which used
        # to crash with a zero slice step.
        n_events = len(self.weights)
        row_len = len(amplitudes) // n_events if n_events else 0

        self.MultisimAmplitudes = self._chunk_rows(amplitudes, row_len)
        self.MultisimPhases     = self._chunk_rows(phases, row_len)
        self.MultisimModes      = self._chunk_rows(modes, row_len)

        self.rate = sum(self.weights)
        print('Rate: ' + str(self.rate) + ' mHz +/- '
              + str(np.sqrt(sum(np.power(self.weights, 2)))) + ' mHz')

In [1]:
######### Configure Input and Output Paths

# Base output directory; set to None (or '') to write below the current directory.
outpath = "/data/user/twatson/"

# infiles[k] is looked up inside indirs[k].
infiles = ["Ares_IC86.AVG_1.27_EZ_platinum_98635.h5",
           "Ares_IC86.AVG_1.27_EZ_platinum_18745.h5"]

indirs = ["/data/ana/SterileNeutrino/IC86/HighEnergy/Multisim/Systematics/Multisim_HE/Ares/IC86.AVG/Merged/",
          "/data/ana/SterileNeutrino/IC86/HighEnergy/Multisim/Systematics/Multisim_LE/Ares/IC86.AVG/Merged/" ]

inpaths = []
heads = []
tails = []

for indir, infile in zip(indirs, infiles):
    # Split off a recognised HDF5 extension; names given without one get '.h5'.
    # (The previous test `".h5" or ".hdf5" in name` was always true.)
    stem, ext = os.path.splitext(infile)
    if ext in ('.h5', '.hdf5'):
        MC_name = stem
    else:
        MC_name = infile
        infile = infile + '.h5'

    inpaths.append(os.path.join(indir, infile))

    # Everything before the last '_' is the common name, the rest is the tag.
    name_head, _, name_tag = MC_name.rpartition('_')
    heads.append(name_head)

    if '_LE' in indir:
        tails.append('LE-' + name_tag)
    if '_HE' in indir:
        tails.append('HE-' + name_tag)

# Unique heads, each followed by '_', then unique tails joined by '_'.
# Sorting keeps the directory name independent of set/hash ordering.
head = ''.join(name + '_' for name in sorted(set(heads)))
tail = '_'.join(sorted(set(tails)))

outpath = os.path.join(outpath if outpath else os.getcwd(), head + tail)

if os.path.exists(outpath):
    print("Writing Output to Existing Directory.\n")
    print("Output Directory:     " + str(outpath))
else:
    print("Writing Output to new Directory.\n")
    print("Creating Directory:     " + str(outpath))
    # makedirs also creates missing parent directories.
    os.makedirs(outpath)

NameError: name 'os' is not defined

In [5]:
#### Load MC (Takes a while....)

print("Loading Monte Carlo (this will take a while)")

# These assignments replace any `outpath` / `inpaths` left over from earlier cells.
outpath = "/data/user/twatson/Ice/work/test"

# Only the effective input is kept; `inpaths` used to be assigned twice in a row,
# so the first value was never used.
inpaths = ["/data/ana/SterileNeutrino/IC86/HighEnergy/Multisim/Systematics/Multisim_HE/Ares/IC86.AVG/Merged/Ares_IC86.AVG_1.27_EZ_platinum_500.h5"]

# Not passed to import_MC below; kept in case other cells read it.
Multisim = True

my_MC = import_MC(inpaths, MC=True)


Loading Monte Carlo (this will take a while)
Looks like a Multisim file.
Rate: 1.24636193275 mHz +/- 0.00304460376917 mHz


In [14]:
# Print the number of entries in the energy column and in the weights, one per line.
print('\n'.join(str(len(values)) for values in (my_MC.MuExEnergy, my_MC.weights)))

256200
256200


In [9]:
########## Configure My Parameters:

SplitPhases        = True   # True: split events on Multisim phases, False: on amplitudes
FractionalGradient = True   # True: divide the difference by the summed distribution
DoEnergy           = True   # True: histogram MuExEnergy, False: cos(MuExZenith)
WriteMode          = True   # write the gradients to a CSV file (never overwrites)
PlotDists          = False
PlotGrads          = True

############ SET SPLIT PARAMETERS

splitmodes = np.arange(0,12)

nevents     = 400   # maximum number of events kept on each side of the split

splitpoint  = 0     # events above / below this value form the two samples

nmodes      = len(splitmodes)

############# Determine Naming Stuff

str_1 = 'Fractional Gradient' if FractionalGradient else 'Absolute Gradient'
str_2 = 'Energy' if DoEnergy else 'Zenith'
str_3 = 'Phases' if SplitPhases else 'Amplitudes'

print('Creating ' + str_1 + ' in ' + str_2 + ', Spiltting Along ' + str_3)

str_a = "FractGrad_" if FractionalGradient else "AbsoGrad_"
str_b = "Phs_" if SplitPhases else "Amp_"
str_c = "Energy_" if DoEnergy else "Zenith_"

# Event counts used only for the log message and the file name.
# NOTE(review): this always splits on phase column 1, whatever SplitPhases says.
mc_pos        =    my_MC.MuExEnergy[ my_MC.MultisimPhases[:,1] > 0][:nevents]
mc_neg        =    my_MC.MuExEnergy[ my_MC.MultisimPhases[:,1] < 0][:nevents]

print('(' + str(len(mc_pos)) + ' positive events) + ' +
      '(' + str(len(mc_neg)) + ' negative events) = ' +
            str(len(mc_pos) + len(mc_neg)) + ' total events')

fname = str(str_a ) + str(str_b) + str(str_c) + 'n-'+str(len(mc_pos) + len(mc_neg)) +'.csv'

outfile = os.path.join(outpath, fname)

if os.path.isfile(outfile):
    print('THIS GRADIENT FILE EXISTS! Not writing to file.')

if WriteMode and not(os.path.isfile(outfile)):
    print('Writing gradients to: ' + str(outfile))

############ SET BINNING

if DoEnergy:
    x_step = 0.1 # logspace step

    x_lo = 400
    x_hi = 10000

    n_bins = 1 + int((np.log10(x_hi) - np.log10(x_lo))/x_step)

    x_binning = np.logspace(np.log10(x_lo), np.log10(x_hi), num=n_bins, endpoint=True)
else:
    x_step = 0.05

    x_lo = -1
    x_hi = 0.2

    n_bins = 1 + int((x_hi - x_lo)/x_step)

    # Cosine zenith is binned linearly (logspace would treat the limits as exponents).
    x_binning = np.linspace(x_lo, x_hi, num=n_bins, endpoint=True)

x_bin_centers = (x_binning[:-1] + x_binning[1:])/2

# Edges actually used for the histograms: n_bins bins between x_lo and x_hi.
# They do not depend on the mode, so they are built once.
if DoEnergy:
    my_bins = np.logspace(np.log10(x_lo), np.log10(x_hi), n_bins + 1)
else:
    my_bins = np.linspace(x_lo, x_hi, n_bins + 1)
bin_centers = (my_bins[:-1] + my_bins[1:]) / 2
bin_widths  = abs((my_bins[1:] - my_bins[:-1]) / 2)   # half widths, used as x error bars

######## SPECIFY GRADIENT INFO TO KEEP ########

gradients = []

###################### Set Plotting params

fig = plt.figure(figsize=(15,12))

plots_per_row = 4

# Float division so that a partially filled last row is still counted.
dim_row = int(np.ceil(len(splitmodes)/float(plots_per_row)))
dim_col = int(plots_per_row)

# Both plot types would be drawn into the same subplot cell; gradients win.
if PlotGrads and PlotDists:
    PlotDists = False
    print("Cannot Plot Gradients and Distributions Simultaneously!")

########### SELECT DATA FROM (PRELOADED) MONTE CARLO

# Quantity the events are split on and quantity that is histogrammed.
split_values = my_MC.MultisimPhases if SplitPhases else my_MC.MultisimAmplitudes
observable   = my_MC.MuExEnergy if DoEnergy else np.cos(my_MC.MuExZenith)

############################################################
######### Begin Loop over Modes ############################
############################################################

for mode_index, splitmode in enumerate(splitmodes):
    print("Splitting Mode " + str(splitmode))

######## Specify subplot location (by position in splitmodes, not by mode value)

    row, col = divmod(mode_index, plots_per_row)

########### SPLIT THE SAMPLE

    # Column of the per-event Multisim arrays that belongs to this mode.
    splitindex = np.where(my_MC.MultisimModes[0] == splitmode)[0][0]

    pos_mask = split_values[:, splitindex] > splitpoint
    neg_mask = split_values[:, splitindex] < splitpoint

    mc_pos     = observable[pos_mask][:nevents]
    mc_neg     = observable[neg_mask][:nevents]
    weight_pos = my_MC.weights[pos_mask][:nevents]
    weight_neg = my_MC.weights[neg_mask][:nevents]

######## CALCULATE WEIGHTED UNCERTAINTIES

    # Per-bin error is sqrt(sum of squared weights), using the same edges as the
    # distributions below.
    pos_errors = np.sqrt(np.histogram(mc_pos, bins=my_bins, weights=weight_pos**2)[0])
    neg_errors = np.sqrt(np.histogram(mc_neg, bins=my_bins, weights=weight_neg**2)[0])

########### PLOT DISTRIBUTION #####

    # mc_pos_dist is built from the positive sample and mc_neg_dist from the
    # negative one.  Earlier versions had the two swapped, which flipped the sign
    # of the gradient and paired each distribution with the other sample's errors.
    if PlotDists:

        ax = plt.subplot2grid( (dim_row, dim_col), (row, col))

        mc_pos_dist, bins, patch = plt.hist( mc_pos, bins=my_bins, weights=weight_pos,
                                             histtype='stepfilled', color='r', alpha=0.3)

        mc_neg_dist, bins, patch = plt.hist( mc_neg, bins=my_bins, weights=weight_neg,
                                             histtype='stepfilled', color='b', alpha=0.3)

        plt.errorbar( bin_centers, mc_neg_dist, c='b', xerr = bin_widths, yerr = neg_errors ,
                     linewidth=0, elinewidth = 3, alpha=0.7, markersize=0, label='Negatively Perturbed')

        plt.errorbar( bin_centers, mc_pos_dist, c='r', xerr = bin_widths, yerr = pos_errors ,
                     linewidth=0, elinewidth = 3, alpha=0.7, markersize=0, label='Positively Perturbed')

        legend = plt.legend(loc='best')
        legend.get_frame().set_facecolor('white')

        plt.xlim(x_lo, x_hi)

        plt.grid(True)

        if SplitPhases:
            plt.title('Phase Split - Mode ' + str(splitmode))
        else:
            plt.title('Amplitude Split - Mode ' + str(splitmode))

        if DoEnergy:
            plt.xlabel('Energy (GeV)')
            plt.xscale('log')
            plt.yscale('log')
        else:
            plt.xlabel('Cosine Zenith')
            plt.yscale('log')

    else:
        mc_pos_dist, bins = np.histogram( mc_pos, bins=my_bins, weights=weight_pos)
        mc_neg_dist, bins = np.histogram( mc_neg, bins=my_bins, weights=weight_neg)

############# CALCULATE GRADIENT ############

    mc_dist = mc_pos_dist + mc_neg_dist
    mc_diff = mc_pos_dist - mc_neg_dist

    if FractionalGradient:
        # Empty bins give nan/inf here; only the numpy warnings are silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            mc_gradient = np.sqrt(math.pi/2) * mc_diff/np.abs(mc_dist) # FRACTIONAL GRADIENT
            gradient_error = np.sqrt(neg_errors**2 + pos_errors**2)/np.abs(mc_dist) # FRACTIONAL GRADIENT ERRORS
    else:
        mc_gradient = np.sqrt(math.pi/2) * mc_diff #  GRADIENT
        gradient_error = np.sqrt(neg_errors**2 + pos_errors**2) # GRADIENT ERRORS

############## PLOT GRADIENT #################

    if PlotGrads:
        ax = plt.subplot2grid( (dim_row, dim_col), (row, col))

        plt.axhline(0, c='k', linestyle='--', linewidth = 2)

        # Only one colour keyword: `c` and `color` are aliases and must not both be given.
        grad_label = ('Phs Mode ' if SplitPhases else 'Amp Mode ') + str(splitmode)
        plt.errorbar( bin_centers, mc_gradient, xerr = bin_widths, yerr = gradient_error,
                      linewidth=0, elinewidth = 3, alpha=0.7, markersize=0, zorder = 1000, color='purple',
                      label=grad_label)

        if FractionalGradient:
            my_ylabel = 'Fractional Gradient'
        else:
            my_ylabel = 'Gradient'

        if DoEnergy:
            my_xlabel = 'Energy (GeV)'
            plt.xscale('log')
        else:
            my_xlabel = 'Cosine Zenith'

        #plt.xlim(x_lo, x_hi)
        #plt.ylim(-0.1, 0.1)

        # Tick labels only on the outer panels; the first/last label is hidden so
        # that neighbouring panels do not overlap.
        if col == 0:
            plt.ylabel(my_ylabel, fontsize=17)
            plt.setp(ax.get_yticklabels()[0],  visible=False)
            plt.setp(ax.get_yticklabels()[-1], visible=False)
        else:
            ax.yaxis.set_ticklabels([])

        if row == dim_row - 1:
            plt.xlabel(my_xlabel, fontsize=17)
            plt.setp(ax.get_xticklabels()[0],  visible=False)
            plt.setp(ax.get_xticklabels()[-1], visible=False)
        else:
            ax.xaxis.set_ticklabels([])

        l = plt.legend(loc='upper left', fontsize=15)
        l.set_zorder(20000)  # put the legend on top
        frame = l.get_frame()
        frame.set_facecolor('white')
        plt.grid(True)

############## STORE GRADIENT INFO ####################

    gradients.append([bin_centers, bin_widths, mc_gradient, gradient_error])

######### WRITE GRADIENT INFO TO FILE ###############

plt.subplots_adjust(hspace=0.01)
plt.subplots_adjust(wspace=0.01)
plt.show()

# One CSV row per (mode position, bin): index, bin centre, half width, gradient, error.
if WriteMode and not(os.path.isfile(outfile)):
    with open(outfile,'w') as myfile:
        for i in range(len(gradients)):
            for j in range(len(gradients[i][0])):
                myfile.write(str(i)           + ', ' +
                      str(gradients[i][0][j]) + ', ' +
                      str(gradients[i][1][j]) + ', ' +
                      str(gradients[i][2][j]) + ', ' +
                      str(gradients[i][3][j]) + '\n')


Creating Fractional Gradient in Energy, Spiltting Along Phases
(400 positive events) + (400 negative events) = 800 total events


NameError: name 'outpath' is not defined