In [None]:
import pickle
import pandas
from glob import glob
import os
import gzip
import configparser
import matplotlib
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

# Global font settings for all plots.
# Note: 'normal' is not a font family known to matplotlib; it only produces
# "findfont: Font family ['normal'] not found" warnings and then falls back to
# the default font. 'sans-serif' selects that default family explicitly.
font = {'family' : 'sans-serif',
        'size'   : 18}

matplotlib.rc('font', **font)

# show all columns and rows in outputs (None disables truncation)
pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)

In [None]:
# set storage location
output_folder = "../output2"
abs_storage_location = os.path.abspath(output_folder)

# the shift parameters found in the data and, per shift parameter, the list of
# its values get stored here (filled while the variables are loaded)
shift_parameters = []
shift_values = {}

# get channels: every sub-directory of the storage location is one channel.
# Plain files lying around in the output folder are skipped, and the list is
# sorted because os.listdir returns the entries in arbitrary order.
channels = sorted(entry for entry in os.listdir(abs_storage_location)
                  if os.path.isdir(os.path.join(abs_storage_location, entry)))
print("channels which are studied: ", channels)

In [None]:
# Load all "*variables.pkl" files of each channel and merge them.
# `variables` maps the channel name to one DataFrame holding the rows of all
# pickle files found in <storage>/<channel>/analysed.
# While loading, `shift_parameters` and `shift_values` are filled with every
# shift parameter and its values, accumulated over ALL channels.
variables = {}

for channel in channels:

    analysed_path = os.path.join(abs_storage_location, channel, "analysed")

    # glob already returns the full path of each match;
    # sorted so that the row order does not depend on the file system
    input_files = sorted(glob(os.path.join(analysed_path, "*variables.pkl")))

    if not input_files:
        # keep an empty frame so that later lookups of this channel still work
        print("no '*variables.pkl' files found for channel {} in {}".format(channel, analysed_path))
        variables[channel] = pandas.DataFrame()
        continue

    # NOTE: unpickling executes arbitrary code -- only read files you trust
    variables_df = [pandas.DataFrame.from_dict(pandas.read_pickle(input_file))
                    for input_file in input_files]
    channel_df = pandas.concat(variables_df, ignore_index=True)
    variables[channel] = channel_df

    # get the shift parameters and the values of every shift parameter
    # (in order of first appearance); values already known from another
    # channel are kept instead of being overwritten by the current channel
    for shift in channel_df['shift_parameter'].unique().tolist():
        if shift not in shift_parameters:
            shift_parameters.append(shift)

        known_values = shift_values.setdefault(shift, [])
        channel_values = channel_df.loc[channel_df['shift_parameter'] == shift, 'shift_value']
        for value in channel_values.unique().tolist():
            if value not in known_values:
                known_values.append(value)

print('shifts which where studied: ', shift_values)

In [None]:
# generate plots: for every channel one histogram per (variable, shift
# parameter), overlaying the distributions of all values of that shift
# parameter. The plots are written to <storage>/<channel>/plots.
for channel in channels:

    channel_df = variables[channel]

    # generate the plots folder in the channel output directory
    plot_path = os.path.join(abs_storage_location, channel, "plots")
    os.makedirs(plot_path, exist_ok=True)

    # TODO: could be done for all variables without "shift" in their name,
    # but only the fast_reco variables are of interest at the moment
    variables_studied = [variable for variable in channel_df.columns
                         if "fast_reco" in variable]

    for variable in variables_studied:

        # setup the binning and the limits for the plots out of the data;
        # they only depend on the variable, so all shift parameters share them
        max_value = channel_df[variable].max() + 0.5
        min_value = channel_df[variable].min() - 0.5
        if pandas.isna(min_value) or pandas.isna(max_value):
            # no valid entries -> no sensible range/binning can be derived
            print("skipping {} in channel {}: no valid entries".format(variable, channel))
            continue
        hist_range = (min_value, max_value)

        if "number" in variable:
            # one bin of width 1 per value (presumably integer counts -- TODO confirm)
            hist_bins = int(max_value - min_value)
        else:
            hist_bins = 50

        for shift_param in shift_parameters:

            fig, ax = plt.subplots(figsize=(10, 10))
            for value in shift_values[shift_param]:

                selection = ((channel_df.shift_parameter == shift_param) &
                             (channel_df.shift_value == value))

                ax.hist(channel_df.loc[selection, variable],
                        bins=hist_bins,
                        range=hist_range,
                        histtype='step',
                        label=str(value),
                        lw=2)

            ax.set_xlabel(variable)
            ax.set_ylabel('Entries')
            ax.set_title(channel)
            ax.legend()
            fig.tight_layout()
            plot_file = os.path.join(plot_path, "hist_{}_{}_{}.png".format(channel, variable, shift_param))
            fig.savefig(plot_file)
            # close (not just clear) the figure: otherwise every figure stays
            # open, memory grows with each plot and blank figures are shown inline
            plt.close(fig)