In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import glob
import os
sys.path.insert(0,"/home/daqtest/Processor/sandpro")
import sandpro
import configparser
import json
import scipy.stats
from scipy.optimize import curve_fit
import datetime
import pandas as pd
import re

sys.path.insert(0,"../")
import util
from run_selection_single_channel import RunInfo
from dataclasses import dataclass
import WaveformProcessor
import FitSPE


In [None]:
# Load the single-channel run table; pandas is asked to parse `date_time`
# as dates while reading.
# Earlier input file, kept for reference:
# df = pd.read_csv("../run_info_single_channel.csv", parse_dates=["date_time"],delimiter=",",quotechar='\0')
df = pd.read_csv(
    "../run_info_single_channel_20240701_2.csv",
    parse_dates=["date_time"],
    delimiter=",",
    quotechar='"',
    skipinitialspace=True,
    encoding="utf-8",
)

In [None]:
# Show the run table that was just loaded (bare expression -> notebook display).
df

In [None]:
# List the column names of the run table; each one becomes an attribute of
# `run_info_data` when the table is wrapped further down.
df.columns

In [None]:
# v_dirname = np.vectorize(os.path.dirname)
# v_basename = np.vectorize(os.path.basename)
# v_basename = np.vectorize(os.path.basename)


In [None]:
# dir_name = v_dirname(df.file_path)
# bin_name = v_basename(df.file_path)


In [None]:
@dataclass
class run_info_data:
    """Column-wise view of a run table.

    Every column (or dictionary key) of the input becomes an instance
    attribute holding a NumPy array, so one boolean mask can be applied to all
    columns at once with :meth:`apply_mask`.

    NOTE(review): ``@dataclass`` is kept so the class behaves as before, but no
    fields are declared, so the generated ``__repr__``/``__eq__`` never look at
    the column data -- confirm whether the decorator is wanted at all.
    """

    def __init__(self, input: object):
        """Build the container from a ``pandas.DataFrame`` or a dict of sequences."""
        self.import_data(input)

    def import_data(self, dictionary):
        """Store every column of ``dictionary`` as a NumPy array attribute.

        Parameters
        ----------
        dictionary : pandas.DataFrame or dict
            Anything exposing ``.keys()`` and ``[key]`` access; a DataFrame
            yields its column names, a dict its keys.  Each value is copied
            into a new array with ``np.array``.
        """
        # A single implementation serves both input kinds.  The notebook used to
        # define this method twice, and the second definition silently replaced
        # the first.
        for var_name in dictionary.keys():
            setattr(self, var_name, np.array(dictionary[var_name]))

    def apply_mask(self, mask, inplace = False):
        """Apply the boolean (or index) array ``mask`` to every stored column.

        Parameters
        ----------
        mask : array-like
            Selector applied as ``column[mask]`` to each column.
        inplace : bool, default False
            If True, replace the columns of this object and return ``None``;
            otherwise leave this object untouched and return a new
            ``run_info_data`` holding the selected rows.

        The number of rows before and after the cut is printed.
        """
        # Length is taken from the first column instead of a hard-coded
        # 'file_path' key, so tables without that column work too.
        print("before cut: ", len(self))

        # Build all masked columns first: if one column fails to index, the
        # object is not left half-masked in the in-place case.
        selected = {name: values[mask] for name, values in self.__dict__.items()}
        print("after cut: ", len(next(iter(selected.values()), ())))

        if inplace:
            self.__dict__.update(selected)
            return
        return run_info_data(selected)

    def __len__(self):
        """Number of rows; all columns share one length, so the first is used."""
        first = next(iter(self.__dict__.values()), None)
        # An empty container (no columns) has length 0 instead of raising.
        return 0 if first is None else len(first)
            

In [None]:
def _re_search(pattern: str, string: str):
    return bool(re.search(pattern, string))
v_re_search = np.vectorize(_re_search, excluded=["pattern"])

In [None]:
# Wrap the run table column-by-column and list the attributes that were created.
info = run_info_data(df)
print(vars(info).keys())

In [None]:
# Start again from the full table so this cell can be re-run on its own.
info = run_info_data(df)
print(vars(info).keys())

# --- individual cuts -------------------------------------------------------
# run_tag must contain 'GXe/gain_calibration' and must not contain 'trash'
mask_run_tag = v_re_search('GXe/gain_calibration', info.run_tag)
mask_run_tag_remove_trash = np.logical_not(v_re_search('trash', info.run_tag))
# only runs with a timestamp later than 2024-05-18
mask_time = (info.date_time > np.datetime64('2024-05-18'))
# the bookkeeping columns below must be filled (not NaN)
mask_record_length_nan = np.logical_not(np.isnan(info.record_length_sample))
mask_start_index_nan = np.logical_not(np.isnan(info.start_index))
mask_nevents_nan = np.logical_not(np.isnan(info.number_of_events))

# FIXME: mask channel

# --- combined selection: a row is kept only if it passes every cut ---------
mask = np.logical_and.reduce([
    mask_run_tag,
    mask_run_tag_remove_trash,
    mask_time,
    mask_record_length_nan,
    mask_start_index_nan,
    mask_nevents_nan,
])

# apply_mask prints the row count before and after, showing how much is removed
info.apply_mask(mask, inplace=True)


In [None]:
# Sanity check: print the Python type of the first entry of every column
for column_name, column_values in vars(info).items():
    print(column_name, type(column_values[0]))

In [None]:

# config_name = "_".join(parts[1:4]) + ".ini"
# spd.threshold_adc = parts[3]
# spd.channel = int(parts[2])
# spd.timestamp_str = f"{int(parts[4])}_{int(parts[5].split('.')[0])}"

# spd.datetime_obj = datetime.datetime.strptime(f"{parts[4]} {parts[5].split('.')[0]}", '%Y%m%d %H%M%S')

# # ignore the channels that in the ignore_channel_list
# if spd.channel in self.ignore_channel_list: 
#     return

# # read from meta data file
# with open(meta_data, "r") as f:
#     data_taking_settings = json.load(f)

# spd.n_events = int(data_taking_settings["number_of_events"])
# spd.bias_voltage = float(data_taking_settings["voltage_config"]["preamp_1"]) # assuming all preamp has the same bias voltage; can be easily changed
# # bias_voltage_list.append(bias_voltage)


In [None]:
# Peek at the first entry of the `file_path` column held by `info`.
info.file_path[0]

In [None]:
# Distinct values of each processing-relevant setting among the selected runs.
number_of_channels_list = np.unique(info.number_of_channels)
record_length_list = np.unique(info.record_length_sample)
n_sample_list = np.unique(info.baseline_n_samples)
n_sample_avg_list = np.unique(info.baseline_n_samples_avg)

# Window handed to WFProcessor.get_area / get_height below.
integral_window = (0.3,0.6)

# One entry per processed file, so the fit objects are not lost when the loop
# moves on to the next file.
spe_fits = []

for nchs in number_of_channels_list:
    for record_length_sample in record_length_list:
        for sample_selection in n_sample_list:
            for samples_to_average in n_sample_avg_list:
                mask = (
                    (info.number_of_channels == nchs)
                    & (info.record_length_sample == record_length_sample)
                    & (info.baseline_n_samples == sample_selection)
                    & (info.baseline_n_samples_avg == samples_to_average)
                )
                # The nested loops walk the full Cartesian product of settings;
                # skip combinations no run actually uses instead of building a
                # config file and a processor for nothing.
                if not mask.any():
                    continue

                # check how much data is removed
                _tmp__selection = info.apply_mask(mask)

                # np.unique yields NumPy scalars, and the json module cannot
                # serialise NumPy integers -> convert to built-in numbers.
                process_config = {
                    key: (value.item() if isinstance(value, np.generic) else value)
                    for key, value in {
                        "nchs": nchs,
                        "nsamps": record_length_sample,
                        "sample_selection": sample_selection,
                        "samples_to_average": samples_to_average,
                    }.items()
                }

                # dump the config to a json file
                with open("process_config.json", "w") as f:
                    json.dump(process_config, f)

                processor = sandpro.processing.rawdata.RawData(config_file = "process_config.json",
                                                               perchannel=False)

                for i in range(len(_tmp__selection)):
                    # These columns were NaN-filtered earlier, so they are
                    # presumably stored as floats; slice bounds and event
                    # counts must be integers.
                    start_index = int(_tmp__selection.start_index[i])
                    end_index = start_index + int(_tmp__selection.n_processed_events[i])
                    n_events_to_read = int(_tmp__selection.number_of_events[i]) - 1

                    data = processor.get_rawdata_numpy(n_evts=n_events_to_read,
                                            file=_tmp__selection.file_path[i],
                                            bit_of_daq=14,
                                            headersize=4,inversion=False)

                    wfp = WaveformProcessor.WFProcessor(os.path.dirname(_tmp__selection.file_path[i]),
                                                        volt_per_adc=2/2**14)
                    wfp.set_data(data["data_per_channel"][start_index:end_index,0], in_adc = False)
                    wfp.process_wfs()

                    areas = wfp.get_area(sum_window=integral_window)
                    heights = wfp.get_height(search_window=integral_window)

                    data_processed = data["data_per_channel"][start_index:end_index,0,:]
                    hist_count,bin_edges = np.histogram(areas,bins=200,range=(-0.1,10))

                    spe_fit = FitSPE.FitSPE(hist_count, bin_edges, show_plot=False, save_plot=False)
                    spe_fits.append({"file_path": _tmp__selection.file_path[i],
                                     "spe_fit": spe_fit})
                    

In [None]:
# First entry of the `file_path` column (same value as the earlier peek).
info.file_path[0]

In [None]:

# NOTE(review): this cell reads like a fragment pasted from a class method.
# It uses `spd`, `self`, `dir_name`, `bin_name` and `data_file_basename`, none
# of which is assigned anywhere in the visible notebook (`dir_name`, `bin_name`
# and `data_file_basename` appear only in commented-out lines) -- confirm where
# they are supposed to come from before running on a fresh kernel.

# set the board number and integral window according to the board number (board 0 and 1 have different integral windows)
# board_number = 0
# local_channel = channel
integral_window = (0.3,0.6)
# local_channel = channel - 16


# data_file_basename = meta_data_basename.replace("meta_", "").replace(".json", f"_board_{board_number}.bin")

# First attempt: read the file and set the event range to analyse.
# NOTE(review): `df.number_of_events-1` is a whole DataFrame column minus one,
# not a single event count -- confirm this is what get_rawdata_numpy expects.
try:
    data = processor.get_rawdata_numpy(n_evts=df.number_of_events-1,
                                file=os.path.join(dir_name, bin_name),
                                bit_of_daq=14,
                                headersize=4,inversion=False)
    # NOTE(review): the range starts at event 2000 although the inline remark
    # speaks of the first 1000 events; the last 500 events are dropped as well.
    spd.start_index, spd.end_index = 2000, spd.n_events-1-500 #first 1000 events are noisy
    print(f"analysing events from range: {spd.start_index} to {spd.end_index}")
except Exception as e:
    # Any failure above is printed and followed by a second read of 1999 events.
    # NOTE(review): this fallback does not set spd.start_index / spd.end_index,
    # which are used right after the try block.
    print(e)
    data = processor.get_rawdata_numpy(1999,
                                file=os.path.join(self.data_folder, data_file_basename),
                                bit_of_daq=14,
                                headersize=4,inversion=False)

# Process the waveforms of the first channel (index 0 on the second axis) in
# the chosen event range.
wfp = WaveformProcessor.WFProcessor(self.data_folder, volt_per_adc=2/2**14)
wfp.set_data(data["data_per_channel"][spd.start_index:spd.end_index,0], in_adc = False)
wfp.process_wfs()

# Per-file baseline summary: mean of the per-waveform RMS and baseline values,
# plus the number of waveforms that were processed.
spd.baseline_std = np.mean(wfp.baseline_rms)
spd.baseline_mean = np.mean(wfp.baseline)
spd.n_processed_events = len(wfp.baseline_rms)

# NOTE(review): uses `spd.integral_window`, not the `integral_window` variable
# assigned at the top of this cell -- confirm which one is intended.
spd.areas = wfp.get_area(sum_window=spd.integral_window)
spd.heights = wfp.get_height(search_window=spd.integral_window)

In [None]:
# Scatter of the baseline spread of every run against its preamp-1 voltage.
# Disabled alternative (2-D histogram of the same two columns):
# plt.hist2d(df.voltage_preamp1, df.baseline_std, bins=[50,50],range=[[0,100],[0,0.001]], cmap='viridis',norm="log")
preamp1_voltage = df.voltage_preamp1
baseline_spread = df.baseline_std

plt.plot(preamp1_voltage, baseline_spread, 'o')
plt.xlabel("Voltage Preamp 1 (V)")
plt.ylabel("Baseline Std (to be determined)")

In [None]:
# 2-D histogram (log colour scale) of the baseline spread against the
# `runtime` column of the run table.
runtime = df.runtime.to_numpy()
baseline = df.baseline_std.to_numpy()

# hist2d derives the bin range from the data and fails when a value is
# missing, so only rows where both quantities are present are histogrammed.
# `runtime` and `baseline` themselves are left unfiltered for later cells.
has_both = pd.notna(runtime) & pd.notna(baseline)

fig, ax = plt.subplots()
ax.set_xlabel("Runtime of previous file")
ax.set_ylabel("Baseline Std (to be determined)")

# Optional tuning that was tried before:
# ax.set_xlim(0,5000)
# bins=[200,100],range=[[-0.1,10],[0,60]],
# The trailing semicolon keeps the returned (counts, edges, image) tuple out
# of the cell output.
ax.hist2d(runtime[has_both], baseline[has_both], cmap='viridis', norm="log");


In [None]:
# Inspect the first entry of the `runtime` array extracted from the run table.
runtime[0] 

In [None]:
# Force the two free-text columns to plain Python strings.
# NOTE(review): with astype(str) an empty cell presumably turns into the
# literal text 'nan' -- confirm that is acceptable downstream.
for text_column in ('file_path', 'comment'):
    df[text_column] = df[text_column].astype(str)

In [None]:
# Show the dtype pandas assigned to every column of the run table.
df.dtypes

In [None]:
# Try parsing `date_time` with the explicit "YYYYmmdd_HHMMSS" layout.
# The result is only displayed; it is not written back into `df`.
pd.to_datetime(df['date_time'], format="%Y%m%d_%H%M%S")

In [None]:
# Print the dtype of every column of `df` as plain text.
print(df.dtypes)