## Import

In [None]:
from glob import glob
from natsort import natsorted, ns
from pathlib import Path
import hist as Hist
from copy import copy
import matplotlib.pyplot as plt
import pandas as pd

## Simple translator - 1100 + counter

### Useful functions

In [None]:
# Interpret a string of '0'/'1' characters as a base-2 integer
def binary_to_decimal(binary_string):
    """Return the integer value of ``binary_string`` read as base 2.

    Raises ``ValueError`` (from ``int``) when the string is empty or is not a
    valid base-2 literal.
    """
    value = int(binary_string, base=2)
    return value

# Step 1: Load a text file into a one-column DataFrame
def text_file_to_dataframe(file_path):
    """Read ``file_path`` and return a DataFrame with one row per line.

    The single column is named "bitstream"; every entry has its leading and
    trailing whitespace (including the newline) removed.
    """
    with open(file_path, "r") as handle:
        raw_lines = handle.readlines()

    frame = pd.DataFrame({"bitstream": raw_lines})
    # Strip after building the column so empty files still go through pandas
    frame["bitstream"] = frame["bitstream"].str.strip()
    return frame

# Step 2 and 3: Strip the 4-character prefix and decode what is left
def process_dataframe(df):
    """Drop the first four characters of each "bitstream" entry and decode the rest.

    The input DataFrame is modified in place and also returned: "bitstream"
    is overwritten with the shortened strings and a new "decimal" column
    holds their base-2 integer value.
    """
    payload = df["bitstream"].str.slice(start=4)
    df["bitstream"] = payload
    df["decimal"] = payload.apply(binary_to_decimal)
    return df

# Step 4: Keep the rows on both sides of every counter discontinuity
def collect_rows_with_difference(df):
    """Return the rows whose step from the previous row is not 1, plus the row before each.

    A "difference" column (``decimal`` minus the previous ``decimal``) is
    added to ``df`` in place. Rows containing NaN are dropped from the result,
    which removes the very first row because it has no predecessor.
    """
    df["difference"] = df["decimal"].diff()

    # True where the counter did not advance by exactly one
    is_jump = df["difference"].ne(1)
    # True on the row immediately before a jump; the last row has no successor
    precedes_jump = is_jump.shift(-1, fill_value=False)

    return df[is_jump | precedes_jump].dropna()

# Load and decode several files, then stack them into one DataFrame
def process_multiple_files(file_paths):
    """Return one DataFrame holding the decoded rows of every file in ``file_paths``.

    Files are processed in the order given and the result is re-indexed from
    zero. ``pd.concat`` raises ``ValueError`` when ``file_paths`` is empty.
    """
    frames = [
        process_dataframe(text_file_to_dataframe(path))
        for path in file_paths
    ]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Directory holding the raw data files for this study
target = '../../ETROC-Data/nots_read_cycbuf8/'
dir = Path(target)

## The single-character [0-9] glob is meant to limit reading to at most 10 files
files = natsorted(glob(f"{dir}/*Data_[0-9].dat"))
files

In [None]:
# Load every selected file and stack the decoded rows into one DataFrame
result_df = process_multiple_files(files)

# Uncomment to inspect the full table
# print(result_df)

# Keep only the rows around places where the decoded value does not step by 1.
# Files are concatenated first, so a step across a file boundary is checked too.
collected_df = collect_rows_with_difference(result_df)

# Last expression of the cell: the notebook displays the collected rows
collected_df

## Pattern finding and counter

In [None]:
# Visit every line tagged with the '1100' prefix in each selected file.
# NOTE(review): the stripped payload is computed and discarded, so this cell
# currently has no effect beyond reading the files — confirm whether it is a leftover.
for ifile in files:
    with open(ifile, 'r') as infile:
        for line in infile:
            if line.startswith('1100'):
                line.strip()[4:]

In [None]:
# Pick one result directory (alternatives kept commented out) and list its
# data files in natural order
# dir=Path('../../ETROC-Data/2023-06-29_Array_Test_Results/counter_write_cycbuf')
dir = Path('../../ETROC-Data/2023-06-29_Array_Test_Results/counter_read_cycbuf7')
# dir=Path('../../ETROC-Data/2023-06-29_Array_Test_Results/counter_rawdata_beftrigfifo')
# dir=Path('../../ETROC-Data/2023-06-29_Array_Test_Results/counter_write_trigbuf_v2_exttest')

files = natsorted(glob(f"{dir}/*Data_[0-9]*.dat"))
files

In [None]:
# Histogram of the step between consecutive counter values (100 bins, 0-20000)
h = Hist.new.Regular(
    100, 0, 20000,
    name="diff",
    label="Diff of consecutive lines [decimal]",
).Double()
nz_counter = 0

In [None]:
# Check that the counter carried by consecutive '1100' lines advances by exactly 1.
# `oldnum` is not reset between files, so continuity is also checked across
# file boundaries. Every bad step is filled into the histogram `h`, which must
# already exist; re-running this cell without re-creating `h` fills it twice.
oldnum = -1  # -1 means "no previous counter value yet"
line_counter = 0
err_line_counter = 0
total_skip = 0
for ifile in files:
    with open(ifile, 'r') as infile:
        for line in infile:
            if not line.startswith('1100'):
                continue
            line_counter += 1
            # Everything after the 4-character tag is the binary counter value
            num = int(line.strip()[4:], base=2)
            step = num - oldnum
            if oldnum != -1 and step != 1:
                err_line_counter += 1
                # A step of N means N-1 counter values are missing in between
                total_skip += step - 1
                h.fill(step)
            oldnum = num

# Guard the percentage: with no matching line the division would raise
fail_percent = 100*err_line_counter/line_counter if line_counter else 0.0
print(err_line_counter, "Out of a total of", line_counter, "Failed.", fail_percent, "%", f"Losing {total_skip} lines")

In [None]:
#%%
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Draw the step histogram `h` on a single-axes figure
fig = plt.figure(dpi=200, figsize=(8,4.5))
gs = fig.add_gridspec(1,1)
ax = fig.add_subplot(gs[0,0])
# NOTE(review): the `2j` slice step presumably rebins by a factor of 2 — confirm against the hist docs
h[::2j].plot1d(ax=ax, lw=2, histtype="fill")
# Re-apply the current ticks and labels, rotated and shrunk
ax.set_xticks(ax.get_xticks(),ax.get_xticklabels(), rotation='vertical', size=5)
plt.tight_layout()


In [None]:
# Print every line that starts with '10' and carries '0101' in characters 2-5,
# split as: 2-character tag || 4-character field || remainder
for ifile in files:
    with open(ifile, 'r') as infile:
        for line in infile:
            if not line.startswith('10'):
                continue
            stripped = line.strip()
            if stripped[2:6] == '0101':
                print(stripped[:2], '||', stripped[2:6], '||', stripped[6:])

In [None]:
def find_pattern_first_index(given_pattern, bitstream):
    """Return the first bit offset at which a hexadecimal pattern occurs.

    Args:
        given_pattern: Pattern as a string of hexadecimal digits (for example
            "3c"); each digit stands for 4 bits. Upper and lower case are
            both accepted.
        bitstream: String of '0'/'1' characters to search.

    Returns:
        The index of the first match, or -1 when the pattern does not occur.

    Raises:
        ValueError: If ``given_pattern`` is empty or holds a non-hex character.
    """
    if not given_pattern:
        raise ValueError("given_pattern must contain at least one hexadecimal digit")

    # Expand digit by digit so leading zeros keep their 4 bits ("0f" ->
    # "00001111"); comparing against hex() output silently dropped them.
    pattern_bits = "".join(format(int(digit, 16), "04b") for digit in given_pattern)

    return bitstream.find(pattern_bits)

def find_pattern_multiple_indexs(given_pattern, bitstream):
    """Return every bit offset at which a hexadecimal pattern occurs.

    Args:
        given_pattern: Pattern as a string of hexadecimal digits (for example
            "3c5c"); each digit stands for 4 bits. Upper and lower case are
            both accepted.
        bitstream: String of '0'/'1' characters to search.

    Returns:
        A list of match offsets in ascending order. Overlapping matches are
        all reported; the list is empty when there is no match.

    Raises:
        ValueError: If ``given_pattern`` is empty or holds a non-hex character.
    """
    if not given_pattern:
        raise ValueError("given_pattern must contain at least one hexadecimal digit")

    # Expand digit by digit so leading zeros keep their 4 bits ("0f" ->
    # "00001111"); comparing against hex() output silently dropped them.
    pattern_bits = "".join(format(int(digit, 16), "04b") for digit in given_pattern)

    indices = []
    start = bitstream.find(pattern_bits)
    while start != -1:
        indices.append(start)
        # Resume one bit later so overlapping occurrences are still found
        start = bitstream.find(pattern_bits, start + 1)

    return indices

def process_bitstream(given_pattern, bitstream):
    """Scan ``bitstream`` for ``given_pattern`` and collect "broken" lengths.

    The stream is consumed from the left: each round looks up the next
    occurrence of ``given_pattern`` and drops everything up to the end of the
    word found there. When no further occurrence exists, the length of what
    is left is recorded and the scan stops.

    Returns:
        The list of recorded lengths.
    """
    # NOTE(review): the word length and the header comparison below read the
    # module-level name `pattern`, not `given_pattern`. The 8-character slice
    # suggests `pattern` is expected to be the binary header string (e.g.
    # "00111100") while `given_pattern` is hexadecimal — confirm before
    # unifying the two; the function raises NameError if `pattern` is unset.
    pattern_length = len(pattern) * 4  # 4 bits per hexadecimal digit
    broken_lengths = []

    while len(bitstream) >= pattern_length:
        index = find_pattern_first_index(given_pattern, bitstream)

        # The lookup helper reports "not found" as -1, so testing only for
        # None never triggered and the scan went on with a negative index.
        if index is None or index < 0:
            broken_lengths.append(len(bitstream))
            break

        current_word = bitstream[index:index+pattern_length]
        if current_word[:8] != pattern:
            broken_lengths.append(index)
        # The decoded payload was never used, so it is no longer computed;
        # both branches advanced past the word in the same way.
        bitstream = bitstream[index+pattern_length:]

    return broken_lengths


# pattern = "00111100"
# pattern_length = len(pattern)
# broken_lengths = []
# previous_num = -1
# current_num = -1
# while len(dummy_bs) >= pattern_length:
#     index = find_pattern_first_index(dummy_bs)

#     if index is None:
#         broken_lengths.append(len(dummy_bs))
#         break
#     if dummy_bs[index+40:index+pattern_length+40]!=pattern:
#         broken_lengths.append(len(dummy_bs[:index+pattern_length]))
#         dummy_bs = dummy_bs[index+pattern_length:]
#         continue

#     current_num = int(dummy_bs[index+pattern_length:index+pattern_length+32], base=2)
#     if(previous_num>-1 and current_num-previous_num!=1):
#         h2.fill(current_num-previous_num)
#         nz_counter +=1
#     previous_num = current_num
#     dummy_bs = dummy_bs[index+40:]

In [None]:
# Walk the stream in 40-bit words starting at bit offset `idx` and stop at the
# first word whose 8-bit header is not '00111100'. `bitstream` and `idx` come
# from the cell that reads the data file.
fail_count = 0
newidx = 0

# The last complete word is left out, as in the original loop bound
for i in range(len(bitstream)//40 - 1):
    left = idx+(40*i)
    right = idx+(40*(i+1))
    word = bitstream[left:right]

    if(word[0:8]!='00111100'):
        print(word[0:8], i)
        fail_count+=1
        newidx = i
        break

    # The 32 bits after the header are the counter value
    current_counter = int(word[8:], base=2)

# bitstream = bitstream[newidx:]
# idx = find_pattern_first_index(bitstream)
# print('new index:', idx)

## Verify the new idx is real "3c" pattern
# for i in range(11):
#     left = idx+(40*i)
#     right = idx+(40*(i+1))
#     word = bitstream[left:right]

#     if(word[0:8]!='00111100'):
#         print('This is not a real 3c pattern')
#         bitstream = bitstream[idx+8:]
#         break

# for i in range(int(len(bitstream)/40)-1):
#     left = idx+(40*i)
#     right = idx+(40*(i+1))
#     word = bitstream[left:right]

#     if(word[0:8]!='00111100'):
#         fail_count+=1
#         newidx = i
#         break

#     current_counter = int(word[8:], base=2)

In [None]:
# Inspect one 40-bit word by its word number: raw bits, then header and payload value
k = 10986
word = bitstream[40*k:40*(k + 1)]
print(word)
print(word[:8], int(word[8:], 2))

In [None]:
# Split the stream into 40-bit words starting at bit offset `idx`, report the
# word numbers whose 8-bit header is not '00111100', and print all words
# separated by '||'.
previous_num = 0
fail_count = 0
words = []
# Count only complete words after the offset: the old bound ignored `idx`, so
# the last word could be truncated and int() could be handed an empty payload.
for i in range((len(bitstream) - idx)//40):
    left = idx+(40*i)
    right = left+40
    word = bitstream[left:right]
    words.append(word)
    if word[0:8] != '00111100':
        fail_count += 1
        print(i)
    current_num = int(word[8:], base=2)
    # print(current_num, previous_num)
    previous_num = current_num  # ints are immutable, no copy needed

# Join once at the end instead of growing the string inside the loop
worded_bitstream = ''.join('||' + w for w in words)

print(worded_bitstream)

In [None]:
# Start from an empty stream; the cell that reads the data file appends to it
bitstream = ""

In [None]:
# ifile = '../../ETROC-Data/2023-06-29_Array_Test_Results/counter_write_cycbuf/TDC_Data_1.dat'
ifile = '../../ETROC-Data/2023-06-29_Array_Test_Results/counter_write_cycbuf_latch/TDC_Data_0.dat'

# Append the payload (everything after the '1100' tag) of every tagged line to
# the existing `bitstream`, forming one long string of bits
with open(ifile, 'r') as infile:
    bitstream += ''.join(
        line.strip()[4:] for line in infile if line.startswith('1100')
    )

# The lookup helper takes the pattern as its first argument; it was missing
# here. "3c" is the hex form of the '00111100' word header checked elsewhere
# in this notebook.
idx = find_pattern_first_index("3c", bitstream)

In [None]:
# Working alias of the stream for the scan below; rebinding `dummy_bs` there
# cannot change `bitstream`
dummy_bs = bitstream

In [None]:
# Histogram of the step between consecutive counter values (100 bins, 0-50000)
h2 = (
        Hist.new.Regular(100, 0, 50000, name="diff", label="Diff of consecutive lines [decimal]")
        .Double()
      )
nz_counter = 0

# Scan `dummy_bs` for 40-bit words made of the 8-bit header `pattern` followed
# by a 32-bit counter. A header only counts when the next header sits exactly
# 40 bits later; otherwise the skipped length is recorded in `broken_lengths`.
pattern = "00111100"
pattern_length = len(pattern)
broken_lengths = []
previous_num = -1
current_num = -1
while len(dummy_bs) >= pattern_length:
    # Search for the binary header directly: the hex lookup helper needs a
    # pattern argument that was missing here, and "not found" is -1, not None.
    index = dummy_bs.find(pattern)

    if index < 0:
        broken_lengths.append(len(dummy_bs))
        break
    if dummy_bs[index+40:index+pattern_length+40]!=pattern:
        # No header one word later: drop everything up to the end of this header
        broken_lengths.append(index+pattern_length)
        dummy_bs = dummy_bs[index+pattern_length:]
        continue

    current_num = int(dummy_bs[index+pattern_length:index+pattern_length+32], base=2)
    if(previous_num>-1 and current_num-previous_num!=1):
        h2.fill(current_num-previous_num)
        nz_counter +=1
    previous_num = current_num
    dummy_bs = dummy_bs[index+40:]

In [None]:
# Bad steps, number of 40-bit words in the stream, and the bad-step percentage
n_words = len(bitstream)/40
print(nz_counter, n_words, 100*nz_counter/n_words)

In [None]:
# Draw the step histogram `h2` on a single-axes figure
fig = plt.figure(dpi=200, figsize=(8,4.5))
gs = fig.add_gridspec(1,1)
ax = fig.add_subplot(gs[0,0])
# The full-range slice plots the histogram with its original binning
h2[:].plot1d(ax=ax, lw=2, histtype="fill")
# Re-apply the current ticks and labels, rotated and shrunk
ax.set_xticks(ax.get_xticks(),ax.get_xticklabels(), rotation='vertical', size=5)
plt.tight_layout()


### Translate binary to readable data

In [None]:
def translate_with_indices(input_stream, positions, parent_dir, output, chipID):
    """Decode 40-bit words found at ``positions`` and write them as text.

    Each position must point at a word starting with the 16-bit marker
    '0011110001011100'. The two bits after the marker select the word kind:
    '00' header, '10' frame filler, '11' firmware filler. Fillers are written
    straight away. For a header, the words up to the next position are read:
    words starting with '1' are data, and the word starting with '0' followed
    by the 17-bit chip ID is the trailer. An event (header, data, trailer) is
    written only once its trailer has been found. Any other marker word is
    skipped.

    Args:
        input_stream: String of '0'/'1' characters.
        positions: Ascending bit offsets of marker words in ``input_stream``.
        parent_dir: Directory of the output file.
        output: Name of the output file, created or overwritten.
        chipID: Chip ID as an integer literal string, e.g. '0x17f0f'.

    Returns:
        The unparsed tail of ``input_stream`` to prepend to the next stream:
        a final word shorter than 40 bits, or everything from the last header
        onwards (its event may continue in the next file). Empty otherwise.
    """
    wordlength = 40
    marker = '0011110001011100'
    # A trailer word is a '0' flag bit followed by the 17-bit chip ID
    trailer_prefix = '0' + format(int(chipID, 0), '017b')
    residual = ''

    # The context manager closes the file even if decoding raises
    with open(parent_dir+'/'+output, 'w') as f:
        for i, index in enumerate(positions):
            word = input_stream[index:index+wordlength]
            if len(word) != wordlength:
                # Truncated word at the end of the stream: carry it over
                residual = word
                break

            printline = "ETROC2 0 "  # + "{:d} ".format(channel)
            frame_type = word[16:18] if word[0:16] == marker else None

            # Frame filler
            if frame_type == '10':
                printline += "FRAMEFILLER "
                printline += "L1COUNTER " + word[18:26] + " "
                printline += "EBS " + word[26:28] + " "
                printline += "BCID " + f"{int(word[28:40], base=2)}" + "\n"
                f.write(printline)
                continue

            # Firmware filler
            if frame_type == '11':
                printline += "FIRMWAREFILLER "
                printline += "MISSINGCOUNT " + word[18:40] + "\n"
                f.write(printline)
                continue

            # Anything that is not a header is undefined and is not written
            if frame_type != '00':
                continue

            # Header
            printline += "HEADER "
            printline += "L1COUNTER " + word[18:26] + " "
            printline += "TYPE " + word[26:28] + " "
            printline += "BCID " + f"{int(word[28:40], base=2)}" + "\n"

            if i + 1 >= len(positions):
                # Last header of this stream: keep the header AND the words
                # after it, otherwise its data words are lost when the event
                # is completed from the next file.
                residual = input_stream[index:]
                break

            # NOTE(review): data and trailer lines carry no "ETROC2 0 " prefix
            # of their own; kept as in the original output format — confirm.
            n_words = (positions[i+1] - index) // wordlength
            for k in range(1, n_words):
                start = index + wordlength*k
                word = input_stream[start:start+wordlength]

                # Trailer
                if word[0:18] == trailer_prefix:
                    printline += "TRAILER "
                    printline += "CHIPID " + f"{hex(int(word[1:18], base=2))}" + " "
                    printline += "STATUS " + word[18:24] + " "
                    printline += "HITS " + f"{int(word[24:32], base=2)}" + " "
                    printline += "CRC " + word[32:40] + "\n"
                    f.write(printline)
                    # Stop here so words after the trailer cannot cause the
                    # same event to be written a second time
                    break

                if word.startswith('1'):
                    printline += "DATA "
                    printline += "EA " + word[1:3] + " "
                    printline += "COL " + "{:d} ".format(int(word[3:7], base=2))
                    printline += "ROW " + "{:d} ".format(int(word[7:11], base=2))
                    printline += "TOA " + "{:d} ".format(int(word[11:21], base=2))
                    printline += "TOT " + "{:d} ".format(int(word[21:30], base=2))
                    printline += "CAL " + "{:d} ".format(int(word[30:40], base=2)) + "\n"

    return residual

In [None]:
# install natsort module (skip if this module is installed already!)
import sys
!{sys.executable} -m pip install natsort

### Let's convert!

In [None]:
from glob import glob
from natsort import natsorted, ns
from tqdm import tqdm
from pathlib import Path

# List the data files of the run directory in natural order
dir = Path('/home/jongho/Physics/ETROC/etroc2_translate/test9')
files = natsorted(glob(f"{dir}/*Data_[0-9]*.dat"))

# print the list of files to check 
# for file in files: print(file)

In [None]:
# Unparsed tail of the previous file, prepended to the next file's bits
residual = ''

import os
# Translated files go into a sub-directory of the data directory.
# exist_ok avoids the check-then-create race of testing for existence first.
outdir = dir/'StandaloneTranslate'
outdir.mkdir(exist_ok=True)

for i, ifile in enumerate(tqdm(files)):
    # if i > 2: break
    # Build one long bit string from the payload of every '1100'-tagged line
    with open(ifile, 'r') as infile:
        payload = ''.join(
            line.strip()[4:] for line in infile if line.startswith('1100')
        )
    bitstream = residual + payload

    # The lookup helper takes the pattern as its first argument; it was
    # missing here. "3c5c" is the hex form of the 16-bit marker
    # '0011110001011100' that the translator expects at every position.
    positions = find_pattern_multiple_indexs("3c5c", bitstream)
    outname = 'TDC_Data_translated_'+str(i)+'.dat'
    residual = translate_with_indices(bitstream, positions, str(outdir), outname, '0x17f0f')