# DAQ - data compression
Auhtor: Piotr Podlaski

In [None]:
import numpy as np
from ROOT import *
gStyle.SetPalette(55)
gStyle.SetOptStat(0)

import os
                  
%jsroot on

## Pixel detectors with binary information about the hits (0-no hit, 1-hit)

### Example: 
* write a function to generate square pixel matrix of arbitrary size $N$, filled randomly with occupancy $O$. 
* write a function that will fill the 2-dimensional histogram with pixel values to visualize the generated sensor data
* test operation of your code for different sensor sizes and occupacies
* write a function that checks size of a file
* write the array to a text file and check its size for $N=1000$

In [None]:
def generateRandomMatrix(nPoints, O):   # o represents occupancy
    selected=np.random.random((N,N))<O
    data=np.zeros((N,N))
    data[selected]=1
    return data

def fillSensorHisto(h,data):
    for i in range(len(data)):
        for j in range(len(data[i])):
            if data[i,j]:
                h.Fill(j,i,)
def getFileSize(fname):
    return os.path.getsize(fname)



In [None]:
N=1000 # size of the array
O=0.01 #occupancy

data=generateRandomMatrix(N,O)
hSensor=TH2I("hSensor,","pixel distribution",N,0,N,N,0,N)
fillSensorHisto(hSensor,data)

#drawing:
c=TCanvas("c","dd",1024,768)
hSensor.Draw("colz")
c.Draw()


fname='./data.txt'
np.savetxt(fname, data, fmt='%d',delimiter='')  # %d is decimal
fullSize=getFileSize(fname)
print("Size of full sensor data file: {}" .format(fullSize))

### Task:
* write to a file only coordinates of pixels that have signal and check file size, typically numbers represented by a fixed number of bits are used to store information, to mimic this use zero-padded 3-digit intigers to describe position in a sensor (e.g. instead of 3 we write 003)
* read the data from the file and compare resulting 2D array with the original one

In [None]:
fnameCoord='data_coord.txt'
dataCoord=np.transpose(np.nonzero(data))    # list of coordinates that are non zero; %03d is the format to have 3 leading zeros before our number
print(dataCoord)

#save data here:
np.savetxt(fnameCoord, dataCoord, fmt='%03d',delimiter='')  # %d is decimal
nonzerosize=getFileSize(fnameCoord)     # get file size

print(nonzerosize)

data_read=np.zeros((N,N))
with open(fnameCoord,'r') as f:
    for l in f.read().splitlines():
        strsplit = l.split()
        print(strsplit)

np.array_equal(data,data_read)

# B represents number of characters in one line in file data_coord.txt 

B = 7
onew = (N ** 2 + N) / (B * N ** 2)
onew

$o < \frac{N^2 + N}{BN^2}$
---
where $B$ is number of cooridinates needed to properly identify pixel position and calculate occupancy $o$

## Detectors storing waveforms with ADC values on each channel

### Example:
* a function that generates fixed size waveform with a set of gaussian signals of a given amplitudes and positions, width of the gaussians is fixed to $\sigma=10$
* width of the waveform is fixed to 1000 samples
* values are integer numbers in range 0-4095 (12 bit resolution of the ADC)
* generate and plot a single waveform with exactly one signal (random position $x\in[50,950]$ and amplitude $a\in[0,4095]$) in it

In [None]:
sigma=10
N=1000

def gauss(x, pos, amp):     # generate gauss function; IMPORTANT: NOT NORMALIZED!
    return amp*np.exp(-(x-pos)**2/(2*sigma**2))/np.sqrt(2*np.pi)
    # return amp*np.exp(-(x-pos)**2/(2*sigma**2))   # now 'amp' is real normalized amplitude

def generateWvfmWithGauss(pos, amp):    # generate gaussian wave signal
    wvfm=np.zeros(N)
    for p,a in zip(pos,amp):
        args=np.linspace(0,N-1,N)   # generate random arguments for gaussian functions
        wvfm+=gauss(args,p,a)
    wvfm=wvfm.astype(int)
    return wvfm
        
def buildGraph(wvfm):   # create ROOT graph with this wave form
    gr=TGraph()
    for w in wvfm:
        gr.SetPoint(gr.GetN(),gr.GetN(),w)
    return gr

def printWvfms(wvfms,fname):
    canv=TCanvas("cplot","",1024,768)
    gr=TGraph()
    canv.Print(fname+'[')   # open pdf file
    for w in wvfms:
        for i in range(len(w)):
            gr.SetPoint(i,i,w[i])
        gr.Draw("AL")   # create graph in pdf
        gr.SetMaximum(4095)
        canv.Print(fname)   
    canv.Print(fname+']')   # close pdf file and have multiple graphs in one pdf file
        

In [None]:
Nsignals=1
w=generateWvfmWithGauss(np.random.randint(50,950,Nsignals),np.random.randint(0,4095,Nsignals))
g=buildGraph(w)
g.SetMaximum(4095)
c.cd()
g.Draw("AL")
c.Draw()

w.shape
# suppressSingleWaveform(w) 

### Task:
* generate $N_{\mathrm{wvfms}}$ such waveforms, with $N_{\mathrm{signals}}$ gaussian signals of random amplitude, distribute signals randomly, **one channel can have multiple signals**
* without creating the file with all waveforms, what would be its size? (ADC values in plain text are four digit integers padded with zeros, one integer is one byte)
* implement a zero-suppression algorithm: instead of writting all ADC values we store only the non zero ones, with their locations
   * ID of the channel
   * for every joint signal in a channel:
     * ID of the first non-zero sample
     * length of the signal
     * a set of samples representing the signal
* Check size of resulting compressed data for $N_{\mathrm{wvfms}}=1000$ and $N_{\mathrm{signals}}=100$. Instead of writing to a file use `np.size()` - it returns number of numerical fields in an array.

In [89]:
def generateWaveforms(channels,signals):    # generate multiple waveforms
    wvfms=[]
    signalPositions=np.random.randint(0,N*channels,signals)     # generate random signal positions
    for i in range(channels):
        indInThisWvfm=np.logical_and(signalPositions>=i*N,signalPositions<(i+1)*N)  # check if this position is in my waveform?
        pos=signalPositions[indInThisWvfm]-i*N
        amps=np.random.randint(0,4095,len(pos))     # random amplitudes
        wvfms.append(generateWvfmWithGauss(pos,amps))
    return np.array(wvfms)
    
    
def suppressSingleWaveform(w):  # this function gives us indices where our signal begin 
    signalFragments=[]
    fragment=[]
    for i in np.nonzero(w)[0]:  # loop over all non zero indices of the waveform
        if fragment:
            if i-fragment[-1]!=1:   # check if we are still in the same fragment
                signalFragments.append(np.array(fragment))
                fragment=[]
        fragment.append(i)
    signalFragments.append(np.array(fragment))
    suppressed=[]
    print(signalFragments)
    # print(fragment)
    for f in signalFragments:

        suppressed.append(f[0])      # channel id
        suppressed.append(len(f))    # length of the sigal

        for samp in f:
            # print(w[samp])
            suppressed.append(w[samp])

    return np.array(suppressed)

def suppressWaveforms(wvfms):
    suppr = []
    for i in range(len(wfvms)):
        supWvfm = [i]
        supWvfm += suppressSingleWaveform(wfvms[i])
        suppr.append(supWvfm)

    return suppr

Example

In [90]:
wfvms = generateWaveforms(1, 2)
# sw = suppressSingleWaveform(w)    # supress multiple waveforms
printWvfms(wfvms, "A.pdf")

w = generateWaveforms(1, 1)


# g=buildGraph(w)
# g.SetMaximum(4095)
# c.cd()
# g.Draw("AL")
# c.Draw()

# print(w.shape, sw.shape)

Info in <TCanvas::Print>: pdf file A.pdf has been created
Info in <TCanvas::Print>: Current canvas added to pdf file A.pdf
Info in <TCanvas::Print>: pdf file A.pdf has been closed


Test zone

In [93]:
# wnew = np.ravel(w)  # proper shape for buildGraph

# sw = suppressSingleWaveform(w)   # this fu

suppr = suppressWaveforms(wfvms)
# wnew
# g=buildGraph(wnew)
# g.SetMaximum(4095)
# c.cd()
# g.Draw("AL")
# c.Draw()

# print(wnew[299], wnew[sw[0]])

# print(wfvms)
suppr

[array([189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
       202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214,
       215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
       228, 229]), array([712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724,
       725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737,
       738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750,
       751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763,
       764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776,
       777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788])]


[array([ 189,   41,    1,    1,    1,    1,    2,    2,    3,    3,    4,
           4,    5,    5,    6,    6,    6,    7,    7,    8,    8,    8,
           8,    8,    8,    8,    7,    7,    6,    6,    6,    5,    5,
           4,    4,    3,    3,    2,    2,    1,    1,    1,    1,  712,
          77,    1,    1,    2,    3,    4,    6,    8,   11,   16,   21,
          28,   38,   49,   64,   81,  103,  129,  160,  197,  239,  288,
         343,  405,  473,  546,  626,  709,  795,  883,  972, 1058, 1140,
        1217, 1286, 1345, 1393, 1428, 1450, 1457, 1450, 1428, 1393, 1345,
        1286, 1217, 1140, 1058,  972,  883,  795,  709,  626,  546,  473,
         405,  343,  288,  239,  197,  160,  129,  103,   81,   64,   49,
          38,   28,   21,   16,   11,    8,    6,    4,    3,    2,    1,
           1])]

In suppressWaveforms output is as follows (example for 2 signals in 1 channel)

[array([ 189 <- here is the index of the start of the signal,   41 <- length of the signal, -> values    1,    1,    1,    1,    2,    2,    3,    3,    4, \
           4,    5,    5,    6,    6,    6,    7,    7,    8,    8,    8, \
           8,    8,    8,    8,    7,    7,    6,    6,    6,    5,    5, \
           4,    4,    3,    3,    2,    2,    1,    1,    1,    1, -> new signal  712, \
          77,    1,    1,    2,    3,    4,    6,    8,   11,   16,   21, \
          28,   38,   49,   64,   81,  103,  129,  160,  197,  239,  288, \
         343,  405,  473,  546,  626,  709,  795,  883,  972, 1058, 1140, \
        1217, 1286, 1345, 1393, 1428, 1450, 1457, 1450, 1428, 1393, 1345, \
        1286, 1217, 1140, 1058,  972,  883,  795,  709,  626,  546,  473, \
         405,  343,  288,  239,  197,  160,  129,  103,   81,   64,   49, \
          38,   28,   21,   16,   11,    8,    6,    4,    3,    2,    1, \
           1])]

In [95]:
wfvms

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

## Homework
for $N_{\mathrm{wvfms}}=100$
* write a decoder for compressed waveforms and verify its operation with `np.array_equal()` as in the case of pixel detectors
* check how size of compressed data depends on number of signals present in waveforms for $N_{\mathrm{signals}} \in [0,1000]$ with step of 20. Plot size of the compressed and raw data as a function of number of signals on a single plot.