In [252]:
#import libraries
import matplotlib as mpl
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import uproot
from mpl_toolkits import mplot3d
from scipy import stats
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LinearRegression


In [None]:

#Creating the noise file 
def open_noise(filename) :
    """Return the coordinates of the noisy pixels found in a noise run.

    A pixel is considered noisy when, on a short run (<1 minute) taken with
    no source, it declares a hit more than once.

    Parameters
    ----------
    filename : path of the ROOT file; it must contain a ``pixTree`` tree with
        an ``fData`` branch providing ``row``, ``col`` and ``chipId``.

    Returns
    -------
    (X_noise, Y_noise) : two numpy arrays of equal length, one entry per noisy
        pixel. X is the ladder-wide column ``col + (chipId-4)*1024`` and Y is
        the row.
    """
    # The context manager closes the ROOT file once the branch is in memory.
    with uproot.open(filename) as noise_file :
        hits=pd.DataFrame(noise_file['pixTree']['fData'].array(library="np"))
    # A physical pixel is identified by its chip as well as its row/column:
    # without chipId, two different chips firing once at the same (row, col)
    # would wrongly be flagged as one noisy pixel.
    pixel_key=['chipId','row','col']
    # duplicated() marks every occurrence after the first, i.e. pixels seen at
    # least twice; drop_duplicates() then keeps a single entry per such pixel.
    noisy=hits[hits.duplicated(subset=pixel_key)].drop_duplicates(subset=pixel_key)
    # Cast before the arithmetic so it cannot wrap around if the branch stores
    # small unsigned integers (the dtypes are not visible here - TODO confirm).
    X_noise=np.array(noisy.col.astype(np.int64)+((noisy.chipId.astype(np.int64)-4)*1024)) #changing the coordinates 
    Y_noise=np.array(noisy.row)
    return X_noise,Y_noise

#Load the four noise runs; each entry is the (X_noise, Y_noise) pair returned by open_noise
Noise1,Noise2,Noise3,Noise4=map(open_noise,('../Noise1','../Noise2','../Noise3','../Noise4'))


In [None]:
#open the root file and getting the tree
def open_file (filename,id):
    """Read the hits of one run into a DataFrame tagged with its ladder.

    Parameters
    ----------
    filename : path of the ROOT file; it must contain a ``pixTree`` tree with
        an ``fData`` branch providing at least ``col`` and ``chipId``.
    id : value stored in the new ``ldr`` column for every hit of the file
        (the name shadows the builtin but is kept for existing callers).

    Returns
    -------
    DataFrame with the branch fields, an ``ldr`` column, and ``col`` replaced
    by the ladder-wide column ``(chipId-4)*1024 + col``.
    """
    # The context manager closes the ROOT file once the branch is in memory.
    with uproot.open(filename) as file :
        data=pd.DataFrame(file['pixTree']['fData'].array(library="np"))
    data['ldr']=id
    # Cast before the arithmetic so it cannot wrap around if the branch stores
    # small unsigned integers (the dtypes are not visible here - TODO confirm).
    data['col']=((data.chipId.astype(np.int64)-4)*1024)+data.col.astype(np.int64)
    return data

#open_file needs the ladder number as second argument (it fills the 'ldr' column).
#NOTE(review): assumes file DataN was recorded on ladder N - confirm.
data=open_file("../Data1",1)
data2=open_file("../Data2",2)
data3=open_file("../Data3",3)
data4=open_file("../Data4",4)

#single concatenation; ignore_index gives every hit a unique row label, which the
#label-based assignments used later in the notebook rely on
data=pd.concat([data,data2,data3,data4],ignore_index=True)





fig, axes = plt.subplots(4)

display(data) #show the table with the different entries
#one normalised 2D hit map per ladder (at this stage ladders 2 and 4 are still flipped)
for ax,ldr in zip(axes,(1,2,3,4)):
    ladder_hits=data[data.ldr==ldr]
    ax.hist2d(ladder_hits.col,ladder_hits.row,bins=100,density=True)
plt.show()

In [285]:
#remove noisy pixels 
def remove_noise (noise,data) :
    """Return the hits of ``data`` that do not fall on a noisy pixel.

    Parameters
    ----------
    noise : pair ``(X_noise, Y_noise)`` of equal-length sequences; element k
        of each gives the column and row of the k-th noisy pixel.
    data : DataFrame with ``col`` and ``row`` columns, expressed in the same
        coordinates as ``noise``.

    Returns
    -------
    A new DataFrame (``data`` itself is left untouched) without the rows whose
    (col, row) pair matches a noisy pixel.
    """
    # Noisy pixels are (col, row) PAIRS. Testing col and row independently
    # would also discard every good pixel that merely shares its column with
    # one noisy pixel and its row with another.
    noisy_pixels=set(zip(np.asarray(noise[0]).tolist(),np.asarray(noise[1]).tolist()))
    if not noisy_pixels :
        return data.copy()
    is_noisy=np.fromiter(
        ((col,row) in noisy_pixels for col,row in zip(data.col.tolist(),data.row.tolist())),
        dtype=bool,
        count=len(data),
    )
    return data.loc[~is_noisy].copy()


#each ladder is cleaned with its OWN noise map (ladders 3 and 4 previously reused Noise2)
data_filtered=pd.concat([
    remove_noise(noise_map,data[data.ldr==ldr])
    for ldr,noise_map in enumerate((Noise1,Noise2,Noise3,Noise4),start=1)
])


In [None]:

#Displaying the filtered data (Monitoring the experiment)
display(data_filtered)
fig, axes = plt.subplots(4)
#one hit map per ladder, restricted to the hits of chip 8
for ax,ldr in zip(axes,range(1,5)):
    on_chip8=data_filtered[(data_filtered.ldr==ldr)&(data_filtered.chipId==8)]
    ax.hist2d(on_chip8.col,on_chip8.row,bins=[600,300],norm='symlog')
plt.show()

In [None]:
#flipping the data from the even plane in y 
def flip_data_row (data,n_rows=512) :
    """Return a copy of ``data`` whose ``row`` column is mirrored.

    Parameters
    ----------
    data : DataFrame with a ``row`` column.
    n_rows : value the rows are mirrored about; each row becomes
        ``n_rows - row``. Defaults to 512.

    Returns
    -------
    A new DataFrame; the input is not modified, so passing a filtered slice
    no longer writes into a temporary view of the caller's frame.
    """
    # NOTE(review): if rows are numbered 0..n_rows-1, an exact mirror would be
    # (n_rows-1)-row; the original 512-row convention is kept - confirm.
    return data.assign(row=n_rows-data.row)

#mirror the rows of the even planes (ladders 2 and 4) inside data_filtered
for even_ldr in (2,4):
    on_even_plane=(data_filtered.ldr==even_ldr)
    data_filtered[on_even_plane]=flip_data_row(data_filtered[on_even_plane])


In [None]:
#Alignement of the telescope 
def track (Pos_1,Pos_2,z) :
    """Position at depth ``z`` of the straight line through two points.

    Parameters
    ----------
    Pos_1, Pos_2 : sequences ``[x, y, z]`` giving the two points defining the
        line. Their z values must differ, otherwise the division below fails.
    z : depth at which the line is evaluated.

    Returns
    -------
    ``[x, y]`` on the line at depth ``z``.
    """
    # Fraction of the way from Pos_1 to Pos_2 along z (0 at Pos_1, 1 at Pos_2).
    fraction=(z-Pos_1[2])/(Pos_2[2]-Pos_1[2])
    # Linear interpolation: the fraction has to scale the x (resp. y) distance
    # between the two points; adding the bare fraction is not a point of the line.
    x=Pos_1[0]+fraction*(Pos_2[0]-Pos_1[0])
    y=Pos_1[1]+fraction*(Pos_2[1]-Pos_1[1])
    return [x,y]

#For this first alignment we only use the mean x and y of the hits on each plane: a track is built through every pair of planes, and each remaining plane is shifted so that its mean hit lies on that track
def alignement_telescope (DATA,Z) :
    """Coarse alignment of the planes from the mean position of their hits.

    For every pair of ladders (i, j) a straight line is drawn (with ``track``)
    through the mean (col, row) of their hits, and every other ladder k is
    translated so that the mean of its hits lies on that line at ``Z[k]``.

    Parameters
    ----------
    DATA : DataFrame with ``ldr``, ``col`` and ``row`` columns. Ladders are
        expected to be numbered 1..max(ldr).
    Z : sequence giving the z coordinate of each ladder (``Z[k]`` for ladder k+1).

    Returns
    -------
    A new DataFrame with shifted ``col``/``row``. The input is not modified,
    so a filtered slice can be passed without writing into a temporary view.
    """
    # astype returns a copy; float columns are needed because the shifts are
    # fractional and newer pandas refuses to store them in integer columns.
    aligned=DATA.astype({'row':float,'col':float})
    n_ladders=int(max(aligned.ldr))
    for i in range(n_ladders) :
        for j in range(n_ladders-1,i,-1) :
            for k in range(n_ladders) :
                if k==i or k==j :
                    continue
                # The means are recomputed each time: earlier iterations may
                # already have moved these planes.
                first=aligned[aligned.ldr==i+1]
                second=aligned[aligned.ldr==j+1]
                to_move=aligned.ldr==k+1
                Moved=track([first.col.mean(),first.row.mean(),Z[i]],[second.col.mean(),second.row.mean(),Z[j]],Z[k])
                aligned.loc[to_move,'row']+=Moved[1]-aligned.loc[to_move,'row'].mean()
                aligned.loc[to_move,'col']+=Moved[0]-aligned.loc[to_move,'col'].mean()
    return aligned


#z coordinate of each plane of BANCO, in ladder order
Z=[0,10.6,145,155.6]
#coarse alignment, using only the hits recorded on chip 8
data_aligned=alignement_telescope(data_filtered.loc[data_filtered.chipId==8],Z)
display(data_aligned)

In [None]:
#Plotting superposed (to see if planes are aligned)
fig, axes = plt.subplots()
#per-ladder drawing options: ladder 1 is opaque, the others are drawn semi-transparent on top
overlay_options={
    1:dict(cmap='Greens'),
    2:dict(cmap='Blues',alpha=0.2),
    3:dict(cmap='Reds',alpha=0.2),
    4:dict(cmap='viridis',alpha=0.2,cmin=1),
}
for ldr,options in overlay_options.items():
    plane_hits=data_aligned[data_aligned.ldr==ldr]
    plt.hist2d(plane_hits.col,plane_hits.row,bins=[600,300],norm='symlog',**options)
plt.axis('scaled')
plt.show()

In [294]:
#Clustering with DBSCAN (Density based )

def Clustering (data) :
    """Group the hits of each trigger into clusters of adjacent pixels.

    Hits sharing a ``trgNum`` are clustered with DBSCAN (eps=1, euclidean
    metric, min_samples=1) on their (row, col) coordinates. Cluster numbers
    are consecutive, start at 0 and increase with the trigger number.

    Parameters
    ----------
    data : DataFrame with ``trgNum``, ``row`` and ``col`` columns.

    Returns
    -------
    list of int, one label per row of ``data`` and IN THE SAME ORDER as the
    rows, so the list can be assigned back as a column. (The labels used to
    come back grouped by trigger, which only matched the rows when ``data``
    was already sorted by trigger number.) An empty frame gives ``[]``.
    """
    n_hits=len(data)
    if n_hits==0 :
        return []
    coordinates=data[['row','col']].to_numpy()
    labels_out=np.empty(n_hits,dtype=np.int64)
    # Row positions grouped by trigger (groups come out in ascending trigger
    # order): one pass over the data instead of one full scan per trigger.
    positions_by_trigger=pd.Series(np.arange(n_hits)).groupby(data.trgNum.to_numpy())
    n_triggers=positions_by_trigger.ngroups
    n=0  # first cluster number still unused
    for done,(_,positions) in enumerate(positions_by_trigger,start=1) :
        print('Doing clustering : ',(done*100)/n_triggers,' %',end='\r')
        where=positions.to_numpy()
        if len(where)==1 :
            # A single hit is its own cluster; no need to run DBSCAN.
            labels_out[where[0]]=n
            n+=1
        else :
            labels=DBSCAN(eps=1,metric='euclidean',min_samples=1).fit(coordinates[where]).labels_
            labels_out[where]=labels+n
            n+=int(labels.max())+1
    return labels_out.tolist()

#one list of cluster labels per ladder (ladders are numbered from 1)
Clusters=[
    Clustering(data_aligned[data_aligned.ldr==ldr])
    for ldr in range(1,max(data_aligned.ldr)+1)
]


In [295]:
#Setting the clusters to the right data

data_clusterized=data_aligned.copy()
data_clusterized['cluster']=0
#Each ladder was clustered separately with labels starting at 0, so they are shifted by the
#number of ids already used. Shifting by the current maximum id (instead of maximum+1) made the
#first cluster of a ladder share its id with the last cluster of the previous ladder.
next_cluster_id=0
for i in range(int(max(data_clusterized.ldr))):
    ladder_labels=np.asarray(Clusters[i],dtype=np.int64)
    if ladder_labels.size==0:
        continue
    data_clusterized.loc[data_clusterized.ldr==i+1,'cluster']=ladder_labels+next_cluster_id
    next_cluster_id+=int(ladder_labels.max())+1

In [None]:
#cluster size

def cluster_size (data) :
    """Number of hits of every cluster.

    Parameters
    ----------
    data : DataFrame with ``cluster``, ``trgNum`` and ``ldr`` columns.

    Returns
    -------
    DataFrame with one row per distinct cluster id (ascending) and the
    columns ``trgNum`` and ``ldr`` (taken from the first hit of the cluster)
    and ``Cluster_size`` (number of rows carrying that cluster id).
    """
    # One grouped pass handles any number of clusters; looping over a fixed
    # range of 10000 ids failed with fewer clusters and ignored any beyond it.
    per_cluster=data.groupby('cluster',sort=True).agg(
        trgNum=('trgNum','first'),
        ldr=('ldr','first'),
        Cluster_size=('trgNum','size'),
    )
    return per_cluster.reset_index(drop=True)

#per-cluster table (columns trgNum, ldr, Cluster_size) built from the clustered hits
Cluster_size=cluster_size(data_clusterized)


In [None]:
#displaying the distribution of the cluster size
size_axes=Cluster_size['Cluster_size'].hist(bins=20)
size_axes.set_title('Cluster size distribution')
size_axes.set_ylabel('Entries')
size_axes.set_xlabel('Cluster size')

In [None]:
#Cluster by trgNum 
#number of clusters of every trigger from 1 to the largest trigger number (0 when a trigger has none),
#counted in one pass instead of one scan of the table per trigger
trigger_of_cluster=Cluster_size.trgNum.astype(int)
clusters_per_trigger=trigger_of_cluster.value_counts().reindex(range(1,int(trigger_of_cluster.max())+1),fill_value=0)
Cluster_by_trgNum=clusters_per_trigger.tolist()
plt.hist(Cluster_by_trgNum,bins=10)
plt.title('Cluster by trigger Number')
plt.ylabel('Entries')
plt.xlabel('Number of cluster by trigger number') #was plt.xlable, which does not exist

In [None]:
 #Efficiency (4 ladders hit for a trigger)
#data_reduced is only built in a later cell, so this cell could not run top-to-bottom.
#data_clusterized carries the same (trgNum, ldr) information: a ladder has a cluster
#for a trigger exactly when it has a hit for it.
N_TRIGGERS=10000 #triggers 0 .. N_TRIGGERS-1 are considered, as before
N_LADDERS=4
hits_in_run=data_clusterized[
    (data_clusterized.trgNum>=0)
    &(data_clusterized.trgNum<N_TRIGGERS)
    &data_clusterized.ldr.isin(range(1,N_LADDERS+1))
]
#number of distinct ladders hit for each trigger
ladders_hit=hits_in_run.groupby('trgNum').ldr.nunique()
n=int((ladders_hit==N_LADDERS).sum())
print('Efficiency : ',n*100/N_TRIGGERS)

In [296]:

#computing barycenter
def reduce_by_barycenter_new (data) :
    """Replace every cluster by the barycenter of its hits.

    Parameters
    ----------
    data : DataFrame with ``cluster``, ``trgNum``, ``ldr``, ``col`` and
        ``row`` columns.

    Returns
    -------
    DataFrame with one row per distinct cluster id (ascending) and the
    columns ``trgNum``, ``ldr``, ``cluster``, ``Xbar`` (mean ``col``) and
    ``Ybar`` (mean ``row``). ``trgNum`` and ``ldr`` come from the first hit
    of the cluster.
    """
    # Grouping by cluster fixes two defects of the row-by-row version: each
    # barycenter was stored with the trgNum/ldr/cluster of the NEXT cluster,
    # and the last cluster was never written out. It also no longer needs
    # the ids to be consecutive from 0.
    per_cluster=data.groupby('cluster',sort=True).agg(
        trgNum=('trgNum','first'),
        ldr=('ldr','first'),
        Xbar=('col','mean'),
        Ybar=('row','mean'),
    )
    return per_cluster.reset_index()[['trgNum','ldr','cluster','Xbar','Ybar']]


#one row per cluster, with its barycenter in the Xbar and Ybar columns
data_reduced=reduce_by_barycenter_new (data_clusterized)



In [297]:
#fit the data 

def chi2 (xexp,xfit):
    """Sum over the entries of ``xexp`` of ``(xexp[i]-xfit[i])**2 / xexp[i]``.

    ``xexp`` are the experimental values and ``xfit`` the fitted ones; both
    are indexed from 0 to ``len(xexp)-1``. Returns 0 for empty input.
    """
    return sum(((xexp[i]-xfit[i])**2)/xexp[i] for i in range(len(xexp)))

def tracking (X,Y,Z0,t) :
    """Fit a straight track x(z), y(z) through the given points.

    Parameters
    ----------
    X, Y : sequences with the x and y coordinate of every point.
    Z0 : sequence with the z coordinate of every point (same length).
    t : trigger number, copied as is into the result.

    Returns
    -------
    One-row DataFrame with the columns ``trgNum``, ``Mean_Residuals_X``,
    ``Mean_Residuals_Y`` (root-mean-square distance between the points and
    the fitted line), ``slope_X``, ``intercept_X``, ``slope_Y`` and
    ``intercept_Y``.
    """
    z=np.asarray(Z0,dtype=float)
    x=np.asarray(X,dtype=float)
    y=np.asarray(Y,dtype=float)
    fit_X=stats.linregress(z,x)
    fit_Y=stats.linregress(z,y)
    # quadratic mean (RMS) of the residuals
    Mean_Residuals_X=np.sqrt(np.mean((fit_X.slope*z+fit_X.intercept-x)**2))
    Mean_Residuals_Y=np.sqrt(np.mean((fit_Y.slope*z+fit_Y.intercept-y)**2))
    # (a stray closing parenthesis after this call made the cell a SyntaxError)
    return pd.DataFrame({'trgNum':[t],'Mean_Residuals_X':[Mean_Residuals_X],'Mean_Residuals_Y':[Mean_Residuals_Y],'slope_X':[fit_X.slope],'intercept_X':[fit_X.intercept],'slope_Y':[fit_Y.slope],'intercept_Y':[fit_Y.intercept]})


In [None]:

#Alignment from the tracking: in this more precise alignment technique we compute, for every event, the residual of each plane with respect to the track fitted through the three other planes (3-point fit, distance of the 4th point from the track)

def precise_alignement (data,Z0,n_triggers=10000) :
    """Residual of every plane with respect to the track of the other planes.

    Only triggers having exactly one entry on each ladder are used. For such
    a trigger and for each ladder k, a track is fitted (with ``tracking``)
    through the points of all the other ladders and the difference between
    the track evaluated at ``Z0[k]`` and the point measured on ladder k is
    recorded.

    Parameters
    ----------
    data : DataFrame with ``trgNum``, ``ldr``, ``Xbar`` and ``Ybar`` columns.
        Ladders are expected to be numbered 1..max(ldr).
    Z0 : sequence giving the z coordinate of each ladder.
    n_triggers : only trigger numbers 0 .. n_triggers-1 are considered
        (default 10000, the value that used to be hard-coded).

    Returns
    -------
    DataFrame with one row per ladder and two columns, ``dx`` and ``dy``,
    each cell holding the list of residuals (track minus measurement)
    collected for that ladder.
    """
    # int(): ldr may be stored as a float column, which range() rejects.
    m=int(max(data.ldr))
    Distances_X=[[] for _ in range(m)]
    Distances_Y=[[] for _ in range(m)]
    in_run=data[(data.trgNum>=0)&(data.trgNum<n_triggers)]
    # One grouped pass instead of filtering the whole frame for every trigger.
    for t,data_trgNum in in_run.groupby('trgNum') :
        print ("precise_alignement_percentage : ",t*100/n_triggers, end='\r')
        # keep only triggers with exactly one point on every ladder
        if len(data_trgNum)!=m or data_trgNum.ldr.nunique()!=m :
            continue
        X_all=[data_trgNum.loc[data_trgNum.ldr==j+1,'Xbar'].values[0] for j in range(m)]
        Y_all=[data_trgNum.loc[data_trgNum.ldr==j+1,'Ybar'].values[0] for j in range(m)]
        for k in range(m) :
            others=[j for j in range(m) if j!=k]
            T=tracking ([X_all[j] for j in others],[Y_all[j] for j in others],[Z0[j] for j in others],t)
            Distances_X[k].append(T.at[0,'slope_X']*Z0[k]+T.at[0,'intercept_X']-X_all[k])
            Distances_Y[k].append(T.at[0,'slope_Y']*Z0[k]+T.at[0,'intercept_Y']-Y_all[k])
    return pd.DataFrame({'dx':Distances_X,'dy':Distances_Y})




In [None]:
#Z corresponds to the original coordinates
Z=[0,10.6,145,155.6]
Distances=precise_alignement(data_reduced,Z)
#mean residual of each of the four planes, in x then in y
mean_dx=[np.mean(Distances['dx'][plane]) for plane in range(4)]
mean_dy=[np.mean(Distances['dy'][plane]) for plane in range(4)]
print('x',*mean_dx,'Y',*mean_dy)

In [None]:
#moving from the correction that we had previously
def move (data,X,Y):
    """Shift the barycenters of every ladder by its mean residual.

    Parameters
    ----------
    data : DataFrame with ``ldr``, ``Xbar`` and ``Ybar`` columns. It is
        modified IN PLACE; pass a copy to keep the original.
    X, Y : for each ladder index i (ladder i+1), ``X[i]`` / ``Y[i]`` is the
        collection of residuals whose mean is added to ``Xbar`` / ``Ybar``.

    Returns
    -------
    The same ``data`` object, after the shift.
    """
    # int(): ldr may be stored as a float column, which range() rejects.
    for i in range(int(max(data.ldr))) :
        on_ladder=data.ldr==i+1
        data.loc[on_ladder,'Xbar']+=np.mean(X[i])
        data.loc[on_ladder,'Ybar']+=np.mean(Y[i])
    return data


#move() shifts its argument in place, so it must be given the copy: passing data_reduced
#itself shifted the original barycenters (again on every re-run of the cell) and threw the copy away
data_aligned_1=move(data_reduced.copy(),Distances['dx'],Distances['dy'])



In [233]:
#computing all the tracks for a run 

def make_tracks (data,Z,n_triggers=10000) :
    """Fit one track for every trigger that has one point on each ladder.

    Parameters
    ----------
    data : DataFrame with ``trgNum``, ``ldr``, ``Xbar`` and ``Ybar`` columns.
        Ladders are expected to be numbered 1..max(ldr).
    Z : sequence giving the z coordinate of each ladder.
    n_triggers : only trigger numbers 0 .. n_triggers-1 are considered
        (default 10000, the value that used to be hard-coded).

    Returns
    -------
    DataFrame with one row per fitted trigger, in ascending trigger order,
    and the columns produced by ``tracking``: ``trgNum``,
    ``Mean_Residuals_X``, ``Mean_Residuals_Y``, ``slope_X``, ``intercept_X``,
    ``slope_Y``, ``intercept_Y``. Empty (same columns) when nothing qualifies.
    """
    columns=['trgNum','Mean_Residuals_X','Mean_Residuals_Y','slope_X','intercept_X','slope_Y','intercept_Y']
    m=int(max(data.ldr))
    in_run=data[(data.trgNum>=0)&(data.trgNum<n_triggers)]
    fitted=[]
    # One grouped pass instead of filtering the whole frame for every trigger.
    for t,data_trgNum in in_run.groupby('trgNum') :
        print ("make_tracks_percentage : ",t*100/n_triggers, end='\r')
        # keep only triggers with exactly one point on every ladder
        if len(data_trgNum)!=m or data_trgNum.ldr.nunique()!=m :
            continue
        X=[data_trgNum.loc[data_trgNum.ldr==i+1,'Xbar'].values[0] for i in range (m)]
        Y=[data_trgNum.loc[data_trgNum.ldr==i+1,'Ybar'].values[0] for i in range (m)]
        fitted.append(tracking (X,Y,Z,t))
    if not fitted :
        return pd.DataFrame({name:[] for name in columns})
    # Single concatenation: growing the frame inside the loop was quadratic.
    return pd.concat(fitted,ignore_index=True)

In [None]:
#fit the tracks on the barycenters shifted by the mean residuals (Z: plane coordinates defined above)
Tracks_aligned_1=make_tracks(data_aligned_1,Z)

In [None]:
#event display (t: trigger number, absi: Z coordinate, ordo: X or Y coordinate)

def event_display(data,track,t,absi,ordo :str,label) :
    """Draw the points of trigger ``t`` and its fitted track on the current axes.

    data  : DataFrame with ``trgNum`` and an ``<ordo>bar`` column (the points).
    track : DataFrame with ``trgNum``, ``slope_<ordo>`` and ``intercept_<ordo>``.
    absi  : z coordinates used as abscissa, one per plotted point.
    ordo  : coordinate to draw, used to build the column names ('X' or 'Y').
    label : legend label of the track line.
    """
    z_positions=np.array(absi)
    plt.title("event display of " +ordo +" in function of Z")
    measured=data[data.trgNum==t][ordo+'bar']
    plt.scatter(z_positions,measured)
    # parameters of the track fitted for this trigger
    of_trigger=track.trgNum==t
    slope=track.loc[of_trigger,'slope_'+ordo].values[0]
    intercept=track.loc[of_trigger,'intercept_'+ordo].values[0]
    plt.plot(z_positions,slope*z_positions+intercept,label=label)
    

#X view of trigger 2000 after the precise alignment
event_display(data_aligned_1,Tracks_aligned_1,t=2000,absi=Z,ordo='X',label='aligned')
plt.legend()
plt.show()


In [None]:
#mean event display (the average event)

def mean_event_display(data,track,Z,ordo:str,label) :
    Z=np.array(Z)
    plt.scatter(Z,[np.mean(data.loc[data['trgNum'].isin(np.array(track.trgNum))&(data.ldr==i+1),ordo+'bar'])for i in range(max(data.ldr))])
    plt.plot(Z,np.mean(track['slope_'+ordo])*Z+np.mean(track['intercept_'+ordo]),label=label)
    plt.title(ordo+'(Z) in average, points: real, line: reconstructed track',fontsize=16)