In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
import os

In [2]:
# Directory holding the particle/shower text files read by the following cells.
# The location can be overridden without editing the notebook by setting the
# CLUSTER_DATA_DIR environment variable; otherwise the original default is used.
path = os.environ.get(
    'CLUSTER_DATA_DIR',
    r'/home/user/Documents/BUAP/Estancia/CosmicRayReco/MCAnalysis/ClusterAnalysis/Data',
)
# NOTE: this changes the working directory of the whole kernel, so every
# relative file name used later is resolved against `path`.
os.chdir(path)

In [3]:
# Load the per-shower summary table (tab-separated text file in the data directory).
Showers = pd.read_csv(
    r'DAT000006_1k_showers.txt',
    sep='\t',
)

In [4]:
# Load the table of all particles at observation level (tab-separated text file).
Particles = pd.read_csv(
    'DAT000006_1k_particles.txt',
    sep='\t',
)

In [5]:
# Display the particle table (pandas abbreviates long frames in the rendered output).
Particles

Unnamed: 0,NShow,PId,X,Y,T,Px,Py,Pz,PSq,Ene,ZhA,AzA
0,1,1,202.8570,-52.80860,70347.8,-0.014051,0.000221,0.052103,0.002912,0.053965,15.0941,179.100
1,1,1,43.9967,-18.30410,70512.6,-0.182562,0.015446,0.487750,0.271468,0.521026,20.5877,175.164
2,1,1,26.9723,-5.68421,70534.3,-0.126190,0.013492,0.323988,0.121074,0.347957,21.3907,173.897
3,1,1,33.2305,3.98663,70528.2,-0.020200,0.002715,0.052752,0.003198,0.056553,21.1248,172.344
4,1,1,79.4011,13.43910,70477.4,-0.018291,0.003078,0.052332,0.003083,0.055522,19.5160,170.449
...,...,...,...,...,...,...,...,...,...,...,...,...
76243,1000,5,60.9239,15.21820,66522.0,-33.247300,-11.449000,106.593000,12598.600000,112.138000,18.2568,-160.998
76244,1000,2,-356.6540,360.43600,66859.6,-0.014904,-0.009428,0.052701,0.003088,0.055065,18.5018,-147.681
76245,1000,5,484.7020,354.32500,66030.5,-1.106890,-0.376404,3.540170,13.899700,3.624060,18.2758,-161.219
76246,1000,5,60.6589,-90.33680,66559.4,-9.388220,-3.536470,30.083000,1005.630000,31.606300,18.4427,-159.359


In [6]:
# Run summary: a one-row table describing the simulation run.
# `Energies` is a lookup of labelled energy values; EInf/ESup pick two of them.
Energies = {'E1': 9, 'E2': 11, 'E3': 14, 'E4': 18, 'E5': 55, 'E6':70}
Run = pd.DataFrame(
    [
        {
            'SCod': 'Corsika-77500',
            'Mass': 1,
            'Lab': 'BUAP',
            'NShows': 1000,
            'EInf': Energies['E1'],
            'ESup': Energies['E2'],
            'AAInf': 0,
            'AASup': 25,
        }
    ]
)

In [7]:
# Display the one-row run summary built in the previous cell.
Run

Unnamed: 0,SCod,Mass,Lab,NShows,EInf,ESup,AAInf,AASup
0,Corsika-77500,1,BUAP,1000,9,11,0,25


In [8]:
# Add a constant 'Mass' column (value 1 for every shower) to the shower summary;
# this is the same value stored in the 'Mass' field of the Run summary.
# NOTE: this modifies the Showers frame in place.
Showers['Mass'] = 1

In [9]:
# Display the shower summary data frame (now including the 'Mass' column).
# This shows the rows themselves, not descriptive statistics.
Showers

Unnamed: 0,NShow,Energy,ZFInt,ZhAng,AzAng,NParticles,Mass
0,1,1000,21.8589,20.93290,175.38100,240,1
1,2,1000,21.6378,19.03420,-9.29939,51,1
2,3,1000,22.9038,16.56260,164.26400,246,1
3,4,1000,13.4947,20.75230,155.63300,32,1
4,5,1000,19.9795,8.44973,48.14700,49,1
...,...,...,...,...,...,...,...
995,996,1000,34.1608,11.05530,-12.91870,19,1
996,997,1000,59.5770,22.62960,27.18390,9,1
997,998,1000,11.4008,12.71430,174.38900,62,1
998,999,1000,17.5343,25.34680,-81.59460,44,1


In [10]:
# Local particle codes used to label clusters.
#
#   Particle     Corsika PID    Local code
#   Gamma        1              1
#   Electrons    2, 3           1000
#   Muons        5, 6           100000
#   Pions        8, 9           50000000
#   Protons      14             10000000
#   Neutrons     13             100000000
#
# `nop` is defined here but is not used by the mapping below.
nop = -999.
gam = 1.
ele = 1000.
mu = 100000.
pi = 50000000.
prn = 10000000.
ntn = 100000000.

# Corsika particle id -> local code, expanded from (code, ids) groups.
pmap = {
    corsika_id: local_code
    for local_code, corsika_ids in (
        (gam, (1.,)),
        (ele, (2., 3.)),
        (mu, (5., 6.)),
        (pi, (8., 9.)),
        (ntn, (13.,)),
        (prn, (14.,)),
    )
    for corsika_id in corsika_ids
}

# Distance of each particle from the origin of the (X, Y) plane.
Particles['R'] = np.sqrt(np.square(Particles['X']) + np.square(Particles['Y']))
# Particle ids that have no entry in `pmap` get NaN as their PCode.
Particles['PCode'] = Particles['PId'].map(pmap)

In [11]:
# Descriptive statistics of every numeric column; the 'count' row also reveals
# columns with missing values (e.g. PCode for particle ids absent from pmap).
Particles.describe()

Unnamed: 0,NShow,PId,X,Y,T,Px,Py,Pz,PSq,Ene,ZhA,AzA,R,PCode
count,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76248.0,76224.0
mean,499.485547,3.015909,1.769379,-3.768603,55184.374544,-0.00521,0.035557,3.77734,208.82791,3.926843,17.430514,-2.490288,392.63783,1419710.0
std,292.657247,2.525622,500.445615,513.134389,27208.464019,3.191379,3.097355,13.22053,2812.528593,13.874389,8.040552,105.528784,599.669543,11221770.0
min,1.0,1.0,-8382.91,-8837.78,11480.5,-153.155,-99.7332,0.012677,0.0025,0.05,0.12586,-179.998,0.346019,1.0
25%,240.0,1.0,-87.97805,-93.24475,35643.15,-0.04243,-0.034349,0.078934,0.006972,0.083394,11.94715,-99.125575,62.395043,1.0
50%,501.0,2.0,-0.928835,0.521124,51505.65,-0.002687,-0.000503,0.199205,0.044744,0.205952,17.35675,-1.895215,169.166936,1000.0
75%,753.0,5.0,91.452,89.1563,67909.7,0.032807,0.032736,1.840413,3.786715,1.81227,22.263925,90.732275,468.668955,100000.0
max,1000.0,25.0,8053.78,10224.3,234742.0,139.129,150.12,562.6,318089.0,563.888,86.0827,179.999,10848.110591,100000000.0


In [12]:
# Observatory layout
det_s_x = 1.0 # detector size in x axis (m)
det_s_y = 1.0 # detector size in y axis (m)

In [13]:
# Assign every particle to a detector cell.
# 'DetX' / 'DetY' hold the centre of the cell that contains the particle:
# the cell index along an axis is floor(position / cell_size), and the centre
# is (index + 0.5) * cell_size.  Each axis uses its own cell size, so the
# binning stays correct for non-square cells and for sizes other than 1 m.
Particles['DetX'] = (np.floor(Particles['X'] / det_s_x) + 0.5) * det_s_x
Particles['DetY'] = (np.floor(Particles['Y'] / det_s_y) + 0.5) * det_s_y
# Position of the particle relative to the centre of its cell.
Particles['PartX'] = 1000 * (Particles['X'] - Particles['DetX']) # mm
Particles['PartY'] = 1000 * (Particles['Y'] - Particles['DetY']) # mm
# Cluster number of each particle; 0 means "not assigned to a cluster yet".
Particles['NClst'] = 0

In [14]:
# Preview the first rows to check the new cell / relative-position columns.
Particles.head()

Unnamed: 0,NShow,PId,X,Y,T,Px,Py,Pz,PSq,Ene,ZhA,AzA,R,PCode,DetX,DetY,PartX,PartY,NClst
0,1,1,202.857,-52.8086,70347.8,-0.014051,0.000221,0.052103,0.002912,0.053965,15.0941,179.1,209.618011,1.0,202.5,-52.5,357.0,-308.6,0
1,1,1,43.9967,-18.3041,70512.6,-0.182562,0.015446,0.48775,0.271468,0.521026,20.5877,175.164,47.652384,1.0,43.5,-18.5,496.7,195.9,0
2,1,1,26.9723,-5.68421,70534.3,-0.12619,0.013492,0.323988,0.121074,0.347957,21.3907,173.897,27.564746,1.0,26.5,-5.5,472.3,-184.21,0
3,1,1,33.2305,3.98663,70528.2,-0.0202,0.002715,0.052752,0.003198,0.056553,21.1248,172.344,33.468782,1.0,33.5,3.5,-269.5,486.63,0
4,1,1,79.4011,13.4391,70477.4,-0.018291,0.003078,0.052332,0.003083,0.055522,19.516,170.449,80.530392,1.0,79.5,13.5,-98.9,-60.9,0


In [15]:
def _cluster_row(iclust, cell):
    """Build the ClustDF row (27 values, in column order) for one detector cell.

    cell: the rows of the shower frame that share one (DetX, DetY) pair.
    """
    pcode = cell["PCode"]
    times = cell["T"]
    n_part = cell.shape[0]

    # Earliest / latest particle of the cell; on ties the first row wins.
    first = cell.iloc[times.argmin()]
    last = cell.iloc[times.argmax()]

    if n_part == 1:
        # A single particle: the cluster id is its own code and the spreads
        # are set to 0 (a sample standard deviation is undefined for n = 1).
        cls_id = pcode.iloc[0]
        sig_r = 0
        sig_t = 0
    else:
        # The cluster id is the sum of the particle codes (NaN codes are skipped).
        cls_id = pcode.sum()
        sig_r = cell["R"].std()
        sig_t = times.std()

    def momentum(part):
        # Modulus of the momentum vector of one particle.
        return np.sqrt(part["Px"]**2 + part["Py"]**2 + part["Pz"]**2)

    return [iclust, cls_id, n_part,
            int((pcode == 1.).sum()),        # number of gammas
            int((pcode == 1000.).sum()),     # number of electrons
            int((pcode == 100000.).sum()),   # number of muons
            cell["X"].mean(), cell["Y"].mean(), cell["R"].mean(), sig_r,
            times.mean(), times.max() - times.min(), sig_t,
            first["PCode"], first["Px"], first["Py"], first["T"],
            first["ZhA"], first["AzA"], momentum(first),
            last["PCode"], last["Px"], last["Py"], last["T"],
            last["ZhA"], last["AzA"], momentum(last)]


def Clusters(iclust, PartS, ClustDF):
    """Find the clusters of particles in one shower.

    A cluster is the set of particles that fall in the same detector cell,
    i.e. that share the same ('DetX', 'DetY') pair.  Clusters are numbered
    consecutively in order of first appearance of their cell in PartS.

    Parameters
    ----------
    iclust : number to give to the first cluster found in this shower.
    PartS : data frame with the particles of one shower.  Its 'NClst' column
        is updated in place with the cluster number of each particle.
    ClustDF : data frame that receives one summary row per cluster
        (appended in place, 27 values per row).

    Returns
    -------
    (iclust, PartS, ClustDF) where iclust is the next unused cluster number.

    Notes
    -----
    Particles whose 'DetX' or 'DetY' is NaN belong to no cell: they are
    skipped and keep their current 'NClst' value.
    """
    assignments = []  # (index labels of the cluster particles, cluster number)

    # One pass over the cells; sort=False keeps the order of first appearance.
    for _, cell in PartS.groupby(['DetX', 'DetY'], sort=False):
        # Rows are appended one by one so that ClustDF is still updated in place.
        ClustDF.loc[len(ClustDF.index)] = _cluster_row(iclust, cell)
        assignments.append((cell.index, iclust))
        iclust = iclust + 1  # Next cluster

    # Write the cluster numbers only after the grouping has been consumed, so
    # the frame is not modified while it is being iterated.
    for labels, cluster_id in assignments:
        PartS.loc[labels, 'NClst'] = cluster_id

    return iclust, PartS, ClustDF

In [None]:
%%time
# Define the dataframe for cluster info.
ClusterDF = {"NClst": [],'ClsIdC': [], 'NpartC': [], "NGamC": [], "NEleC": [],"NMuCl": [],
            "XmClst": [], "YmClst": [], "RmClst": [], "SigRCl": [],
            "TmClst": [], "dTClst": [], "sTClst": [],
            "FstPID": [], "FstPCX": [], "FstPCY": [], "FstPCT": [], "FstPZh": [], "FstPAz": [], "FstPPm": [],
            "LstPID": [], "LstPCX": [], "LstPCY": [], "LstPCT": [], "LstPZh": [], "LstPAz": [], "LstPPm": []
            } 

ClusterDF = pd.DataFrame(ClusterDF)
iclust = 1 # Cluster counter

# Run over all the showers
nsh = Showers['NShow'].tolist()
for ishow in nsh:
    Shower = Particles[Particles['NShow']==ishow]   #current shower
    iclust, Nshow, ClusterDF = Clusters(iclust, Shower, ClusterDF) 
    Particles.loc[Particles['NShow']==ishow, :] = Nshow

In [None]:
# Particles still carrying cluster number 0 were not assigned to any cluster;
# an empty result means every particle was counted.
Particles.loc[Particles["NClst"].eq(0)]

In [None]:
# Display the per-cluster summary table filled by the shower loop.
ClusterDF

In [None]:
# Identify the cluster that holds the largest number of particles.
# Particles with cluster number 0 (unassigned) are left out of the count.
df_sn = Particles.loc[Particles["NClst"].ne(0)]
particles_per_cluster = df_sn["NClst"].value_counts()
Max_ClustId = particles_per_cluster.idxmax()
print(Max_ClustId)

In [None]:
# Particles that belong to the cluster with the maximum number of particles
Particles.loc[Particles["NClst"].eq(Max_ClustId)]

In [None]:
# Row of the cluster table describing the cluster with the maximum number of particles
ClusterDF.loc[ClusterDF["NClst"].eq(Max_ClustId)]

In [None]:
#Plot the cell with the maximum number of particles
# Select the particles of that cluster once and reuse the selection.
max_cell = Particles.loc[Particles["NClst"]==Max_ClustId]
cx = max_cell['DetX'].iloc[0]   # cell centre (same for every particle of the cluster)
cy = max_cell['DetY'].iloc[0]
Mx = max_cell['PartX']          # positions relative to the cell centre, in mm
My = max_cell['PartY']
plt.scatter(Mx,My)
# The cell centre is generally not an integer (e.g. 202.5), so it is printed
# with decimals instead of being truncated by an integer format.
plt.title('Cell: X=%.2f, Y=%.2f'%(cx,cy))
plt.xlabel('X (mm)')
plt.ylabel('Y (mm)')
# Axis limits follow the detector size (m -> mm), so the frame always matches
# the cell, whatever det_s_x / det_s_y are set to.
half_x = 1000 * det_s_x / 2
half_y = 1000 * det_s_y / 2
plt.axis([-half_x, half_x, -half_y, half_y])
plt.grid(True)