In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import time
import os

In [2]:
# Directory that holds the particle data files; later cells read them by bare file name
data_dir = '/home/user/Documents/BUAP/Estancia/CosmicRayReco/MCAnalysis/ClusterAnalysis/Data'
os.chdir(data_dir)

In [3]:
# Per-shower summary table, read from a tab-separated text file
Showers = pd.read_csv('DAT000006_showers.txt', sep='\t')

In [4]:
# Table of all particles at observation level, read from a tab-separated text file
Particles = pd.read_csv('DAT000006_particles.txt', sep='\t')

In [5]:
# Convert the first interaction point from cm to km (1 cm = 1e-5 km).
# Note: re-running this cell rescales the column again.
Showers['Zfirst'] = Showers['Zfirst'] * 1e-5

In [6]:
# Convert the particle positions from cm to m (1 cm = 0.01 m).
# Note: re-running this cell rescales the columns again.
Particles[['x','y']] = Particles[['x','y']] * 0.01

In [7]:
# Descriptive statistics of the shower summary table
# (Zfirst has already been converted to km by an earlier cell)
Showers.describe()

In [8]:
# Dimensions of one detector, in metres (not mm): size along x and size along y.
# For the moment the analysis works with a detector of 1 m^2.
det_s_x, det_s_y = 1, 1


In [9]:
# Place every particle on a grid of detectors of size det_s_x by det_s_y:
#   DetPosx / DetPosy   - index of the grid cell (detector) the particle falls in
#   CoorDetx / CoorDety - position of the particle measured inside that cell
#   Ncluster            - cluster id of the particle, 0 until one is assigned
# The cell index is computed in units of the detector size so that it stays
# consistent with the in-cell coordinate when the detector is not 1 m wide
# (for a size of 1 the division changes nothing).
# np.ceil assigns a particle lying exactly on a cell edge to the lower cell,
# while the modulo gives it an in-cell coordinate of 0.
Particles["DetPosx"] = np.ceil(Particles["x"] / det_s_x)
Particles["DetPosy"] = np.ceil(Particles["y"] / det_s_y)
Particles["CoorDetx"] = Particles["x"] % det_s_x
Particles["CoorDety"] = Particles["y"] % det_s_y
Particles['Ncluster'] = 0

In [10]:
# Descriptive statistics of the particle table, including the detector-grid
# columns added in the previous cell
Particles.describe()

In [11]:
# Define a function to find the clusters of particles
def Clusters(ith_clust, Sh_n):
    """Label the particles of one shower with the detector cell they hit.

    Particles that share the same ('DetPosx', 'DetPosy') pair form one
    cluster. Cluster ids are handed out in order of first appearance of each
    pair, starting at ``ith_clust``.

    Parameters
    ----------
    ith_clust : int
        First cluster id that is still unused.
    Sh_n : pandas.DataFrame
        Particles to label; must contain the columns 'DetPosx' and 'DetPosy'.

    Returns
    -------
    tuple of (pandas.DataFrame, int)
        A copy of ``Sh_n`` (same rows, same order, same index) whose
        'Ncluster' column holds the cluster id of every particle, and the
        next unused cluster id.

    Notes
    -----
    * The frame passed in is not modified; use the returned frame. This avoids
      writing into a slice of a larger frame.
    * Rows with a missing 'DetPosx' or 'DetPosy' keep their current 'Ncluster'
      value (0 if the column does not exist) and do not consume a cluster id.
    * The frame is traversed once, instead of once per distinct cell.
    """
    labelled = Sh_n.copy()

    # Nothing to label: hand back the (empty) copy and the untouched counter
    if len(labelled) == 0:
        return labelled, ith_clust

    x_cells = labelled['DetPosx'].tolist()
    y_cells = labelled['DetPosy'].tolist()
    # NaN never compares equal to itself, so rows without a position are
    # identified up front and skipped in the loop below
    has_position = labelled[['DetPosx', 'DetPosy']].notna().all(axis=1).tolist()

    # Start from the labels already present so skipped rows stay as they are
    if 'Ncluster' in labelled.columns:
        labels = labelled['Ncluster'].tolist()
    else:
        labels = [0] * len(labelled)

    # Maps a detector cell (x index, y index) to the cluster id it was given
    cell_to_cluster = {}
    for row, (xi, yi, ok) in enumerate(zip(x_cells, y_cells, has_position)):
        if not ok:
            continue
        cell = (xi, yi)
        if cell not in cell_to_cluster:
            cell_to_cluster[cell] = ith_clust
            ith_clust = ith_clust + 1  # Next cluster
        labels[row] = cell_to_cluster[cell]

    labelled['Ncluster'] = labels
    return labelled, ith_clust
            

In [12]:
%%time
ith_clust = 1 # First cluster

# Run over all the showers
for ith_shower in range(1, Showers["Shower"].count()+1):
    Sh_n, ith_clust = Clusters(ith_clust, Particles[Particles["sh"]==ith_shower]) 
    Particles.loc[Particles["sh"]==ith_shower, :] = Sh_n

In [13]:
# Sanity check: rows still labelled 0 were never assigned to a cluster
# (cluster ids start at 1), so this selection is expected to be empty
Particles.loc[Particles["Ncluster"].eq(0)]

In [14]:
# Id of the cluster that holds the most particles (label 0 is left out)
df_sn = Particles.loc[Particles["Ncluster"].ne(0)]
cluster_sizes = df_sn["Ncluster"].value_counts()
print(cluster_sizes.idxmax())

In [15]:
# Summary of the cluster with the maximum number of particles
# NOTE(review): 24353 is presumably the id printed by the previous cell for
# this data set - update it if the input files change
Particles[Particles["Ncluster"]==24353].describe()

In [None]:
# All particles that belong to cluster 24353
Particles.loc[Particles["Ncluster"].eq(24353)]