In [1]:
import csv
import pandas as pd

In [21]:
def decode_particle_id(data):
    """Decode particle_id into vertex id, generation, etc.
    """
    components = [
        ('vertex_id',    0xfff0000000000000, 13 * 4),
        ('primary_id',   0x000ffff000000000, 9 * 4),
        ('generation',   0x0000000fff000000, 6 * 4),
        ('secondary_id', 0x0000000000fff000, 3 * 4),
        ('process',      0x0000000000000fff, 0),
    ]
    pid = data['particle_id'].values.astype('u8')
    for name, mask, shift in components:
        data[name] = (pid & mask) >> shift
    return data

In [6]:
# get hits from second sublayer in volume 9
volume_9 = pd.read_csv("event000001000/volume_id_9/event000001000-volume_id_9.csv")
volume_9

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id
0,46117,-111.575,-12.45930,598.0,9,2,3
1,46118,-102.945,-18.67670,598.0,9,2,3
2,46119,-108.234,-10.83210,598.0,9,2,3
3,46120,-112.229,-7.95649,598.0,9,2,3
4,46121,-119.432,-13.30280,598.0,9,2,3
...,...,...,...,...,...,...,...
5511,63227,-175.764,4.27339,1498.0,9,14,108
5512,63228,-151.434,13.83350,1498.0,9,14,108
5513,63229,-106.587,-3.00565,1498.0,9,14,108
5514,63230,-160.443,14.75060,1498.0,9,14,108


In [23]:
# read all truth particle
truth_hits_wo_noise = pd.read_csv("event000001000/event000001000-truth-wo-noise.csv")
truth_hits_wo_noise = decode_particle_id(truth_hits_wo_noise)
truth_hits_wo_noise

Unnamed: 0,hit_id,particle_id,tx,ty,tz,tpx,tpy,tpz,weight,vertex_id,primary_id,generation,secondary_id,process
0,2,22525763437723648,-55.3385,0.630805,-1502.5,-0.570605,0.028390,-15.492200,0.000010,5,113,0,0,0
1,4,297237712845406208,-96.1229,-8.230360,-1502.5,-0.225235,-0.050968,-3.702320,0.000008,66,2,0,0,0
2,5,418835796137607168,-62.6594,-9.375040,-1502.5,-0.281806,-0.023487,-6.573180,0.000009,93,15,0,0,0
3,6,108087696726949888,-57.0856,-8.189710,-1502.5,-0.401129,-0.035276,-10.466900,0.000008,24,19,0,0,0
4,7,968286151951515648,-73.8608,-2.575860,-1502.5,-0.442662,-0.036969,-9.130100,0.000007,215,178,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103300,120935,445864815605317632,-763.3840,51.545000,2944.5,-1.640200,0.208933,6.293810,0.000018,99,123,0,0,0
103301,120936,900720956266250240,-804.6650,3.186900,2944.5,-0.315684,-0.094259,1.328200,0.000007,200,15,0,0,0
103302,120937,238705348779704320,-986.9890,41.731300,2952.5,-0.212508,0.095667,0.731932,0.000009,53,212,0,0,0
103303,120938,373800349619716096,-947.0050,18.765000,2952.5,-0.165762,-0.103945,0.800783,0.000007,83,23,0,0,0


In [27]:
particles_in_volumn_9 = truth_hits_wo_noise[truth_hits_wo_noise['hit_id'].isin(volume_9['hit_id'])]
particles_in_volumn_9

Unnamed: 0,hit_id,particle_id,tx,ty,tz,tpx,tpy,tpz,weight,vertex_id,primary_id,generation,secondary_id,process
43168,46117,333267334498091008,-111.572,-12.47600,598.0,-0.322595,-0.001786,1.71496,0.000009,74,14,0,0,0
43169,46118,860203196868460544,-102.946,-18.67000,598.0,-0.671653,0.030952,3.51559,0.000023,191,228,0,0,0
43170,46119,49542963155435520,-108.235,-10.82050,598.0,-0.518487,-0.019823,2.83545,0.000008,11,49,0,0,0
43171,46120,274723769157681152,-112.229,-7.97334,598.0,-0.324477,0.012420,1.71119,0.000009,61,61,0,0,0
43172,46121,157626261835874304,-119.427,-13.32250,598.0,-0.398539,-0.079798,1.98732,0.000008,35,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59505,63226,558457142751789056,-122.960,12.70980,1498.0,-0.192101,-0.017154,2.34164,0.000006,124,157,0,0,0
59506,63228,837676814955446272,-151.439,13.84820,1498.0,-2.412120,0.171231,24.02410,0.000015,186,106,0,0,0
59507,63229,797143799833821184,-106.570,-2.99934,1498.0,-0.348907,-0.045725,4.97316,0.000007,177,97,0,0,0
59508,63230,58547138753200128,-160.448,14.76500,1498.0,-0.230050,-0.024821,2.14526,0.000004,13,5,0,0,0


In [12]:
particle_id = list(set(particles_in_volumn_9['particle_id']))
len(particle_id)

1674

In [33]:
# merged_df = volume_9.merge(particles_in_volumn_9[['hit_id', 'vertex_id','primary_id','generation','secondary_id']], on='hit_id', how='left').dropna()
merged_df = volume_9.merge(particles_in_volumn_9[['hit_id', 'particle_id']], on='hit_id', how='left').dropna()
merged_df

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,particle_id
0,46117,-111.575,-12.45930,598.0,9,2,3,3.332673e+17
1,46118,-102.945,-18.67670,598.0,9,2,3,8.602032e+17
2,46119,-108.234,-10.83210,598.0,9,2,3,4.954296e+16
3,46120,-112.229,-7.95649,598.0,9,2,3,2.747238e+17
4,46121,-119.432,-13.30280,598.0,9,2,3,1.576263e+17
...,...,...,...,...,...,...,...,...
5510,63226,-122.971,12.71780,1498.0,9,14,108,5.584571e+17
5512,63228,-151.434,13.83350,1498.0,9,14,108,8.376768e+17
5513,63229,-106.587,-3.00565,1498.0,9,14,108,7.971438e+17
5514,63230,-160.443,14.75060,1498.0,9,14,108,5.854714e+16


In [61]:
def count_particle_layer(merged_df):
    layer_id_list =  list(set(merged_df['layer_id']))
    particle_by_layer = dict()
    for l in layer_id_list:
        d =  merged_df[merged_df['layer_id']==l]
        particle_by_layer[l] =  list(set(d['particle_id']))
        # print(set(dataset_3[dataset_3['particle_id'].isin(seclect)]
    # for k, v in particle_by_layer.items():
    #     print(k, len(v))
    return particle_by_layer

In [57]:
import random
particle_by_layer = count_particle_layer(merged_df)
keys = list(particle_by_layer.keys())
smallest =  list(particle_by_layer[14])
select_particle_id = set()

for i in smallest:
    do_select = 0
    for k in keys[:-1]:
        if i in particle_by_layer[k]:
            do_select+=1
    if do_select == len(keys[:-1]):
        select_particle_id.add(i)
selected_particle_id =  list(select_particle_id)
print(len(selected_particle_id))

2 888
4 907
6 868
8 758
10 683
12 594
14 516
131


In [58]:
N = 20
idx = set()
while len(idx) < N:
    idx.add(random.randint(0, len(selected_particle_id)-1))
len(idx)

rand_particles = []
for i in idx:
    rand_particles.append(selected_particle_id[i])
len(rand_particles)

20

In [59]:
for p in rand_particles:
    a = merged_df[merged_df['particle_id']==p]
    print(p)

1.0358732691498598e+17
6.665354249104261e+17
3.7380124297291366e+17
3.602895507376046e+17
1.035926870341714e+17
8.377011416502108e+17
9.682896566448292e+17
3.017470161893458e+17
4512601878822912.0
6.845492049446175e+17
3.017490777736479e+17
1.1710341719680614e+17
1.5313386348321178e+17
8.331668244167393e+17
5.629531832367186e+17
6.89053560486232e+17
4.1883668949080474e+17
6.935579160278467e+17
4.458878366300242e+17
1.531247237928059e+17


In [60]:
selected_hit_by_particles = merged_df[merged_df['particle_id'].isin(rand_particles)]
selected_hit_by_particles

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,particle_id
18,46135,-39.51710,-8.12456,598.0,9,2,4,4.512602e+15
23,46140,-45.92910,-6.47587,598.0,9,2,4,4.458878e+17
31,46148,-62.77970,-10.11000,598.0,9,2,4,6.665354e+17
115,46426,-31.77910,-43.48310,598.0,9,2,14,8.377011e+17
286,47045,14.46120,-27.85690,598.0,9,2,36,1.035873e+17
...,...,...,...,...,...,...,...,...
5319,62657,82.60390,109.69000,1498.0,9,14,70,3.602896e+17
5357,62796,9.42022,86.87560,1498.0,9,14,79,1.531339e+17
5366,62805,10.30730,116.41500,1498.0,9,14,80,3.017491e+17
5431,62977,-75.51540,117.18400,1498.0,9,14,92,4.188367e+17


In [63]:
selected_particle_by_layer = count_particle_layer(selected_hit_by_particles)
for k, v in selected_particle_by_layer.items():
    print(k, len(v))

2 20
4 20
6 20
8 20
10 20
12 20
14 20


In [66]:
for p in rand_particles:
    a = selected_hit_by_particles[selected_hit_by_particles['particle_id']==p]['hit_id']
    print(p, len(a))
    # print(p)

1.0358732691498598e+17 7
6.665354249104261e+17 7
3.7380124297291366e+17 7
3.602895507376046e+17 7
1.035926870341714e+17 7
8.377011416502108e+17 7
9.682896566448292e+17 7
3.017470161893458e+17 7
4512601878822912.0 7
6.845492049446175e+17 7
3.017490777736479e+17 7
1.1710341719680614e+17 7
1.5313386348321178e+17 7
8.331668244167393e+17 7
5.629531832367186e+17 7
6.89053560486232e+17 7
4.1883668949080474e+17 7
6.935579160278467e+17 7
4.458878366300242e+17 7
1.531247237928059e+17 7


In [68]:
selected_hit_by_particles.to_csv("event000001000/volume_id_9/hits-vol_9_20_track.csv",index=False)