In [1]:
import pandas as pd, numpy as np
import random


In [2]:
def r_calculation(dx, dy, dz):
    """Return the Euclidean length of the displacement (dx, dy, dz).

    Only NumPy-compatible arithmetic is used, so scalars as well as equally
    shaped arrays/Series are accepted and handled element-wise.
    """
    squared_length = dx ** 2 + dy ** 2 + dz ** 2
    return np.sqrt(squared_length)

def energy_weighted_baryceter_coordinates(row1, row2):
    """Combine two interaction points into their energy-weighted barycentre.

    Parameters
    ----------
    row1, row2 : row-like objects (e.g. pandas Series) exposing the labels
        'x', 'y', 'z' and 'edep'.

    Returns
    -------
    A Series holding the 'edep'-weighted mean of 'x', 'y', 'z' plus an 'edep'
    entry equal to the sum of both deposits.
    """
    def weighted_position(row):
        # Coordinates scaled by the energy deposited at that point.
        return row[['x', 'y', 'z']].apply(lambda coord: coord * row['edep'])

    total_edep = row1['edep'] + row2['edep']
    barycentre = (weighted_position(row1) + weighted_position(row2)) / total_edep
    barycentre['edep'] = total_edep

    return barycentre

def reset_interaction_index(df):
    """Re-index `df` by ('event_num', 'interaction_num').

    The current index is moved into columns, any existing 'interaction_num'
    column is discarded, and a fresh one is assigned as the running 0-based
    position of each row within its 'event_num' group. The input frame is not
    modified; a new frame is returned.
    """
    flat = df.reset_index()
    # errors='ignore' covers frames that never had an 'interaction_num' column.
    flat = flat.drop(columns='interaction_num', errors='ignore')
    flat['interaction_num'] = flat.groupby('event_num').cumcount()
    return flat.set_index(['event_num', 'interaction_num'])

def dr_calculation(df):
    """Return a copy of `df` with a 'dr' column added (or replaced).

    'dr' is the Euclidean distance between a row's ('x', 'y', 'z') and the
    previous row of the same group, where groups are defined by the first
    index level. The first row of every group gets 0.

    The computation is vectorized and the input frame is left untouched
    (no temporary 'dx'/'dy'/'dz' columns leak into the caller's frame).
    """
    # Per-group differences; the first row of each group has no predecessor
    # (NaN) and is mapped to a zero step.
    steps = df[['x', 'y', 'z']].groupby(level=0).diff().fillna(0)
    dr = np.sqrt(steps['x'] ** 2 + steps['y'] ** 2 + steps['z'] ** 2)
    return df.assign(dr=dr)

def one_level_packing(df, max_distance=5):
    """Run one packing pass over the interactions in `df`.

    Every row whose distance to the previous interaction of the same event
    ('dr') is non-zero and at most `max_distance` is merged with that previous
    row: its 'x', 'y', 'z', 'edep' are replaced by the energy-weighted
    barycentre of the pair and the previous row is removed. The remaining
    rows are then re-numbered per event.

    Parameters
    ----------
    df : DataFrame indexed by (event_num, interaction_num) with columns
        'x', 'y', 'z', 'edep'.
    max_distance : largest 'dr' that still triggers a merge (default 5, the
        value previously hard-coded).

    Returns
    -------
    A new, re-indexed frame. Its 'dr' column holds the distances computed
    *before* this pass merged anything.
    """
    df = dr_calculation(df)
    # idp - indexes to pack
    idp = df.loc[(df.dr <= max_distance) & (df.dr != 0)].index.tolist()
    absorbed = []
    for indx in idp:
        previous = (indx[0], indx[1] - 1)
        packed_coords = energy_weighted_baryceter_coordinates(df.loc[indx], df.loc[previous])
        df.loc[indx, ['x', 'y', 'z', 'edep']] = packed_coords.values
        # Only remember the absorbed row here: dropping inside the loop copies
        # the whole frame once per merge. A later row never reads an absorbed
        # row (it reads the row that absorbed it), so deferring is safe.
        absorbed.append(previous)
    if absorbed:
        df = df.drop(absorbed, axis=0)
    df = reset_interaction_index(df)
    return df

In [3]:
# Base name of the input CSV; the export cell reuses it for its output file name.
file_name = 'scrapped_data_mul1_big'
csv_path = file_name + '.csv'
# Load the raw interactions and index them by (event_num, interaction_num).
data = reset_interaction_index(pd.read_csv(csv_path))


In [4]:
# Display the loaded interactions, indexed by (event_num, interaction_num).
data

Unnamed: 0_level_0,Unnamed: 1_level_0,index,crystal,edep,x,y,z,slice_sect,time
event_num,interaction_num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0,0,32,753.417,-158.913,-58.842,-166.293,0,3.791976e+07
1,1,1,32,114.153,-158.820,-58.965,-166.193,0,3.791976e+07
1,2,2,32,132.430,-159.144,-57.674,-166.140,0,3.791976e+07
2,0,3,27,291.935,-198.950,46.377,-164.869,21,6.244136e+07
2,1,4,27,48.468,-235.481,78.621,-173.948,42,6.244136e+07
...,...,...,...,...,...,...,...,...,...
58311,6,202402,11,122.433,-47.590,-119.157,-203.353,2,2.098420e+07
58312,0,202403,0,79.860,39.919,13.704,-239.467,3,9.411821e+07
58312,1,202404,0,520.375,45.456,10.574,-255.642,23,9.411821e+07
58312,2,202405,0,175.173,43.197,17.493,-260.374,23,9.411821e+07


In [6]:
# Repeat packing passes until a pass merges nothing (row count unchanged).
# The 'dr' column returned by one_level_packing was computed before that pass
# merged rows, so it is stale and must not be used as the stopping test.
# one_level_packing recomputes 'dr' and re-indexes the frame itself.
while True:
    rows_before = len(data)
    data = one_level_packing(data)
    if len(data) == rows_before:
        break
data = data.drop('dr', axis=1)

KeyboardInterrupt: 

In [None]:
# Display the interactions after packing.
data

In [None]:
# Total deposited energy per event. The column is selected explicitly:
# the first positional argument of GroupBy.sum is `numeric_only`, so
# `.sum('edep')` does not pick a column and sums every numeric column instead.
new_data = data.groupby('event_num')[['edep']].sum()
new_data

In [None]:
# An event counts as fully absorbed when its summed 'edep' equals FULL_ENERGY.
# The sum is a float accumulated over several deposits (and over weighted
# merges during packing), so an exact `==` can miss events by rounding error;
# compare with an absolute tolerance instead.
# NOTE(review): 1000 is presumably the emitted energy in the units of 'edep'
# and the tolerance is chosen well below the 3-decimal precision visible in
# the input data - confirm both.
FULL_ENERGY = 1000
ENERGY_TOLERANCE = 1e-6
new_data['fully_absorbed'] = np.isclose(new_data.edep, FULL_ENERGY, rtol=0, atol=ENERGY_TOLERANCE)
absorption = new_data['fully_absorbed']

# Smearing

In [None]:
# Work on a copy so the packed `data` frame stays untouched.
smeared_data = data.copy()
# Scale the coordinates by 1/10.
# NOTE(review): presumably a length-unit conversion - confirm.
position_columns = ['x', 'y', 'z']
smeared_data[position_columns] = smeared_data[position_columns] / 10
# smeared_data['edep'] = smeared_data['edep'].apply(lambda x: x / 1000)
smeared_data

In [None]:
def position_sigma_distribution(energy):
    """Return the position-smearing sigma for a deposit of `energy`.

    Evaluates (0.27 + 0.62 * sqrt(0.1 / energy)) / 2.35; works element-wise
    on arrays/Series.
    NOTE(review): the constants imply specific energy and length units, and
    2.35 looks like a FWHM-to-sigma factor - confirm against the formula's source.
    """
    resolution = .27 + .62 * np.sqrt(0.1 / energy)
    return resolution / 2.35
def energy_sigma_distribution(energy):
    """Return the energy-smearing sigma for a deposit of `energy`.

    Evaluates sqrt(1 + energy * 2.43) / 2.35; works element-wise on
    arrays/Series.
    NOTE(review): 2.43 was named `res_at_1333` originally, presumably a
    resolution figure at 1333 (keV?) - confirm its meaning and the energy unit.
    """
    resolution_coefficient = 2.43
    variance_like = 1 + energy * resolution_coefficient
    return np.sqrt(variance_like) / 2.35
def gaussian(x, sigma):
    """Draw one sample from a normal distribution N(x, sigma), rounded to 3 decimals.

    Uses the global `random` module generator.
    """
    sample = random.gauss(x, sigma)
    return round(sample, 3)

In [None]:
# Smear every coordinate of every interaction with its own Gaussian draw.
# DataFrame.apply hands `gaussian` a whole column at once, and random.gauss
# makes a single standard-normal draw per call, so the previous code shifted
# all rows of a column by the same random factor. Drawing per element fixes that.
# The position sigma is taken from the *unsmeared* energy, so it is computed
# before 'edep' is overwritten below.
# NOTE(review): no random seed is set, so results differ between runs; re-running
# this cell smears the already smeared values again.
position_sigma = position_sigma_distribution(smeared_data['edep'])
for axis in ['x', 'y', 'z']:
    smeared_data[axis] = [
        gaussian(coordinate, sigma)
        for coordinate, sigma in zip(smeared_data[axis], position_sigma)
    ]
smeared_data['edep'] = smeared_data['edep'].apply(lambda x: gaussian(x, energy_sigma_distribution(x)))
smeared_data

In [None]:
# smeared_data.to_csv(file_name + '_PS.csv')

# Preparing data for graph construction

In [None]:
# Drop bookkeeping columns that are not used as graph features;
# `drop` returns a new frame, so `smeared_data` itself is left intact.
unused_columns = ['index', 'crystal', 'slice_sect']
data_fp = smeared_data.drop(columns=unused_columns)
data_fp

In [None]:
# One feature vector [edep, x, y, z] per interaction.
data_fp['node_features'] = data_fp[['edep', 'x', 'y', 'z']].apply(
    lambda row: np.array(list(row)), axis=1)
# Preview instead of the former probe `data_fp.time[1][1]`: that lookup raises
# KeyError whenever event 1 has fewer than two interactions left after packing
# (the three closely spaced hits shown for event 1 in the raw data suggest so).
data_fp.head()

In [None]:
# Collect the per-interaction feature vectors of each event into one array
# (one row per interaction).
node_features = data_fp.groupby('event_num').apply(
    lambda event: np.array(event.node_features.tolist()))
node_features

In [None]:
# Pair each event's feature array with its 'fully_absorbed' label.
features_frame = node_features.to_frame().rename(columns={0: 'node_features'})
prepared_data = features_frame.join(absorption.to_frame())
# Keep every event except those consisting of exactly one interaction.
node_counts = prepared_data.node_features.apply(len)
prepared_data = prepared_data.loc[node_counts != 1]

prepared_data

In [None]:
# def adjacency_matrix_creating(l):
#     matrix = []
#     for i in range(1, l):
#         row = [0 for _ in range(l)]
#         row[i] = 1
#         matrix.append(np.array(row))
#     matrix.append(np.array([0 for _ in range(l)]))
#     return np.array(matrix)
def shuffling_sequence_creating(x):
    """Return a random permutation of the positions 0 .. len(x) - 1 as a list.

    Shuffling uses the global `random` module generator.
    """
    order = [*range(len(x))]
    random.shuffle(order)
    return order

def nodes_shuffling(seq, node_features):
    """Reorder `node_features` so that output position p holds node_features[seq[p]].

    Returns the reordered entries as a new NumPy array.
    """
    reordered = []
    for source_position in seq:
        reordered.append(node_features[source_position])
    return np.array(reordered)

def shuffled_adjacency_matrix(seq):
    """Build the adjacency matrix of the chain 0 -> 1 -> ... -> n-1 after shuffling.

    `seq` is a permutation of range(n) in which seq[p] is the original node
    placed at position p. The result is an (n, n) integer matrix with
    matrix[i][j] == 1 exactly when position i holds original node k and
    position j holds original node k + 1.

    Accepts any integer sequence (list, tuple, ndarray); an empty sequence
    yields a (0, 0) matrix.

    Raises
    ------
    ValueError
        If `seq` is not a permutation of range(len(seq)).
    """
    order = np.asarray(seq, dtype=int)
    size = order.size
    if not np.array_equal(np.sort(order), np.arange(size)):
        raise ValueError('seq must be a permutation of range(len(seq))')
    # Inverse permutation: position[k] is where original node k ended up.
    # Computed once instead of calling seq.index() twice per edge.
    position = np.argsort(order)
    matrix = np.zeros((size, size), dtype=int)
    matrix[position[:-1], position[1:]] = 1
    return matrix

In [None]:
# Shuffle the node order of every event and store the matching adjacency matrix.
# NOTE: 'node_features' is overwritten with the shuffled order, while the
# adjacency matrix is built assuming the pre-shuffle order; run this cell only
# once on a freshly built `prepared_data`.
prepared_data['shuffling_sequence'] = prepared_data['node_features'].apply(shuffling_sequence_creating)
prepared_data['adjacency_matrix'] = prepared_data['shuffling_sequence'].apply(shuffled_adjacency_matrix)
prepared_data['node_features'] = prepared_data.apply(
    lambda event: nodes_shuffling(event.shuffling_sequence, event.node_features), axis=1)
# The permutation itself is not exported.
prepared_data = prepared_data.drop(columns='shuffling_sequence')
prepared_data.to_json(file_name + '_graph_mode.json')

## Shuffling graph nodes

In [None]:
# def shuffle_sequence_creating(x):
#     res_list = list(range(len(x)))
#     random.shuffle(res_list)
#     return res_list

# def nodes_shuffling(seq, node_features):
#     return np.array([node_features[i] for i in seq])

# def shuffled_adjacency_matrix(seq):
#     l = len(seq)
#     matrix = np.array([np.array([0 for _ in range(l)]) for _ in range(l)])
#     for k in range(l - 1):
#         i = seq.index(k)
#         j = seq.index(k + 1)
#         matrix[i][j] = 1
#     return matrix

In [None]:
# prepared_data['shuffle_sequence'] = prepared_data['node_features'].apply(
#     lambda x: shuffle_sequence_creating(x))
# prepared_data['new_adj_mtrx'] = prepared_data['shuffle_sequence'].apply(lambda x: adj_matrx_shuffling(x))
# prepared_data
# prepared_data['new_n_ftrs'] = prepared_data[['node_features', 'shuffle_sequence']].apply(
#     lambda x: nodes_shuffling(x.shuffle_sequence, x.node_features), axis=1)
# prepared_data[['node_features', 'shuffle_sequence', 'new_n_ftrs']].loc[2]