In [1]:
import pandas as pd 
from utils import seed_it_all

In [2]:
DATA_DIR = "./data"
BATCH = 1
EVENT = 24
EXCLUDE_AUXILIARY= True

In [3]:
seed_it_all(10)

In [4]:
train_meta_df = pd.read_parquet(f'{DATA_DIR}/train_meta.parquet', engine='fastparquet')

In [5]:
train_batch_df = pd.read_parquet(f'{DATA_DIR}/train/batch_{BATCH}.parquet', engine='fastparquet').reset_index()

In [6]:
if EXCLUDE_AUXILIARY:
    train_batch_df = train_batch_df[~train_batch_df['auxiliary']]

In [7]:
sensor_geometry = pd.read_csv(f'{DATA_DIR}/sensor_geometry.csv', index_col=0)

In [8]:
event_df = train_batch_df[train_batch_df['event_id'] == EVENT]
event_df = pd.merge(
    left = event_df,
    right = sensor_geometry,
    how='inner',
    on='sensor_id'
)

In [9]:
import numpy as np

x, y, z = event_df['x'], event_df['y'], event_df['z']
x, y, z

(0     -9.68
 1     35.54
 2     35.54
 3     35.54
 4     35.54
 5     35.54
 6     35.54
 7     35.54
 8    -43.27
 9    -43.27
 10   -43.27
 11    35.54
 12    35.54
 Name: x, dtype: float64,
 0     -79.50
 1    -364.83
 2    -364.83
 3    -364.83
 4    -364.83
 5    -364.83
 6    -364.83
 7    -364.83
 8    -267.52
 9    -267.52
 10   -267.52
 11   -364.83
 12   -364.83
 Name: y, dtype: float64,
 0    -219.49
 1     191.34
 2     191.34
 3     208.36
 4     208.36
 5     208.36
 6     208.36
 7     276.44
 8     295.88
 9     278.85
 10    244.81
 11    157.30
 12    259.42
 Name: z, dtype: float64)

In [10]:
coords = np.array((x,y,z)).T

In [11]:
from typing import List, Tuple
from sklearn.decomposition import PCA

def get_direction(coords: np.ndarray) -> np.ndarray:
    """
    Get the direction vector from a list of coordinates.
    """
    pca = PCA(n_components=1)
    pca.fit(coords) 
    direction_vector = pca.components_#type: ignore
    return direction_vector

In [12]:
direction_vector = get_direction(coords)

In [13]:
def get_line(origin, vector, extent):
    below_origin = origin - direction_vector * extent
    above_origin = origin + direction_vector * extent
    line = np.vstack((below_origin, above_origin))
    return line

In [14]:
origin = np.mean(coords, axis=0)
euclidean_distance = np.linalg.norm(coords - origin, axis=1)
extent = np.max(euclidean_distance)
line = get_line(origin, direction_vector, extent)
line

array([[  35.75050904, -551.97437526,  610.96714325],
       [  -8.00127827,  -88.87639397, -224.91637402]])

In [15]:
mean = np.mean(line, axis=0)
mean

array([  13.87461538, -320.42538462,  193.02538462])

In [16]:
points = line - mean
np.sum(points, axis=0)

array([ 3.55271368e-15, -5.68434189e-14, -5.68434189e-14])

In [17]:
from utils import sphere_to_cartesian, adjust_sphere, cartesian_to_sphere

In [18]:

train_metadata=train_meta_df[(train_meta_df['event_id'] == EVENT) & (train_meta_df['batch_id'] == BATCH)]
azimuth = train_metadata['azimuth'].values[0]
zenith= train_metadata['zenith'].values[0]
x,y,z = sphere_to_cartesian(azimuth, zenith) #type: ignore
x,y,z

(0.2711606932818947, -0.8260880094077094, -0.4940146547742188)

In [19]:
truth_trace = get_line([0,0,0], [x,y,z], extent)
truth_trace

array([[  21.87589365, -231.54899065,  417.94175863],
       [ -21.87589365,  231.54899065, -417.94175863]])

In [20]:
truth_trace[:,0], truth_trace[:,1], truth_trace[:,2]

(array([ 21.87589365, -21.87589365]),
 array([-231.54899065,  231.54899065]),
 array([ 417.94175863, -417.94175863]))

## Plot the result

In [21]:
import plotly.graph_objects as go

fig3 = go.Figure(data = [
        go.Scatter3d(
            x=event_df['x'].to_numpy(), y=event_df['y'].to_numpy(), z=event_df['z'].to_numpy(),
            mode='markers',
            marker=dict(size=5, color=event_df['time'].to_numpy(), opacity=1),
            name="Detected"
        ),
        go.Scatter3d(
            x=line[:,0], y=line[:,1], z=line[:,2],
            marker=dict(
                size=4,
                color='red',
            ),
            line=dict(
                color='red',
                width=3
            ),
            name="Predicted"
        ),
        go.Scatter3d(
            x=truth_trace[:,0], y=truth_trace[:,1], z=truth_trace[:,2],
            marker=dict(
                size=4,
                color='green',
            ),
            line=dict(
                color='green',
                width=3
            ),
            name="Truth"
        )
    ],
    
    )
# Add a legend
fig3.update_layout(showlegend=True, legend=dict(x=0, y=1))

fig3.show()

In [22]:
from utils import angular_dist_score, adjust_sphere, cartesian_to_sphere

In [23]:

az_pred, zen_pred = adjust_sphere(*cartesian_to_sphere(x,y,z))

angular_dist_score(az_true=azimuth,zen_true=zenith, az_pred=az_pred, zen_pred=zen_pred)

r 1.0 x2y2 0.7559495208683096


0.0

# Make prediction on test set

In [24]:
test_meta_df = pd.read_parquet(f'{DATA_DIR}/test_meta.parquet', engine='fastparquet')
test_meta_df

Unnamed: 0,batch_id,event_id,first_pulse_index,last_pulse_index
0,661,2092,0,298
1,661,7344,299,334
2,661,9482,335,377


In [25]:
test_batch_df = pd.read_parquet(f'{DATA_DIR}/test/batch_661.parquet', engine='fastparquet').reset_index()
test_batch_df

Unnamed: 0,event_id,sensor_id,time,charge,auxiliary
0,2092,4066,6170,1.275,True
1,2092,3512,6374,0.975,True
2,2092,897,6378,1.475,True
3,2092,2060,6590,0.925,True
4,2092,3072,6625,1.075,True
...,...,...,...,...,...
373,9482,1133,13334,0.675,True
374,9482,2190,14112,1.075,True
375,9482,2057,14713,0.975,True
376,9482,4486,14765,1.425,True


In [26]:
test_events = test_meta_df['event_id'].unique().tolist()
test_events

[2092, 7344, 9482]

In [27]:
def get_event_df(batch_df: pd.DataFrame, sensor_geometry: pd.DataFrame, event_id: str) -> pd.DataFrame:
    """
    Get a DataFrame for a specific event.

    Parameters:
    train_batch_df (pandas.DataFrame): The batch DataFrame.
    sensor_geometry (pandas.DataFrame): The sensor geometry DataFrame.
    event_id (str): The event identifier.

    Returns:
    pandas.DataFrame: A DataFrame containing data for the specified event.
    """
    event_df = batch_df[batch_df['event_id'] == event_id]
    event_df = pd.merge(
        left=event_df,
        right=sensor_geometry,
        how='inner',
        on='sensor_id'
    )
    return event_df

In [28]:
from functools import reduce

test_dfs = [ get_event_df(test_batch_df, sensor_geometry,x) for x in test_events ]
len(test_dfs)

3

In [31]:
results = []

for test_df in test_dfs:
    x, y, z = test_df['x'], test_df['y'], test_df['z']
    coords = np.array((x,y,z)).T
    direction_vector = get_direction(coords)
    print('direction', direction_vector)
    origin = np.mean(coords, axis=0)
    euclidean_distance = np.linalg.norm(coords - origin, axis=1)
    extent = np.max(euclidean_distance)
    line = get_line(origin, direction_vector, extent)
    x, y, z= line[:,0], line[:,1], line[:,2]
    x = x.max() - x.min()
    y = y.max() - y.min()
    z = z.max() - z.min()
    az_pred, zen_pred = adjust_sphere(*cartesian_to_sphere(x,y,z))
    results.append([az_pred, zen_pred])

results
    

direction [[0.39323753 0.7327785  0.55533767]]
r 2356.7645945920267 x2y2 3841381.5215826244
direction [[ 0.45656735 -0.63621559  0.62191316]]
r 1407.2299786229635 x2y2 1214365.2067069958
direction [[0.37338752 0.6023134  0.70554966]]
r 1400.744642948432 x2y2 985358.7194898824


[[1.0782689122931226, 0.9820273837031137],
 [0.9483359318159352, 0.8996128838424357],
 [1.0158567198575796, 0.7875978411980064]]

In [49]:
submission = pd.DataFrame(columns=['event_id', 'azimuth', 'zenith'])
for i, event in enumerate(test_events):
    new_row = pd.DataFrame({ 'event_id': [event], 'azimuth': [results[i][0]],'zenith': [results[i][1]]})
    submission = pd.concat([ new_row, submission.loc[:]])
    
submission

Unnamed: 0,event_id,azimuth,zenith
0,9482,1.015857,0.787598
0,7344,0.948336,0.899613
0,2092,1.078269,0.982027


In [50]:
submission.to_csv('submission.csv', index=True)

!head submission.csv



Unnamed: 0,event_id,azimuth,zenith
0,9482,1.015857,0.787598
0,7344,0.948336,0.899613
0,2092,1.078269,0.982027
