# Predict Neutrino Direction with an LSTM

Using a Tensorflow LSTM layer using the event time steps to the input to predict the Neutrino Direction azimuth and zenith angle

## Imports

In [4]:
# Standard library imports
import os
import random
import math
import logging
from sys import getsizeof
import sys
sys.path.append('..')

# Third-party library imports
import numpy as np
import pandas as pd
import dask.dataframe as dd

# Typing imports
from typing import List, Tuple

from scripts.utils import seed_it_all, compose_event_df, reduce_mem_usage, convert_bytes_to_gmbkb


## Variables

In [5]:
# Parameters
IS_TRAINING = True # Whether to train the model
SEED=10

TIME_LIMIT_HOURS = 1
PULSE_AMOUNT = 100 # Amount of pulses to use for features
FEATURES = [ 'sensor_id', 'time', 'charge', 'auxiliary'] # Which features to use as the model input

# Directories
DATA_DIR = "../data"
SET = 'train' if IS_TRAINING else 'test'

# logging
LOG_LEVEL = logging.INFO

## Logging

In [6]:
# Setup logging
logging.basicConfig(filename='artifacts/info.log', level=LOG_LEVEL, format='%(asctime)s %(levelname)s %(message)s')

## Functions

In [7]:
seed_it_all(SEED)

### For optimization

## Load the dataframes

In [8]:
sensor_dtypes = { 'x': 'float16', 'y': 'float16', 'z': 'float16' }
sensor_geometry_df = pd.read_csv(f'{DATA_DIR}/sensor_geometry.csv', dtype=sensor_dtypes)
sensor_geometry_df.head(1)

Unnamed: 0,sensor_id,x,y,z
0,0,-256.25,-521.0,496.0


In [9]:
convert_bytes_to_gmbkb(getsizeof(sensor_geometry_df))

'70.69 KB'

In [10]:

meta_dtypes = {'batch_id': 'int16', 'event_id': 'Int64', 'first_pulse_index': 'int32', 'last_pulse_index': 'int32', 'azimuth': 'float16', 'zenith': 'float16'}
meta_df = pd.read_parquet(f'{DATA_DIR}/{SET}_meta.parquet').astype(meta_dtypes)
meta_df.head(1)

Unnamed: 0,batch_id,event_id,first_pulse_index,last_pulse_index,azimuth,zenith
0,1,24,0,60,5.03125,2.087891


In [12]:
convert_bytes_to_gmbkb(getsizeof(meta_df))

'2.83 GB'

In [14]:
batch_directory = f'{DATA_DIR}/{SET}'
batch_file_paths = [f'{batch_directory}/{file}' for file in os.listdir(batch_directory) if os.path.isfile(os.path.join(batch_directory, file))]
print('First batch file path 3 Samples:')
batch_file_paths[:3]


First batch file path 3 Samples:


['../data/train/batch_540.parquet',
 '../data/train/batch_115.parquet',
 '../data/train/batch_136.parquet']

In [15]:
sample_batch_df= pd.read_parquet(batch_file_paths[1])
convert_bytes_to_gmbkb(getsizeof(meta_df))

'2.83 GB'

## Build the dataset

In [None]:
# define a generator function
def data_generator(
    batch_paths:List[str],
    sensor_geometry_df:pd.DataFrame,
    meta_df: pd.DataFrame, 
    sequence_length:int
):
    """Emits a single event training example to be called by the model.fit_generator() method.

    Args:
        batch_paths (List[str]): A list of paths to the batch files
        sensor_geometry_df (pd.DataFrame): The sensor geometry dataframe
        meta_df (pd.DataFrame): The dataframe containing the meta data
        sequence_length (int): The length of the pulse sequence to use for training

    Yields:
        _type_: _description_
    """
    batch_dtypes = {'event_id': 'int32', 'sensor_id': 'int16', 'time': 'int32', 'charge': 'float16', 'auxiliary': 'bool'}
    
    while True:
                
        for batch_path in batch_paths:
            
            batch = pd.read_parquet(batch_path, dtype=batch_dtypes)
            
            for event_id in batch['event_id'].unique():
                
                # The event dataframe with a list of pulse readings
                event_data = batch[batch['event_id'] == event_id][FEATURES]
                
                # get the first N pulses with N being the sequence length
                sequence = event_data.head(sequence_length) 
                # get the target labels 
                target_labels = meta_df[meta_df['event_id'] == event_id][['azimuth', 'zenith']].values[0] 
                
                x_batch, y_batch = np.reshape(sequence, (1, sequence_length, len(FEATURES))), np.reshape(target_labels, (1, 2))
                
                yield x_batch, y_batch

In [None]:
# create a generator object
gen = data_generator(batch_file_paths, batch_size=32, sequence_length=PULSE_AMOUNT)

## Build the model

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [None]:
# Define the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(PULSE_AMOUNT, len(FEATURES))))
model.add(Dense(2, activation='linear')) # set the number of output neurons to 2 and the activation function to linear

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

# Train the model 
# with input sequence (num_samples, num_timesteps, num_features),
# and the azimuth and zenith angle values for each sequence.
model.fit(input_sequences, output_values, batch_size=32, epochs=100)

# Evaluate the model
loss = model.evaluate(test_sequences, test_values)

## For scoring

In [None]:
def angular_dist_score(az_true:float, zen_true:float, az_pred:float, zen_pred:float):
    '''
    calculate the MAE of the angular distance between two directions.
    The two vectors are first converted to cartesian unit vectors,
    and then their scalar product is computed, which is equal to
    the cosine of the angle between the two vectors. The inverse 
    cosine (arccos) thereof is then the angle between the two input vectors
    
    The lower the angle, the more similar the two vectors are meaning the score is better.
    
    Parameters:
    -----------
    
    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian
    
    Returns:
    --------
    
    dist : float
        mean over the angular distance(s) in radian
    '''
    
    if not (np.all(np.isfinite(az_true)) and
            np.all(np.isfinite(zen_true)) and
            np.all(np.isfinite(az_pred)) and
            np.all(np.isfinite(zen_pred))):
        raise ValueError("All arguments must be finite")
    
    # pre-compute all sine and cosine values
    sa1 = np.sin(az_true)
    ca1 = np.cos(az_true)
    sz1 = np.sin(zen_true)
    cz1 = np.cos(zen_true)
    
    sa2 = np.sin(az_pred)
    ca2 = np.cos(az_pred)
    sz2 = np.sin(zen_pred)
    cz2 = np.cos(zen_pred)
    
    # scalar product of the two Cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
    scalar_prod = sz1*sz2*(ca1*ca2 + sa1*sa2) + (cz1*cz2)
    
    # scalar product of two unit vectors is always between -1 and 1, this is against numerical instability
    # that might otherwise occur from the finite precision of the sine and cosine functions
    scalar_prod =  np.clip(scalar_prod, -1, 1)
    
    # convert back to an angle (in radian)
    return np.average(np.abs(np.arccos(scalar_prod)))