# Predict Neutrino Direction with an LSTM

Using a Tensorflow LSTM layer using the event time steps to the input to predict the Neutrino Direction azimuth and zenith angle

## Imports

In [None]:
# Standard library imports
import os
import random
import math
import logging
from sys import getsizeof
import sys
sys.path.append('..')

# Third-party library imports
import numpy as np
import pandas as pd
import dask.dataframe as dd

# Typing imports
from typing import List, Tuple

from scripts.utils import seed_it_all, compose_event_df, reduce_mem_usage, convert_bytes_to_gmbkb


## Variables

In [None]:
# Parameters
IS_TRAINING = True # Whether to train the model
SEED=10

TIME_LIMIT_HOURS = 1

PULSE_AMOUNT = 100 # Amount of pulses to use for features
FEATURES = [ 'time', 'charge', 'auxiliary', 'x', 'y', 'z'] # Which features to use as the model input

# Directories
DATA_DIR = "../data"
SET = 'train' if IS_TRAINING else 'test'

# logging
LOG_LEVEL = logging.INFO

## Logging

In [None]:
# Setup logging
logging.basicConfig(filename='artifacts/info.log', level=LOG_LEVEL, format='%(asctime)s %(levelname)s %(message)s')

## Functions

In [None]:
seed_it_all(SEED)

### For optimization

## Load the dataframes

In [None]:
sensor_dtypes = { 'x': 'float16', 'y': 'float16', 'z': 'float16' }
sensor_geometry_df = pd.read_csv(f'{DATA_DIR}/sensor_geometry.csv', dtype=sensor_dtypes)
sensor_geometry_df.head(1)

In [None]:
convert_bytes_to_gmbkb(getsizeof(sensor_geometry_df))

In [None]:

meta_dtypes = {'batch_id': 'int16', 'event_id': 'Int64', 'first_pulse_index': 'int32', 'last_pulse_index': 'int32', 'azimuth': 'float16', 'zenith': 'float16'}
meta_df = pd.read_parquet(f'{DATA_DIR}/{SET}_meta.parquet').astype(meta_dtypes)
meta_df.head(1)

In [None]:
convert_bytes_to_gmbkb(getsizeof(meta_df))

In [None]:
batch_directory = f'{DATA_DIR}/{SET}'
batch_file_paths = [f'{batch_directory}/{file}' for file in os.listdir(batch_directory) if os.path.isfile(os.path.join(batch_directory, file))]
print('First batch file path 3 Samples:')
batch_file_paths[:3]


In [None]:
sample_batch_df= pd.read_parquet(batch_file_paths[1])
convert_bytes_to_gmbkb(getsizeof(meta_df))

## Build the dataset

In [172]:
# define a generator function
def data_generator(
    batch_paths:List[str],
    sensor_geometry:pd.DataFrame,
    meta_data: pd.DataFrame, 
    sequence_length:int,
    batch_size:int=32
):
    """Emits a single event training example to be called by the model.fit_generator() method.

    Args:
        batch_paths (List[str]): A list of paths to the batch files
        sensor_geometry_df (pd.DataFrame): The sensor geometry dataframe
        meta_df (pd.DataFrame): The dataframe containing the meta data
        sequence_length (int): The length of the pulse sequence to use for training

    Yields:
        _type_: _description_
    """
    batch_dtypes = {'event_id': 'int32', 'sensor_id': 'int16', 'time': 'int32', 'charge': 'float16', 'auxiliary': 'int8'}
    
   
                
    for batch_path in batch_paths:
        
        batch = pd.read_parquet(batch_path).reset_index().astype(batch_dtypes)
        
        output_batch = None
        
        for event_id in batch['event_id'].unique():
            
            # The event dataframe with a list of pulse readings
            event_data = batch[batch['event_id'] == event_id]
            
            merged_df = pd.merge(event_data, sensor_geometry, on='sensor_id', how='left')
            
            # get the first N pulses with N being the sequence length
            sequence = merged_df.head(sequence_length)[FEATURES]
            n_missing = 100 - len(sequence)
            if n_missing > 0:
                df_missing = pd.DataFrame(0, index=np.arange(n_missing), columns=sequence.columns)
                sequence = pd.concat([sequence, df_missing])
            
            # get the target labels 
            target_labels = meta_data[meta_data['event_id'] == event_id][['azimuth', 'zenith']].values[0] 
            
            # reshape the sequence and target labels to be fed into the model
            x_batch, y_batch = np.reshape(sequence, (sequence_length, len(FEATURES))), np.reshape(target_labels, (1, 2))
            
            
            if output_batch is None:
                    output_batch = [[x_batch, y_batch]]
                    print('output_batch initializing', len(output_batch))  
            else:
                if len(output_batch) == batch_size:
                    output = np.array(output_batch)
                    output_batch = None
                    print('output_batch', len(output_batch))  
                    print('output', len(output_batch))  
                    yield output
                else:
                    output_batch.extend([x_batch, y_batch])
                    print('output_batch extending', len(output_batch),batch_size )  


In [173]:
# create a generator object
data_gen = data_generator(batch_file_paths, sensor_geometry_df, meta_df, sequence_length=PULSE_AMOUNT)

In [174]:
next(data_gen)

output_batch initializing 1
output_batch extending 3
output_batch extending 5
output_batch extending 7
output_batch extending 9
output_batch extending 11
output_batch extending 13
output_batch extending 15
output_batch extending 17
output_batch extending 19
output_batch extending 21
output_batch extending 23
output_batch extending 25
output_batch extending 27
output_batch extending 29
output_batch extending 31
output_batch extending 33
output_batch extending 35
output_batch extending 37
output_batch extending 39
output_batch extending 41
output_batch extending 43
output_batch extending 45
output_batch extending 47
output_batch extending 49
output_batch extending 51
output_batch extending 53
output_batch extending 55
output_batch extending 57
output_batch extending 59
output_batch extending 61
output_batch extending 63
output_batch extending 65
output_batch extending 67
output_batch extending 69
output_batch extending 71
output_batch extending 73
output_batch extending 75
output_batch e

## Build the model

In [126]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [127]:
# Define the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(PULSE_AMOUNT, len(FEATURES))))
model.add(Dense(2, activation='linear')) # set the number of output neurons to 2 and the activation function to linear

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])


In [128]:

# Train the model 
# with input sequence (num_samples, num_timesteps, num_features),
# and the azimuth and zenith angle values for each sequence.
model.fit(data_gen, steps_per_epoch=50, epochs=10)


Epoch 1/10


ValueError: in user code:

    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/aj/anaconda3/envs/KAG_IC_NEU/lib/python3.8/site-packages/keras/engine/input_spec.py", line 232, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_3' (type Sequential).
    
    Input 0 of layer "lstm_3" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, None)
    
    Call arguments received by layer 'sequential_3' (type Sequential):
      • inputs=tf.Tensor(shape=(None, None), dtype=float64)
      • training=True
      • mask=None


In [None]:
# Evaluate the model
loss = model.evaluate(test_sequences, test_values)

## For scoring

In [None]:
def angular_dist_score(az_true:float, zen_true:float, az_pred:float, zen_pred:float):
    '''
    calculate the MAE of the angular distance between two directions.
    The two vectors are first converted to cartesian unit vectors,
    and then their scalar product is computed, which is equal to
    the cosine of the angle between the two vectors. The inverse 
    cosine (arccos) thereof is then the angle between the two input vectors
    
    The lower the angle, the more similar the two vectors are meaning the score is better.
    
    Parameters:
    -----------
    
    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian
    
    Returns:
    --------
    
    dist : float
        mean over the angular distance(s) in radian
    '''
    
    if not (np.all(np.isfinite(az_true)) and
            np.all(np.isfinite(zen_true)) and
            np.all(np.isfinite(az_pred)) and
            np.all(np.isfinite(zen_pred))):
        raise ValueError("All arguments must be finite")
    
    # pre-compute all sine and cosine values
    sa1 = np.sin(az_true)
    ca1 = np.cos(az_true)
    sz1 = np.sin(zen_true)
    cz1 = np.cos(zen_true)
    
    sa2 = np.sin(az_pred)
    ca2 = np.cos(az_pred)
    sz2 = np.sin(zen_pred)
    cz2 = np.cos(zen_pred)
    
    # scalar product of the two Cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
    scalar_prod = sz1*sz2*(ca1*ca2 + sa1*sa2) + (cz1*cz2)
    
    # scalar product of two unit vectors is always between -1 and 1, this is against numerical instability
    # that might otherwise occur from the finite precision of the sine and cosine functions
    scalar_prod =  np.clip(scalar_prod, -1, 1)
    
    # convert back to an angle (in radian)
    return np.average(np.abs(np.arccos(scalar_prod)))