In [1]:
import gc
import math
import pathlib

import numpy as np
import pandas as pd
import polars as pl
import pyarrow.parquet as pq

import seaborn as sns

In [2]:
def convert_to_3d(df: pd.DataFrame) -> pd.DataFrame:
    """Converts zenith and azimuth to 3D direction vectors"""
    df['x'] = np.cos(df['azimuth']) * np.sin(df['zenith'])
    df['y'] = np.sin(df['azimuth'])*np.sin(df['zenith'])
    df['z'] = np.cos(df['zenith'])
    return df

def xyz2azzen(x, y, z):
    # https://www.kaggle.com/code/rasmusrse/graphnet-baseline-submission
    r = np.sqrt(x**2 + y**2 + z**2)
    zenith = np.arccos(z/r)
    azimuth = np.arctan2(y, x) #np.sign(results['true_y'])*np.arccos((results['true_x'])/(np.sqrt(results['true_x']**2 + results['true_y']**2)))
    azimuth[azimuth < 0] = azimuth[azimuth < 0] + 2*np.pi
    return azimuth, zenith

def angular_dist_score(az_true, zen_true, az_pred, zen_pred):
    '''
    calculate the MAE of the angular distance between two directions.
    The two vectors are first converted to cartesian unit vectors,
    and then their scalar product is computed, which is equal to
    the cosine of the angle between the two vectors. The inverse 
    cosine (arccos) thereof is then the angle between the two input vectors
    
    Parameters:
    -----------
    
    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian
    
    Returns:
    --------
    
    dist : float
        mean over the angular distance(s) in radian
    '''

    if not (np.all(np.isfinite(az_true)) and
        np.all(np.isfinite(zen_true)) and
        np.all(np.isfinite(az_pred)) and
        np.all(np.isfinite(zen_pred))):
        raise ValueError("All arguments must be finite")
    
    # pre-compute all sine and cosine values
    sa1 = np.sin(az_true)
    ca1 = np.cos(az_true)
    sz1 = np.sin(zen_true)
    cz1 = np.cos(zen_true)
    
    sa2 = np.sin(az_pred)
    ca2 = np.cos(az_pred)
    sz2 = np.sin(zen_pred)
    cz2 = np.cos(zen_pred)
    
    # scalar product of the two cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
    scalar_prod = sz1*sz2*(ca1*ca2 + sa1*sa2) + (cz1*cz2)
    
    # scalar product of two unit vectors is always between -1 and 1, this is against nummerical instability
    # that might otherwise occure from the finite precision of the sine and cosine functions
    scalar_prod =  np.clip(scalar_prod, -1, 1)
    
    # convert back to an angle (in radian)
    return np.average(np.abs(np.arccos(scalar_prod)))

In [3]:
nbs = [
    "/kaggle/input/icecube-notebooks/graphnet-baseline-submission.ipynb",
    "/kaggle/input/icecube-notebooks/tensorflow-lstm-model-inference.ipynb",
    "/kaggle/input/icecube-notebooks/icecube-inference-run1679802608.ipynb",
    "/kaggle/input/icecube-notebooks/icecube-inference-run1680668395.ipynb",
    "/kaggle/input/icecube-notebooks/icecube-inference-run1680866125.ipynb",
    "/kaggle/input/icecube-notebooks/early-sharing-prize-dynedge-1-046.ipynb",
]

In [4]:
df = None
for nb in nbs:
    n = nb.split("/")[-1].split(".")[0]
    _df = pd.read_parquet("/kaggle/input/icecube-inference-batch577/" + n + "/out.parquet")
    _df = pd.concat([_df, pd.read_parquet("/kaggle/input/icecube-inference-batch538/" + n + "/out.parquet")], ignore_index=True)
    _df = pd.concat([_df, pd.read_parquet("/kaggle/input/icecube-inference-batch650/" + n + "/out.parquet")], ignore_index=True)
    _df = pd.concat([_df, pd.read_parquet("/kaggle/input/icecube-inference-batch602/" + n + "/out.parquet")], ignore_index=True)
    if not "x" in _df.columns:
        _df = convert_to_3d(_df)
    _df = _df.rename(columns={"x": f"x_{n}", "y": f"y_{n}", "z": f"z_{n}"})
    # norm
    c = np.sqrt(_df[f"x_{n}"]**2+_df[f"y_{n}"]**2+_df[f"z_{n}"]**2)
    _df[f"x_{n}"] /= c
    _df[f"y_{n}"] /= c
    _df[f"z_{n}"] /= c
    if "direction_kappa" in _df.columns:
        _df["sigma"] = 1/np.sqrt(_df["direction_kappa"])
    if df is not None:
        if "sigma" in _df.columns:
            df = df.merge(_df[["event_id", f"x_{n}", f"y_{n}", f"z_{n}", "sigma"]], on="event_id", how="left")
        else:
            df = df.merge(_df[["event_id", f"x_{n}", f"y_{n}", f"z_{n}"]], on="event_id", how="left")
    else:
        if "sigma" in _df.columns:
            df = _df[["event_id", f"x_{n}", f"y_{n}", f"z_{n}", "sigma"]]
        else:
            df = _df[["event_id", f"x_{n}", f"y_{n}", f"z_{n}"]]
    del(_df)
    gc.collect()

In [5]:
df

Unnamed: 0,event_id,x_graphnet-baseline-submission,y_graphnet-baseline-submission,z_graphnet-baseline-submission,sigma,x_tensorflow-lstm-model-inference,y_tensorflow-lstm-model-inference,z_tensorflow-lstm-model-inference,x_icecube-inference-run1679802608,y_icecube-inference-run1679802608,z_icecube-inference-run1679802608,x_icecube-inference-run1680668395,y_icecube-inference-run1680668395,z_icecube-inference-run1680668395,x_icecube-inference-run1680866125,y_icecube-inference-run1680866125,z_icecube-inference-run1680866125,x_early-sharing-prize-dynedge-1-046,y_early-sharing-prize-dynedge-1-046,z_early-sharing-prize-dynedge-1-046
0,1874986970,-0.454322,-0.040024,0.889938,0.089730,-0.395110,-0.067490,0.916151,-0.474163,-0.077028,0.877061,-0.433580,-0.276207,0.857740,-0.226509,0.055728,0.972413,-0.534563,-0.203332,0.820304
1,1874986985,-0.515027,0.199930,0.833532,1.135477,-0.743145,0.304575,0.595793,-0.734159,-0.012843,0.678856,0.907170,0.205016,-0.367440,-0.113267,0.849811,0.514773,-0.899901,0.270107,0.342375
2,1874986992,0.840164,0.258491,0.476766,1.469807,0.630753,0.259002,0.731484,0.846265,-0.184024,0.499970,0.980372,-0.090857,0.174976,0.639809,0.264051,0.721749,0.896739,0.441355,-0.032623
3,1874986993,-0.311810,0.914238,0.258735,1.536321,-0.868686,-0.425770,-0.253188,0.375515,0.924277,-0.068568,-0.047001,0.730471,0.681324,-0.068476,0.835581,0.545083,-0.458873,0.888500,-0.001981
4,1874987001,-0.173935,-0.016890,0.984612,1.098959,-0.396836,0.012513,0.917804,-0.930673,-0.039554,0.363707,0.071753,-0.460728,0.884636,-0.559430,0.035634,0.828112,-0.993605,0.112510,-0.009556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
799995,1959571370,-0.158166,-0.744386,0.648747,1.681837,-0.509878,-0.089013,0.855629,-0.362247,-0.688109,0.628715,-0.270654,-0.530350,0.803415,-0.457024,-0.093967,0.884477,-0.993871,-0.050985,0.098085
799996,1959571385,0.127265,0.990364,-0.054610,1.918426,-0.102694,0.678669,0.727230,0.051661,0.715874,0.696316,-0.341256,0.391394,0.854608,-0.203382,0.674054,0.710132,-0.290200,0.953383,0.082731
799997,1959571392,-0.658005,-0.196187,-0.727008,0.169487,-0.750252,-0.079318,-0.656377,-0.720326,-0.125552,-0.682178,-0.693230,-0.088004,-0.715323,-0.728104,-0.144120,-0.670145,-0.691901,-0.065727,-0.718994
799998,1959571396,-0.261358,-0.023073,0.964966,0.706945,-0.768481,0.137416,0.624943,-0.779739,-0.041320,0.624740,-0.956091,-0.158329,0.246620,-0.810711,0.167761,0.560895,-0.547356,0.142175,0.824735


In [6]:
# https://www.kaggle.com/code/mmakhyanov/classification-of-the-events?scriptVersionId=125828590&cellId=44

import numpy as np
import pandas as pd
import polars as pl
import seaborn as sns
import math

from pathlib import Path


def add_sides(dataf, account_for_aux):
    if account_for_aux == False:
        return dataf.groupby(['event_id']).agg([
            pl.col('sensor_id').first()]
        ).with_columns([
            pl.col('sensor_id').is_in(list(sides_set)).alias('side'),
            pl.col('sensor_id').is_in(list(top_set)).alias('top'),
            pl.col('sensor_id').is_in(list(bottom_set)).alias('bottom')
        ])
    if account_for_aux == True:
          return dataf.filter(pl.col('auxiliary') == False).groupby(['event_id']).agg([
            pl.col('sensor_id').first()]
        ).with_columns([
            pl.col('sensor_id').is_in(list(sides_set)).alias('side'),
            pl.col('sensor_id').is_in(list(top_set)).alias('top'),
            pl.col('sensor_id').is_in(list(bottom_set)).alias('bottom')
        ])

def join_tables(dataf, data_geometry):
    return dataf.join(data_geometry, on='sensor_id')

def time_rank(dataf, account_for_aux):
    if account_for_aux == True:
        return dataf.filter(pl.col('auxiliary') == False).with_columns(
[
    pl.col('time').rank('ordinal').over('event_id').alias('time_rank')
]).filter(
    pl.col('time_rank').is_in([1,2])
)
    else:
        return dataf.with_columns(
[
    pl.col('time').rank('ordinal').over('event_id').alias('time_rank')
]).filter(
    pl.col('time_rank').is_in([1,2])
)
    
def add_direction(dataf):
    return dataf.groupby('event_id').agg([
    pl.col('z').head(1).alias('first'),
    pl.col('z').tail(1).alias('second')
]).with_columns(
[
    (pl.col('second').arr.explode() - pl.col('first').arr.explode()).alias('direction')
]).with_columns(
[
    (pl.col('direction') > 0).alias('upgoing'),
    (pl.col('direction') == 0).alias('horizontal'),
    (pl.col('direction') < 0).alias('downgoing')
]).select(pl.col('*').sort_by('event_id'))


def join_two_features(dataf, df_train_batch, account_for_aux):
    return dataf.join(df_train_batch.pipe(add_sides, account_for_aux), on='event_id')

def classification_feature(dataf, account_for_aux):
    if account_for_aux == True:
        return dataf.with_columns(
        [
            (pl.col('horizontal') * 0.25 + pl.col('downgoing') * 0.5 + pl.col('side') * 0.75 + pl.col('top') + pl.col('bottom') * 0.25
            ).alias('hard_to_reconstruct_aux_on')
        ]).select([
            pl.col('event_id'),
            pl.col('hard_to_reconstruct_aux_on') / pl.col('hard_to_reconstruct_aux_on').max()
        ])
    if account_for_aux == False:
        return dataf.with_columns(
        [
            (pl.col('horizontal') * 0.25 + pl.col('downgoing') * 0.5 + pl.col('side') * 0.75 + pl.col('top') + pl.col('bottom') * 0.25
            ).alias('hard_to_reconstruct_aux_off')
        ]).select([
            pl.col('event_id'),
            pl.col('hard_to_reconstruct_aux_off') / pl.col('hard_to_reconstruct_aux_off').max()
        ])




PATH_INPUT = Path("/kaggle/input/icecube-neutrinos-in-deep-ice")

train_batch_ids = [577, 538, 650, 602]
df_classification = None
for train_batch_id in train_batch_ids:
    print('Training batch', train_batch_id)
    batch_path = "train/batch_" + str(train_batch_id)+ ".parquet" 
    train_batch = pl.scan_parquet(PATH_INPUT / batch_path).lazy()
    df_train_meta = pl.scan_parquet(PATH_INPUT / "train_meta.parquet").lazy()
    df_sensor_geometry = pl.scan_csv(PATH_INPUT / 'sensor_geometry.csv').with_columns(pl.col('sensor_id').cast(pl.Int16)).lazy()


    DISTANCE = 470

    sides = df_sensor_geometry.with_columns(
    [
        (((pl.col('x') ** 2 + pl.col('y') ** 2) ** 0.5)).alias('xy_distance')
    ]).filter(
    pl.col('xy_distance') > DISTANCE).collect()

    #need to delete these points
    delete_points = sides.filter((pl.col('x') > 400) & (pl.col('x') < 500) & (pl.col('y') < 200) & (pl.col('y') > 0))

    #need to add these points
    add_points = df_sensor_geometry.filter((pl.col('x') > 100) & (pl.col('x') < 180) & (pl.col('y') > 350) & (pl.col('y') < 420)).collect()

    delete_set = set(delete_points.select(pl.col('sensor_id')).to_pandas()['sensor_id'])
    add_set = set(add_points.select(pl.col('sensor_id')).to_pandas()['sensor_id'])
    sides_set = set(sides.select(pl.col('sensor_id')).to_pandas()['sensor_id'])
    sides_set = (sides_set - delete_set) | add_set


    Z_BOTTOM = -500
    Z_TOP = 450

    top_sensors = df_sensor_geometry.filter(pl.col('z') > Z_TOP).collect()
    bottom_sensors = df_sensor_geometry.filter(pl.col('z') < Z_BOTTOM).collect()

    top_set = set(top_sensors.select(pl.col('sensor_id')).to_pandas()['sensor_id'])
    bottom_set = set(bottom_sensors.select(pl.col('sensor_id')).to_pandas()['sensor_id'])


    account_for_aux = False
    temp_2 = train_batch.pipe(join_tables, df_sensor_geometry
                    ).pipe(time_rank, account_for_aux
                    ).pipe(add_direction
                    ).pipe(join_two_features, train_batch, account_for_aux
                    ).pipe(classification_feature, account_for_aux)


    account_for_aux = True
    temp_3 = train_batch.pipe(join_tables, df_sensor_geometry
                    ).pipe(time_rank, account_for_aux
                    ).pipe(add_direction
                    ).pipe(join_two_features, train_batch, account_for_aux
                    ).pipe(classification_feature, account_for_aux)

    if df_classification is None:
        df_classification = temp_2.join(temp_3, on='event_id', how='left').collect().to_pandas()
    else:
        df_classification = pd.concat([df_classification, temp_2.join(temp_3, on='event_id', how='left').collect().to_pandas()], ignore_index=True)

Training batch 577
Training batch 538
Training batch 650
Training batch 602


In [7]:
df_classification

Unnamed: 0,event_id,hard_to_reconstruct_aux_off,hard_to_reconstruct_aux_on
0,1876026856,0.222222,0.222222
1,1877184976,0.222222,0.555556
2,1876110864,0.222222,0.000000
3,1876113996,0.000000,0.777778
4,1876451776,0.333333,0.555556
...,...,...,...
799995,1958054653,0.555556,0.333333
799996,1957857485,0.222222,0.111111
799997,1956440449,0.000000,0.222222
799998,1958539553,0.222222,0.777778


In [8]:
df = df.merge(df_classification, on="event_id", how="left")

In [9]:
df

Unnamed: 0,event_id,x_graphnet-baseline-submission,y_graphnet-baseline-submission,z_graphnet-baseline-submission,sigma,x_tensorflow-lstm-model-inference,y_tensorflow-lstm-model-inference,z_tensorflow-lstm-model-inference,x_icecube-inference-run1679802608,y_icecube-inference-run1679802608,...,y_icecube-inference-run1680668395,z_icecube-inference-run1680668395,x_icecube-inference-run1680866125,y_icecube-inference-run1680866125,z_icecube-inference-run1680866125,x_early-sharing-prize-dynedge-1-046,y_early-sharing-prize-dynedge-1-046,z_early-sharing-prize-dynedge-1-046,hard_to_reconstruct_aux_off,hard_to_reconstruct_aux_on
0,1874986970,-0.454322,-0.040024,0.889938,0.089730,-0.395110,-0.067490,0.916151,-0.474163,-0.077028,...,-0.276207,0.857740,-0.226509,0.055728,0.972413,-0.534563,-0.203332,0.820304,0.222222,0.000000
1,1874986985,-0.515027,0.199930,0.833532,1.135477,-0.743145,0.304575,0.595793,-0.734159,-0.012843,...,0.205016,-0.367440,-0.113267,0.849811,0.514773,-0.899901,0.270107,0.342375,0.000000,1.000000
2,1874986992,0.840164,0.258491,0.476766,1.469807,0.630753,0.259002,0.731484,0.846265,-0.184024,...,-0.090857,0.174976,0.639809,0.264051,0.721749,0.896739,0.441355,-0.032623,0.222222,0.555556
3,1874986993,-0.311810,0.914238,0.258735,1.536321,-0.868686,-0.425770,-0.253188,0.375515,0.924277,...,0.730471,0.681324,-0.068476,0.835581,0.545083,-0.458873,0.888500,-0.001981,0.000000,1.000000
4,1874987001,-0.173935,-0.016890,0.984612,1.098959,-0.396836,0.012513,0.917804,-0.930673,-0.039554,...,-0.460728,0.884636,-0.559430,0.035634,0.828112,-0.993605,0.112510,-0.009556,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
799995,1959571370,-0.158166,-0.744386,0.648747,1.681837,-0.509878,-0.089013,0.855629,-0.362247,-0.688109,...,-0.530350,0.803415,-0.457024,-0.093967,0.884477,-0.993871,-0.050985,0.098085,0.333333,0.555556
799996,1959571385,0.127265,0.990364,-0.054610,1.918426,-0.102694,0.678669,0.727230,0.051661,0.715874,...,0.391394,0.854608,-0.203382,0.674054,0.710132,-0.290200,0.953383,0.082731,0.666667,0.666667
799997,1959571392,-0.658005,-0.196187,-0.727008,0.169487,-0.750252,-0.079318,-0.656377,-0.720326,-0.125552,...,-0.088004,-0.715323,-0.728104,-0.144120,-0.670145,-0.691901,-0.065727,-0.718994,0.222222,0.000000
799998,1959571396,-0.261358,-0.023073,0.964966,0.706945,-0.768481,0.137416,0.624943,-0.779739,-0.041320,...,-0.158329,0.246620,-0.810711,0.167761,0.560895,-0.547356,0.142175,0.824735,0.222222,0.555556


In [11]:
df.isna().any()

event_id                               False
x_graphnet-baseline-submission         False
y_graphnet-baseline-submission         False
z_graphnet-baseline-submission         False
sigma                                  False
x_tensorflow-lstm-model-inference      False
y_tensorflow-lstm-model-inference      False
z_tensorflow-lstm-model-inference      False
x_icecube-inference-run1679802608      False
y_icecube-inference-run1679802608      False
z_icecube-inference-run1679802608      False
x_icecube-inference-run1680668395      False
y_icecube-inference-run1680668395      False
z_icecube-inference-run1680668395      False
x_icecube-inference-run1680866125      False
y_icecube-inference-run1680866125      False
z_icecube-inference-run1680866125      False
x_early-sharing-prize-dynedge-1-046    False
y_early-sharing-prize-dynedge-1-046    False
z_early-sharing-prize-dynedge-1-046    False
hard_to_reconstruct_aux_off            False
hard_to_reconstruct_aux_on              True
dtype: boo

In [14]:
df.hard_to_reconstruct_aux_on = df.hard_to_reconstruct_aux_on.fillna(0.111111)

In [15]:
assert not df.isna().any().any()

In [16]:
df.head()

Unnamed: 0,event_id,x_graphnet-baseline-submission,y_graphnet-baseline-submission,z_graphnet-baseline-submission,sigma,x_tensorflow-lstm-model-inference,y_tensorflow-lstm-model-inference,z_tensorflow-lstm-model-inference,x_icecube-inference-run1679802608,y_icecube-inference-run1679802608,...,y_icecube-inference-run1680668395,z_icecube-inference-run1680668395,x_icecube-inference-run1680866125,y_icecube-inference-run1680866125,z_icecube-inference-run1680866125,x_early-sharing-prize-dynedge-1-046,y_early-sharing-prize-dynedge-1-046,z_early-sharing-prize-dynedge-1-046,hard_to_reconstruct_aux_off,hard_to_reconstruct_aux_on
0,1874986970,-0.454322,-0.040024,0.889938,0.08973,-0.39511,-0.06749,0.916151,-0.474163,-0.077028,...,-0.276207,0.85774,-0.226509,0.055728,0.972413,-0.534563,-0.203332,0.820304,0.222222,0.0
1,1874986985,-0.515027,0.19993,0.833532,1.135477,-0.743145,0.304575,0.595793,-0.734159,-0.012843,...,0.205016,-0.36744,-0.113267,0.849811,0.514773,-0.899901,0.270107,0.342375,0.0,1.0
2,1874986992,0.840164,0.258491,0.476766,1.469807,0.630753,0.259002,0.731484,0.846265,-0.184024,...,-0.090857,0.174976,0.639809,0.264051,0.721749,0.896739,0.441355,-0.032623,0.222222,0.555556
3,1874986993,-0.31181,0.914238,0.258735,1.536321,-0.868686,-0.42577,-0.253188,0.375515,0.924277,...,0.730471,0.681324,-0.068476,0.835581,0.545083,-0.458873,0.8885,-0.001981,0.0,1.0
4,1874987001,-0.173935,-0.01689,0.984612,1.098959,-0.396836,0.012513,0.917804,-0.930673,-0.039554,...,-0.460728,0.884636,-0.55943,0.035634,0.828112,-0.993605,0.11251,-0.009556,0.0,1.0


In [17]:
meta = pq.read_table(
    "/kaggle/input/icecube-neutrinos-in-deep-ice/train_meta.parquet",
    columns=["event_id", "azimuth", "zenith"],
    filters=[("event_id", "in", df.event_id.values)]
).to_pandas()

meta = convert_to_3d(meta)

In [18]:
meta

Unnamed: 0,event_id,azimuth,zenith,x,y,z
0,1747976576,4.711111,1.764444,-0.001254,-0.981308,-0.192439
1,1747976591,4.251911,1.409921,-0.438638,-0.884272,0.160183
2,1747976643,3.105026,1.623184,-0.997961,0.036509,-0.052364
3,1747976710,0.678444,1.672558,0.774523,0.624336,-0.101586
4,1747976725,4.834699,0.256633,0.030968,-0.251929,0.967250
...,...,...,...,...,...,...
799995,2115745808,3.183812,1.525154,-0.998068,-0.042163,0.045626
799996,2115745824,2.137132,1.410924,-0.529701,0.833111,0.159192
799997,2115745844,5.613862,1.413757,0.774591,-0.612820,0.156395
799998,2115745849,0.280501,3.031776,0.105313,0.030340,-0.993976


In [19]:
df = df.merge(meta, on="event_id", how="left")


In [20]:
df

Unnamed: 0,event_id,x_graphnet-baseline-submission,y_graphnet-baseline-submission,z_graphnet-baseline-submission,sigma,x_tensorflow-lstm-model-inference,y_tensorflow-lstm-model-inference,z_tensorflow-lstm-model-inference,x_icecube-inference-run1679802608,y_icecube-inference-run1679802608,...,x_early-sharing-prize-dynedge-1-046,y_early-sharing-prize-dynedge-1-046,z_early-sharing-prize-dynedge-1-046,hard_to_reconstruct_aux_off,hard_to_reconstruct_aux_on,azimuth,zenith,x,y,z
0,1874986970,-0.454322,-0.040024,0.889938,0.089730,-0.395110,-0.067490,0.916151,-0.474163,-0.077028,...,-0.534563,-0.203332,0.820304,0.222222,0.000000,3.365500,0.545063,-0.505530,-0.115122,0.855094
1,1874986985,-0.515027,0.199930,0.833532,1.135477,-0.743145,0.304575,0.595793,-0.734159,-0.012843,...,-0.899901,0.270107,0.342375,0.000000,1.000000,6.126056,1.361371,0.966100,-0.153065,0.207898
2,1874986992,0.840164,0.258491,0.476766,1.469807,0.630753,0.259002,0.731484,0.846265,-0.184024,...,0.896739,0.441355,-0.032623,0.222222,0.555556,3.377643,1.451546,-0.965364,-0.232204,0.118968
3,1874986993,-0.311810,0.914238,0.258735,1.536321,-0.868686,-0.425770,-0.253188,0.375515,0.924277,...,-0.458873,0.888500,-0.001981,0.000000,1.000000,1.804400,1.079331,-0.204087,0.857695,0.471919
4,1874987001,-0.173935,-0.016890,0.984612,1.098959,-0.396836,0.012513,0.917804,-0.930673,-0.039554,...,-0.993605,0.112510,-0.009556,0.000000,1.000000,6.130301,0.471980,0.449348,-0.069238,0.890670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
799995,1959571370,-0.158166,-0.744386,0.648747,1.681837,-0.509878,-0.089013,0.855629,-0.362247,-0.688109,...,-0.993871,-0.050985,0.098085,0.333333,0.555556,4.533564,1.177788,-0.164313,-0.909030,0.382969
799996,1959571385,0.127265,0.990364,-0.054610,1.918426,-0.102694,0.678669,0.727230,0.051661,0.715874,...,-0.290200,0.953383,0.082731,0.666667,0.666667,6.065031,0.479707,0.450581,-0.099886,0.887130
799997,1959571392,-0.658005,-0.196187,-0.727008,0.169487,-0.750252,-0.079318,-0.656377,-0.720326,-0.125552,...,-0.691901,-0.065727,-0.718994,0.222222,0.000000,2.793952,2.371475,-0.654571,0.237189,-0.717829
799998,1959571396,-0.261358,-0.023073,0.964966,0.706945,-0.768481,0.137416,0.624943,-0.779739,-0.041320,...,-0.547356,0.142175,0.824735,0.222222,0.555556,6.206947,1.343148,0.971370,-0.074199,0.225687


In [21]:
df = df.set_index("event_id", drop=True)

In [22]:
from sklearn.neural_network import MLPRegressor

In [23]:
clf = MLPRegressor()
clf.fit(X=df[[col for col in df.columns if col not in ["azimuth", "zenith", "x", "y", "z"]]], y=df[["x", "y", "z"]])

MLPRegressor()

In [24]:
preds = clf.predict(X=df[[col for col in df.columns if col not in ["azimuth", "zenith", "x", "y", "z"]]])
az_pred, zen_pred = xyz2azzen(preds[:, 0], preds[:, 1], preds[:, 2])
print(angular_dist_score(az_true=df["azimuth"].values, zen_true=df["zenith"].values, az_pred=az_pred, zen_pred=zen_pred))

1.0044119966555534


In [25]:
dir(clf)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_backprop',
 '_check_feature_names',
 '_check_n_features',
 '_check_solver',
 '_compute_loss_grad',
 '_estimator_type',
 '_fit',
 '_fit_lbfgs',
 '_fit_stochastic',
 '_forward_pass',
 '_forward_pass_fast',
 '_get_param_names',
 '_get_tags',
 '_init_coef',
 '_initialize',
 '_loss_grad_lbfgs',
 '_more_tags',
 '_no_improvement_count',
 '_optimizer',
 '_random_state',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_unpack',
 '_update_no_improvement_count',
 '_validate_data',
 '_validate_hyperparameters',
 '_validate_input',
 'activation',
 '

In [26]:
import pickle
with open("clf.pickle", "wb") as f:
    pickle.dump(clf, f)