##### Imports & setup

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

# Feature names (without linear features):
feature_names = ['mean', 'weightedMean', 'std', 'median', 'amplitude', 'beyond1Std', 'cusum', 'IPR10',
                 'kurtosis', 'MPR40_5', 'MPR20_10', 'maxSlope', 'medianAbsDev', 'medianBRP10',
                 'percentAmplitude', 'meanVariance', 'andersonDarlingNorm', 'chi2', 'skew', 'stetsonK']

## Data preparation

### Getting data

In [2]:
# The following paths should be changed accordingly. The data can be saved in a csv from the clean_data.ipynb notebook.
positive_g = pd.read_csv('../../data/clean_data/positive_class_g.csv', index_col=0)
positive_r = pd.read_csv('../../data/clean_data/positive_class_r.csv', index_col=0)
negative_g = pd.read_csv('../../data/clean_data/negative_class_g.csv', index_col=0)
negative_r = pd.read_csv('../../data/clean_data/negative_class_r.csv', index_col=0)

# Adding labels for each class:
positive_g['class'] = 'positive'
positive_r['class'] = 'positive'
negative_g['class'] = 'negative'
negative_r['class'] = 'negative'

### Train-test split

In [3]:
np.random.seed(42)

# Taking a random sample of the negative class with the same size as the positive class:
negative_g_sample = negative_g.sample(frac=len(positive_g)/len(negative_g))

positive_g_train, positive_g_test = train_test_split(positive_g, train_size=0.7)
negative_g_train, _ = train_test_split(negative_g_sample, train_size=0.7)

In [4]:
samples = pd.concat([positive_g_train, negative_g_train])

### Nearest neighbors model

In [None]:
neigh = NearestNeighbors(algorithm='brute', metric='euclidean')
neigh.fit(samples[feature_names])

In [15]:
neighbors_indices = neigh.kneighbors(positive_g_test[feature_names][0:20], 1, return_distance=False)

In [16]:
neighbors = samples.iloc[neighbors_indices.flatten()]

In [17]:
neighbors

Unnamed: 0,mean,weightedMean,std,median,amplitude,beyond1Std,cusum,IPR10,kurtosis,linT,...,andersonDarlingNorm,chi2,skew,stetsonK,i:objectId,d:anomaly_score,class,objectId,candid,cdsxmatch
24398,21.183962,20.767464,0.557037,21.127646,0.990323,0.277778,0.267194,1.449339,-0.242552,0.025686,...,0.315663,0.200957,0.520598,0.884929,ZTF18abadsmu,-0.258526,positive,,,
2724,19.741221,18.546284,0.657252,20.016863,0.917347,0.142857,0.317915,1.533676,6.157907,0.006139,...,1.409638,17.263254,-2.455907,0.982735,ZTF19aapzvoj,-0.017075,positive,,,
13588,18.247466,17.200247,1.720615,17.632827,3.464518,0.12,0.278697,3.697448,5.090302,0.070747,...,3.23459,10.127104,2.317413,0.835153,ZTF17aaaikoz,-0.259944,positive,,,
3054,16.96059,16.956512,0.027989,16.961836,0.030017,0.5,0.415139,0.060034,-3.76437,0.002092,...,0.109315,2.160024,-0.14328,0.946139,ZTF17aaajtfx,-0.107642,positive,,,
15147,18.553313,18.568963,0.175153,18.555319,0.248683,0.333333,0.255349,0.436516,-1.49809,-0.006765,...,0.322919,9.010079,-0.037109,0.861234,ZTF18acuexzs,-0.011751,positive,,,
16446,17.959814,17.856545,0.26587,17.909608,0.304693,0.25,0.371399,0.609386,0.143062,-0.00945,...,0.098522,7.418126,0.90496,0.871309,ZTF20acodcxq,-0.011735,positive,,,
463354,18.565776,18.05885,0.592662,18.575355,0.574687,0.0,0.246458,1.149374,-5.505574,,...,0.175343,10.839507,-0.024634,0.863906,,,negative,ZTF18actabgj,2.58223e+18,Unknown
12052,17.999138,17.987567,0.083192,17.997429,0.139497,0.4,0.398623,0.198845,-0.713002,0.000235,...,0.274627,2.821276,0.430083,0.842371,ZTF17aaaehqt,-0.099879,positive,,,
751,17.881683,17.981509,0.286397,17.825378,0.369308,0.333333,0.323383,0.713293,-1.37344,0.003092,...,0.209808,21.080132,0.447553,0.884914,ZTF18aabpzjg,-0.164549,positive,,,
15604,18.229442,18.234515,0.359553,18.208607,0.559804,0.363636,0.19167,0.931214,-0.487413,-0.015085,...,0.43714,27.595678,-0.452707,0.828032,ZTF18aabpzjg,-0.014177,positive,,,
