# Construct a nearest neighbour set of features

In [1]:
import astropy
import astropy.coordinates
from astropy.coordinates import SkyCoord, match_coordinates_sky
from astropy import units

import pandas as pd
import numpy as np

Read the input component catalogue, which is available from [fri_frii_background_slice.csv.gz](https://github.com/jbanfield/broome/files/1928276/fri_frii_background_slice.csv.gz)

In [2]:
# Load the gzipped catalogue; the first CSV column is used as the row index.
components = pd.read_csv(
    'fri_frii_background_slice.csv.gz',
    index_col=0,
)

In [3]:
# Show the first ten rows of the loaded catalogue.
components.head(10)

Unnamed: 0,first,sdss,ra,dec,fint,fpeak,rms,maj,min,pa,index,label
0,J003927.8-103249,SDSS J003930.52-103218.6,00 39 27.828,-10 32 49.02,2.84,3.05,0.142,1.21,0.0,33.299999,0,I
1,J003930.4-103219,SDSS J003930.52-103218.6,00 39 30.458,-10 32 19.18,21.6,6.2,0.141,17.09,3.56,23.799999,0,I
2,J011255.4-095047,SDSS J011255.11-095040.6,01 12 55.495,-09 50 47.56,11.57,2.75,0.15,11.86,9.27,146.5,1,I
3,J011255.0-095040,SDSS J011255.11-095040.6,01 12 55.096,-09 50 40.44,27.19,25.31,0.15,2.13,1.04,167.600006,1,I
4,J011255.1-095027,SDSS J011255.11-095040.6,01 12 55.130,-09 50 27.39,7.64,2.2,0.15,11.05,7.61,35.799999,1,I
5,J011247.7-095003,SDSS J011255.11-095040.6,01 12 47.743,-09 50 03.29,48.51,24.88,0.153,7.52,3.79,129.800003,1,I
6,J011250.3-095000,SDSS J011255.11-095040.6,01 12 50.362,-09 50 00.66,7.89,6.84,0.152,2.85,1.32,80.699997,1,I
7,J004149.6-091826,SDSS J004148.22-091703.1,00 41 49.632,-09 18 26.20,1.61,1.69,0.183,3.67,0.0,81.300003,2,I
8,J004150.4-091811,SDSS J004148.22-091703.1,00 41 50.481,-09 18 11.36,42.75,37.05,0.181,3.06,1.56,0.6,2,I
9,J004148.2-091702,SDSS J004148.22-091703.1,00 41 48.218,-09 17 02.67,4.78,5.28,0.171,0.9,0.0,87.199997,2,I


## Find nearest neighbours, and copy features

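Convert ra/dec to astropy's ```SkyCoord```, and use astropy's ```match_coordinates_sky``` to find each component's nearest neighbours. Because the catalogue is matched against itself, the 1st match of every component is the component itself, so we take the 2nd to 7th matches (```nn2``` to ```nn7```), i.e. the 6 nearest other components. For each nearest neighbour, record its row position, copy the FIRST name, take note of the angular separation (in degrees), and copy ```fint, fpeak, rms, maj, min```.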

In [4]:
# ra is interpreted as an hour angle and dec as degrees.
sc = SkyCoord(
    ra=components['ra'],
    dec=components['dec'],
    unit=('hourangle', 'deg'),
)

In [5]:
# Ranks start at 2: the catalogue is matched against itself further down, so
# rank 1 would be each component's own position.
num_nearest_neighbour = 7

# Per rank: matched row position, FIRST name, angular separation, then one
# column for every measurement copied from the neighbour.
feature_names = ['fint', 'fpeak', 'rms', 'maj', 'min']
templates = ['nn{}', 'nn{}_first', 'nn{}_angle']
templates += ['nn{}_' + feature for feature in feature_names]

col_names = [
    template.format(rank)
    for rank in range(2, num_nearest_neighbour + 1)
    for template in templates
]
print(col_names)

# Empty frame aligned row-for-row with the component catalogue.
neighbours = pd.DataFrame(index=components.index, columns=col_names)
print(neighbours.shape)

['nn2', 'nn2_first', 'nn2_angle', 'nn2_fint', 'nn2_fpeak', 'nn2_rms', 'nn2_maj', 'nn2_min', 'nn3', 'nn3_first', 'nn3_angle', 'nn3_fint', 'nn3_fpeak', 'nn3_rms', 'nn3_maj', 'nn3_min', 'nn4', 'nn4_first', 'nn4_angle', 'nn4_fint', 'nn4_fpeak', 'nn4_rms', 'nn4_maj', 'nn4_min', 'nn5', 'nn5_first', 'nn5_angle', 'nn5_fint', 'nn5_fpeak', 'nn5_rms', 'nn5_maj', 'nn5_min', 'nn6', 'nn6_first', 'nn6_angle', 'nn6_fint', 'nn6_fpeak', 'nn6_rms', 'nn6_maj', 'nn6_min', 'nn7', 'nn7_first', 'nn7_angle', 'nn7_fint', 'nn7_fpeak', 'nn7_rms', 'nn7_maj', 'nn7_min']
(4508, 48)


In [6]:
# For every rank, self-match the catalogue and store the matched row position
# together with that row's FIRST name, separation and measurements.
for nn in range(2, num_nearest_neighbour + 1):
    prefix = 'nn{}'.format(nn)
    neighbour = match_coordinates_sky(sc, sc, nthneighbor=nn)
    # Element 0 holds positions into the catalogue, element 1 the on-sky separation.
    matched_rows, separation = neighbour[0], neighbour[1]

    neighbours[prefix] = matched_rows
    neighbours[prefix + '_first'] = np.array(components['first'].iloc[matched_rows])
    neighbours[prefix + '_angle'] = separation.degree
    for name in ['fint', 'fpeak', 'rms', 'maj', 'min']:
        copied = components[name].iloc[matched_rows]
        neighbours['{}_{}'.format(prefix, name)] = np.array(copied)
    
        

In [7]:
# Show the first ten rows of the neighbour feature table.
neighbours.head(10)

Unnamed: 0,nn2,nn2_first,nn2_angle,nn2_fint,nn2_fpeak,nn2_rms,nn2_maj,nn2_min,nn3,nn3_first,...,nn6_maj,nn6_min,nn7,nn7_first,nn7_angle,nn7_fint,nn7_fpeak,nn7_rms,nn7_maj,nn7_min
0,1,J003930.4-103219,0.013593,21.6,6.2,0.141,17.09,3.56,2615,J003742.3-101916,...,0.0,0.0,693,J004325.8-105351,1.03553,5.39,2.69,0.146,6.09,5.6
1,0,J003927.8-103249,0.013593,2.84,3.05,0.142,1.21,0.0,2615,J003742.3-101916,...,0.0,0.0,693,J004325.8-105351,1.028283,5.39,2.69,0.146,6.09,5.6
2,3,J011255.0-095040,0.002568,27.19,25.31,0.15,2.13,1.04,4,J011255.1-095027,...,2.4,1.08,701,J011830.9-104346,1.634402,17.87,3.32,0.126,16.67,8.86
3,2,J011255.4-095047,0.002568,11.57,2.75,0.15,11.86,9.27,4,J011255.1-095027,...,2.4,1.08,701,J011830.9-104346,1.636851,17.87,3.32,0.126,16.67,8.86
4,3,J011255.0-095040,0.003628,27.19,25.31,0.15,2.13,1.04,2,J011255.4-095047,...,2.4,1.08,701,J011830.9-104346,1.638703,17.87,3.32,0.126,16.67,8.86
5,6,J011250.3-095000,0.010777,7.89,6.84,0.152,2.85,1.32,4,J011255.1-095027,...,2.4,1.08,701,J011830.9-104346,1.667826,17.87,3.32,0.126,16.67,8.86
6,5,J011247.7-095003,0.010777,48.51,24.88,0.153,7.52,3.79,4,J011255.1-095027,...,2.4,1.08,701,J011830.9-104346,1.659173,17.87,3.32,0.126,16.67,8.86
7,8,J004150.4-091811,0.005402,42.75,37.05,0.181,3.06,1.56,9,J004148.2-091702,...,12.84,4.04,13,J004259.2-091345,0.29676,17.91,2.47,0.158,25.85,7.02
8,7,J004149.6-091826,0.005402,1.61,1.69,0.183,3.67,0.0,9,J004148.2-091702,...,12.84,4.04,13,J004259.2-091345,0.292327,17.91,2.47,0.158,25.85,7.02
9,10,J004147.9-091643,0.00542,29.58,8.67,0.169,18.91,2.36,8,J004150.4-091811,...,12.84,4.04,13,J004259.2-091345,0.297255,17.91,2.47,0.158,25.85,7.02


Combine the original components, and the nearest neighbours, and save as a gzipped CSV.

In [8]:
# `neighbours` was built on the row index of `components`, so a column-wise
# concat places each component next to its own neighbour features.
data = pd.concat(
    [components, neighbours],
    axis='columns',
)
data.to_csv(
    'fri_frii_background_features.csv.gz',
    compression='gzip',
)