# Lensed QSO work with Gaia DR2

Using the LQSO Castle match with DR2 as the learning set and apply this to LQSO allwise 2.

Trying with machine learning classification.

The idea is to use the following set of _features_ for each LQSO:

* the total proper motion and its error
* the total parallax and its error
* the astrometric colour
* the distances to the nearest 3 neighbours.

This makes 6 _features_ to feed to the SVM/PCA classification.


In [17]:
import os
import numpy as np

from astropy.table import Table, vstack

import matplotlib.pyplot as plt
%matplotlib inline

from astropy.coordinates import SkyCoord 
import astropy.units as u

from sklearn.neighbors import NearestNeighbors
from sklearn import svm, tree
from sklearn.preprocessing import Imputer
#from IPython.display import Math

D2R = np.pi/180.0

wdir = os.path.expanduser('~') + '/Dropbox/Work/Gaia-QSO'

In [12]:
# read the Castle xmatch with DR2
url1 = 'http://vospace.esac.esa.int/vospace/sh/9da5bd4b841f4032ee3f94e2f357067a554434?dl=1'
t1 = Table.read(url1,format='votable')
nt1 = len(t1)
#

Downloading http://vospace.esac.esa.int/vospace/sh/9da5bd4b841f4032ee3f94e2f357067a554434?dl=1 [Done]




In [13]:
# read the DR2 xmatch with allwise
url2 = 'http://vospace.esac.esa.int/vospace/sh/d964d4b3687ede95d64c19609abb3136fbe55c0?dl=1'
t2 = Table.read(url2,format='votable')
nt2 = len(t2)

Downloading http://vospace.esac.esa.int/vospace/sh/d964d4b3687ede95d64c19609abb3136fbe55c0?dl=1 [Done]




In [16]:
#t2.colnames
t2.rename_column('qso_id','qso_name')

In [24]:
t2['qso_name'] = t2['qso_name'].astype(t1['qso_name'].dtype)

In [27]:
#
# now add a column type, all from t1 will be type='LQSO' all from t2 are 'QSO'
t1['type'] = 'LQSO'
t2['type'] = 'QSO'

In [82]:
#
# merge the two tables
#
tx = vstack([t1,t2])

In [83]:
#
# now filter those with NaNs in the following columns
# 
m1 = ~np.isnan(tx['pmra'])
m2 = ~np.isnan(tx['pmdec'])
m3 = ~np.isnan(tx['parallax'])
m4 = ~np.isnan(tx['astrometric_pseudo_colour'])
ix = (m1*m2*m3*m4)
print (len(np.where(ix)[0]))
#print (np.where(np.isnan(t['astrometric_pseudo_colour'])))
t = tx[ix]
nt = len(t)

490945


In [84]:
# the total pm
mu = np.sqrt(t['pmra']**2 + t['pmdec']**2)
# propagate the errors, assuming pmra_error and pmdec_errors are uncorrelated
mu_error = np.sqrt((t['pmra']*t['pmra_error']/mu)**2 + (t['pmdec']*t['pmdec_error']/mu)**2)
#
t['mu'] = mu
t['mu_error'] = mu_error
#
i_lqso = np.where(t['type'] == 'LQSO')[0]
n_lqso = len(i_lqso)
print ("Total number of LQSO is {}".format(n_lqso))
i_qso = np.where(t['type'] == 'QSO')[0]
n_qso = len(i_qso)
print ("Total number of QSO is {}".format(n_qso))

Total number of LQSO is 66
Total number of QSO is 490879


In [85]:
#
# now calculate the nearest neighbour and the 3rd nearest neighbour to each source
# using BallTree
#
#
# build the tree
#
X = np.array([t['ra']*D2R,t['dec']*D2R]).transpose()
# metric haversine is the distance on a sphere
nbrs = NearestNeighbors(n_neighbors=4, algorithm='ball_tree').fit(X)
#tree = BallTree(X,leaf_size=10,metric="haversine")
distances, indices = nbrs.kneighbors(X)

In [86]:
#
# add the distances to the nearest 3 neighbours in table
t["n1"] = distances[:,1]
t["n2"] = distances[:,2]
t["n3"] = distances[:,3]
#

## Machine learning approach

Will try different methods to learn from the LQSO and apply it to the simulated catalogue


In [87]:
X = np.array([t["mu"],t["mu_error"],t["parallax"],t["parallax_error"],\
              t["n1"],t["n2"],t["n3"]]).transpose()
#X = np.array([t["mu"],t["mu_error"],t["parallax"],t["parallax_error"],t["astrometric_pseudo_colour"],\
#              t["n1"],t["n2"],t["n3"]]).transpose()
#
# the shape should be (n_samples, n_features)
y = np.zeros(nt,dtype=np.uint8)
y[i_lqso] = 1

In [101]:
nr_qso = 10000
isub = np.hstack([i_lqso,np.random.choice(i_qso,size=nr_qso)])
Xp = X[isub,:]
yp = y[isub]
clf = tree.DecisionTreeClassifier()
clf.fit(Xp, yp)
#
ipred = np.where(clf.predict(X[i_qso,:]) == 1)[0]
print ("potential LQSO: {} ({:.1f} %)".format(len(ipred),100*len(ipred)/n_qso))

potential LQSO: 1247 (0.3 %)
