In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [2]:
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential
from sklearn.metrics import roc_curve, auc,roc_auc_score
from sklearn.model_selection import train_test_split
import pandas as pd
from matplotlib import gridspec
from scipy import stats
from sklearn import preprocessing
import os
import keras
# Restrict which CUDA device(s) this process can see; set before TensorFlow is imported below.
# NOTE(review): the index "2" is machine-specific — confirm it matches the host this runs on.
os.environ["CUDA_VISIBLE_DEVICES"]="2"
import random

import tensorflow as tf
# TF1-style session configuration: allow GPU memory to grow on demand and cap
# this process at half of the GPU's memory (per_process_gpu_memory_fraction=0.5).
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.5)
# NOTE(review): `sess` is created here but is not explicitly registered with Keras
# anywhere in the visible cells — confirm Keras actually picks up these options.
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
def build_data_arrays(SR, SB, gaiadata2):
    """Re-centre the position columns of three star arrays.

    For each input array a new 6-column array is built as
    ``[col0, col1, col3 - center_ra, col2 - center_dec, col4, col5]``,
    i.e. columns 2 and 3 are swapped and shifted by the module-level
    globals ``center_dec`` / ``center_ra``.

    NOTE(review): ``center_ra`` and ``center_dec`` are not defined in the
    visible part of the notebook; they must exist in the kernel namespace
    before this function is called.

    Returns the transformed ``(SR, SB, gaiadata2)`` in that order.
    """
    def _recentred(arr):
        # Column 3 is shifted by center_ra, column 2 by center_dec.
        shifted_col3 = arr[:, 3] - center_ra
        shifted_col2 = arr[:, 2] - center_dec
        return np.column_stack(
            (arr[:, 0], arr[:, 1], shifted_col3, shifted_col2, arr[:, 4], arr[:, 5])
        )

    return _recentred(SR), _recentred(SB), _recentred(gaiadata2)

In [4]:
# NOTE(review): not referenced by any cell visible in this notebook excerpt;
# presumably a row-count threshold used elsewhere — confirm or remove.
threshold_arr_size = 10000

In [5]:
def calc_diff(SR, SB):
    """Return the absolute difference between the lengths of ``SR`` and ``SB``."""
    n_first, n_second = len(SR), len(SB)
    if n_first >= n_second:
        return n_first - n_second
    return n_second - n_first

In [6]:
def find_SR_SB_pointers(pointers, increment, data_arr, start_point, end_point):
    """Widen the sideband window while the SR/SB row counts keep getting closer.

    ``pointers`` holds four boundaries on column 0 of ``data_arr``:
    rows strictly between ``pointers[1]`` and ``pointers[2]`` form SR, and rows
    strictly between ``pointers[0]``/``pointers[1]`` or
    ``pointers[2]``/``pointers[3]`` form SB.

    Each iteration alternately pushes the upper edge ``pointers[3]`` up (first)
    and the lower edge ``pointers[0]`` down by ``increment``, clamped to
    ``end_point`` / ``start_point``.  The loop runs while ``calc_diff`` has not
    increased and ``pointers[3]`` is still below ``end_point``.

    ``pointers`` is modified in place.  The returned ``(SR, SB)`` pair is the
    selection from *before* the last widening step, so the final ``pointers``
    values may be one step wider than the returned arrays.
    """
    def _select(bounds):
        # Boolean masks on column 0; all comparisons are strict.
        key = data_arr[:, 0]
        in_signal = (key > bounds[1]) & (key < bounds[2])
        in_lower = (key > bounds[0]) & (key < bounds[1])
        in_upper = (key > bounds[2]) & (key < bounds[3])
        return data_arr[in_signal], data_arr[in_lower | in_upper]

    SR, SB = _select(pointers)
    best_diff = calc_diff(SR, SB)
    best_SR, best_SB = SR, SB
    latest_diff = best_diff
    widen_lower_edge = False  # the first step widens the upper edge

    while latest_diff <= best_diff and pointers[3] < end_point:
        best_diff, best_SR, best_SB = latest_diff, SR, SB
        if widen_lower_edge:
            pointers[0] = max(start_point, pointers[0] - increment)
        else:
            pointers[3] = min(end_point, pointers[3] + increment)
        widen_lower_edge = not widen_lower_edge
        SR, SB = _select(pointers)
        latest_diff = calc_diff(SR, SB)

    return best_SR, best_SB

In [7]:
def angular_distance(angle1,angle2):
    """Euclidean separation of [ra, dec] pairs with RA wrapped at 360.

    Both inputs are 2-D numpy arrays whose columns are ``[ra, dec]``.  The RA
    difference is taken as the smallest of ``|d|``, ``|d + 360|`` and
    ``|d - 360|``; the Dec difference is the plain absolute difference.  The
    result is ``sqrt(dra**2 + ddec**2)`` (no cos(dec) factor is applied).
    """
    ra_gap = angle1[:,0]-angle2[:,0]
    wrapped_candidates = (np.abs(ra_gap+360), np.abs(ra_gap), np.abs(ra_gap-360))
    delta_ra = np.minimum(np.minimum(wrapped_candidates[0], wrapped_candidates[1]),
                          wrapped_candidates[2])
    delta_dec = np.abs(angle1[:,1]-angle2[:,1])
    return np.sqrt(delta_ra**2 + delta_dec**2)

In [8]:
#function from David's file via_machinae.py
def FilterGD1(stars):
    """Flag rows of ``stars`` that coincide with an entry of ``allgd1stars``.

    Reads the module-level global ``allgd1stars``; each of its rows is unpacked
    as ``(ra, dec, pmra, pmdec)``.  For every reference row the combined
    distance to each star is
    ``sqrt(angular_distance**2 + (stars[:,0]-pmdec)**2 + (stars[:,1]-pmra)**2)``,
    where the sky position of a star is taken from columns 3 (ra) and 2 (dec).
    A star counts as matched when that distance is below ``.0001``.

    Only reference rows with exactly one matching star contribute; when more
    than one star matches, the boolean match array is printed and the
    reference row is ignored.

    Returns ``(mask, stars[mask])`` where ``mask`` is a boolean array over the
    rows of ``stars``.
    """
    match_tally = np.zeros(len(stars))
    for reference in allgd1stars:
        ra, dec, pmra, pmdec = reference[0], reference[1], reference[2], reference[3]

        star_positions = np.column_stack((stars[:,3], stars[:,2]))
        sky_sep = angular_distance(star_positions, np.array([[ra,dec]]))
        total_sep = np.sqrt(sky_sep**2+(stars[:,0]-pmdec)**2+(stars[:,1]-pmra)**2)
        is_match = total_sep<.0001

        n_matches = np.count_nonzero(is_match)
        if n_matches > 1:
            # Ambiguous match: report it and skip this reference row.
            print(is_match)
        elif n_matches == 1:
            match_tally += is_match

    selected = match_tally.astype('bool')
    return selected, stars[selected]

In [None]:
# Load the raw scan and the reference list of stream stars.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
# NOTE(review): allow_pickle=True can execute arbitrary code; only load trusted files.
datafile = '/data0/users/bpnachman/Gaia/gaiascan_l101.2_b58.4_ra212.7_dec55.2.npy'
gaiadata = np.load(datafile, allow_pickle=True)

#pm_lat, pm_lon_coslat, lon, lat, color, mag
# NOTE(review): this gaiadata2 is overwritten by a later cell before it is used.
gaiadata2 = np.array(gaiadata[:, [9, 8, 6, 7, 4, 5]]).astype('float32')

allgd1stars = np.load('gd1_stars.npy')

# Boolean stream flag per raw row, plus the raw rows that were flagged.
is_stream_arr, gaiadata3 = FilterGD1(gaiadata)

# Append the stream flag as one extra trailing element on every raw row.
new_gaiadata = [
    np.append(raw_row, stream_flag)
    for raw_row, stream_flag in zip(gaiadata, is_stream_arr)
]

In [None]:
# Replace the raw array with the flagged rows, stacked back into one 2-D array.
gaiadata = np.array(new_gaiadata)

In [None]:
#switch 0 and 1
# Same column pick as the load cell but with raw columns 8 and 9 in swapped order.
# gaiadata2 additionally keeps raw column 10, the stream flag appended earlier.
# NOTE(review): re-running this cell re-indexes the already reduced gaiadata3
# and will fail; it is only valid once per kernel session.
feature_columns = [8, 9, 6, 7, 4, 5]
gaiadata2 = np.array(gaiadata[:, feature_columns + [10]]).astype('float32')
gaiadata3 = np.array(gaiadata3[:, feature_columns]).astype('float32')

In [None]:
# Keep rows where |column 0| > 2 or |column 1| > 2, then rows with
# 0.5 < column 4 < 1.  The same two cuts are applied to both arrays.
# presumably columns 0/1 are proper motions and column 4 is colour — verify.
keep_pm_all = (np.abs(gaiadata2[:, 0]) > 2) | (np.abs(gaiadata2[:, 1]) > 2)
gaiadata2 = gaiadata2[keep_pm_all]
keep_col4_all = (gaiadata2[:, 4] > 0.5) & (gaiadata2[:, 4] < 1)
gaiadata2 = gaiadata2[keep_col4_all]

keep_pm_stream = (np.abs(gaiadata3[:, 0]) > 2) | (np.abs(gaiadata3[:, 1]) > 2)
gaiadata3 = gaiadata3[keep_pm_stream]
keep_col4_stream = (gaiadata3[:, 4] > 0.5) & (gaiadata3[:, 4] < 1)
gaiadata3 = gaiadata3[keep_col4_stream]

In [None]:
def make_fractional_background(all_data, fraction):
    """Keep every flagged row and a random ``fraction`` of the unflagged rows.

    Parameters
    ----------
    all_data : 2-D array-like with at least 7 columns.  Column 6 is the flag:
        rows with a truthy value there are always kept.
    fraction : float in [0, 1].  Each unflagged row is kept independently with
        this probability.

    Returns
    -------
    numpy.ndarray of shape ``(n_kept, 6)``: the first six columns of the kept
    rows, in their original order.  An empty ``(0, 6)`` array is returned when
    nothing is kept or the input is empty.

    Notes
    -----
    Draws come from the standard-library ``random`` module, so ``random.seed``
    controls reproducibility.  ``random.random() < fraction`` is used instead
    of ``random.randint(0, 100) < fraction * 100``: ``randint`` is inclusive on
    both ends (101 outcomes), which made the keep probability ``~num/101`` and
    caused ``fraction=1.0`` to still drop about 1% of the rows.
    """
    all_data = np.asarray(all_data)
    if all_data.ndim < 2 or len(all_data) == 0:
        # Nothing to select from; return an empty array with the expected width.
        return np.empty((0, 6), dtype=all_data.dtype)

    n_rows = len(all_data)
    always_keep = all_data[:, 6].astype(bool)
    draws = np.fromiter((random.random() for _ in range(n_rows)), dtype=float, count=n_rows)
    keep = always_keep | (draws < fraction)

    # Boolean indexing keeps the result 2-D even when no row survives.
    return all_data[keep][:, :6]

In [None]:
# Thin the unflagged background to roughly half; flagged (stream) rows are all kept.
frac_background = make_fractional_background(gaiadata2, 0.5)


# Signal region: -10 < column 0 < -8.  Sidebands: the unit-wide bands on either side.
SR = frac_background[(frac_background[:,0] > -10)*(frac_background[:,0] < -8)]
SB = frac_background[(frac_background[:,0] > -11)*(frac_background[:,0] < -10)+(frac_background[:,0] > -8)*(frac_background[:,0] < -7)]
print(len(SR), len(SB))
pointer1 = -10
pointer2 = -8
all_data = frac_background
# The original referenced `gaiadata3_2`, which is never defined in this notebook
# and raised NameError on a fresh kernel; `gaiadata3` holds the filtered stream stars.
stream = gaiadata3

# Label signal-region rows 1 and sideband rows 0.
X = np.concatenate([SR,SB])
Y = np.concatenate([np.ones(len(SR)),np.zeros(len(SB))])

# Standardise using statistics of the SR+SB sample, and apply the same scaling
# to the full thinned data set.
myscalar = preprocessing.StandardScaler()
myscalar.fit(X)
X_scaled = myscalar.transform(X)
all_data_scaled = myscalar.transform(all_data)

# Labels are only 0 or 1 here, so this mask currently keeps every row.
X_scaled = X_scaled[Y<2]
Y = Y[Y<2]


X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.5)

print('working')

#set biases for each layer, bias_initializer
#initializers.GlorotNormal()

# Train `num_loops` independent networks and average their predictions.
# Column 0 (the variable that defines SR/SB) is dropped from the network inputs.
preds = None
preds_all = None
num_loops = 5

for _ in range(num_loops):
    model = Sequential()
    initializer =tf.keras.initializers.glorot_normal()
    model.add(Dense(256, input_dim=5, activation='relu', bias_initializer = initializer)) 
    model.add(Dropout(0.25))
    model.add(Dense(256, activation='relu', bias_initializer = initializer))
    model.add(Dropout(0.25))
    model.add(Dense(256, activation='relu', bias_initializer = initializer))
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid', bias_initializer = initializer))
    optimizer = keras.optimizers.Adam(lr=1e-4)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    history = model.fit(X_train[:,1:],Y_train, epochs=200, batch_size=200,validation_data=(X_test[:,1:],Y_test), verbose = 0) 

    # Guard against a zero batch size when the test set has fewer than 10 rows.
    preds_curr = model.predict(X_test[:,1:], batch_size=max(1, int(0.1*len(X_test))))
    preds_all_curr = model.predict(all_data_scaled[:, 1:])

    # Accumulate the element-wise sum of predictions across the ensemble.
    if preds is None:
        preds = preds_curr
        preds_all = preds_all_curr
    else:
        preds = preds + preds_curr
        preds_all = preds_all + preds_all_curr

# Ensemble mean; both arrays keep the (n_rows, 1) shape returned by model.predict.
preds = np.array(preds) / num_loops
preds_all = np.array(preds_all) / num_loops

X_test_unscaled = myscalar.inverse_transform(X_test)

# Order the test rows by ascending averaged score, then keep only rows whose
# column 0 lies inside the signal region.
score_order = np.argsort(preds[:,0])
preds_sorted = preds[score_order]
X_test_sorted = X_test_unscaled[score_order]
X_test_sorted = X_test_sorted[(X_test_sorted[:,0] > pointer1) * (X_test_sorted[:,0] < pointer2)]

# The (up to) 100 highest-scoring signal-region test rows.
stars_passing_cut = X_test_sorted[len(X_test_sorted) - 100:]



In [None]:
# Overlay the selected test stars on the filtered stream stars, plotting
# column 3 on the x-axis against column 2 on the y-axis.
fig, ax = plt.subplots()
ax.scatter(stars_passing_cut[:, 3], stars_passing_cut[:, 2], marker='.')
ax.scatter(gaiadata3[:, 3], gaiadata3[:, 2], marker='.', alpha=0.2)
ax.set_xlim(-15, 15)
ax.set_ylim(-15, 15)
plt.show()

In [None]:
# Training vs. validation loss per epoch.  `history` comes from the last
# network trained in the ensemble loop, not from an average over all of them.
loss_curves = history.history
plt.plot(loss_curves['loss'], label='loss', color='blue')
plt.plot(loss_curves['val_loss'], label='val loss', color='red')
plt.legend()