In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
from scipy import special
import h5py
from astropy.wcs import WCS
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import types
import numpy.ma as ma
import sys
import sklearn as skl
import sklearn.svm as svm
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split
from dataloc import *

In [None]:
# Read in data from MXXL hdf5 file
#
# Loads the per-galaxy columns, then keeps only objects that are brighter
# than the apparent-magnitude limit, have a positive observed redshift and
# lie inside the 140 < ra < 160, 0 < dec < 20 patch.

# The context manager guarantees the file is closed even if a read raises.
with h5py.File(MXXL_FILE, 'r') as mxxl_file:
    dec = mxxl_file['Data/dec'][:]
    ra = mxxl_file['Data/ra'][:]
    z_obs = mxxl_file['Data/z_obs'][:]
    app_mag = mxxl_file['Data/app_mag'][:]
    sim_halo_id = mxxl_file['Data/mxxl_id'][:]

# Boolean masks, one entry per galaxy in the file.
bright_filter = app_mag < 19.5
redshift_filter = z_obs > 0
location_filter_1 = ra < 160.0
location_filter_2 = ra > 140.0
location_filter_3 = dec > 0.0
location_filter_4 = dec < 20.0

# A galaxy is kept only if it passes every cut.
keep = (
    bright_filter
    & redshift_filter
    & location_filter_1
    & location_filter_2
    & location_filter_3
    & location_filter_4
)

# Apply the same selection to every column so they stay row-aligned.
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
sim_halo_id = sim_halo_id[keep]

print("Galaxies kept: ", len(dec))

In [None]:
# Luminosity distance for every kept galaxy, derived from its observed redshift.
# NOTE(review): z_to_ldist is not defined in this notebook (presumably it comes
# from the pyutils star import); its units and cosmology are not visible here.
ldist = z_to_ldist(z_obs)

In [None]:
# PREPARE FEATURE: angular distances to nearest 3 neighbors
# PREPARE FEATURE: redshifts of nearest 3 neighbors
# PREPARE FEATURE: Luminosity Distances of nearest 3 neighbors

# The sample is matched against itself, so both coordinate sets are built from
# the same ra/dec arrays.
catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
to_match = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')

# nthneighbor values to query: 2, 3, 4 give the closest 3 other objects,
# because '1' will find the same object.
nn_bins = [2,3,4]

# One row per requested neighbor rank, one column per galaxy.
feature_shape = (len(nn_bins), len(dec))
nn_z = np.empty(feature_shape)
nn_distances = np.empty(feature_shape)
nn_ldist = np.empty(feature_shape)

for row, nth in enumerate(nn_bins):
    neighbor_idx, sep2d, _sep3d = coord.match_coordinates_sky(
        to_match, catalog, nthneighbor=nth
    )

    # On-sky separation in arcseconds, then the neighbor's own redshift and
    # luminosity distance looked up by its index in the catalog.
    nn_distances[row] = sep2d.to(u.arcsec).value
    nn_z[row] = z_obs[neighbor_idx]
    nn_ldist[row] = ldist[neighbor_idx]

In [None]:
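# TODO - PREPARE FEATURE: absolute magnitudes of the neighbors (not implemented yet).
# The draft below refers to small_app_mag / small_z_obs, which no cell above defines.
#my_abs_mag = app_mag_to_abs_mag(small_app_mag, small_z_obs)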

In [None]:
# TODO: should the target galaxy's own apparent magnitude (app_mag) be added as a feature?

In [None]:
# PUT ALL FEATURES INTO X

# Spot check: closest-neighbor redshift and angular separation for galaxy 1.
print(nn_z[0, 1])
print(nn_distances[0, 1])

# Stack the per-rank rows and transpose so each galaxy becomes one row with
# columns: NN-1-z NN-2-z NN-3-z NN-1-ang-dist NN-2-ang-dist NN-3-ang-dist
# NOTE(review): nn_ldist is computed earlier but is not part of X - confirm
# whether that is intentional.
X = np.concatenate((nn_z, nn_distances), axis=0).T
print(X.shape)
print(X[1, :])

In [None]:
# Hold out 20% of the galaxies for testing; the target is the galaxy's own
# observed redshift. The fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, z_obs, test_size=0.2, random_state=42)

In [None]:
# Support-vector regressor with an RBF kernel and hand-picked hyperparameters.
# NOTE(review): the columns of X mix redshifts with angular separations in
# arcseconds, and no feature scaling is applied before the SVR - confirm
# whether standardizing the features was considered.
svr_rbf = svm.SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
# Alternative kernels, currently disabled:
#svr_lin = svm.SVR(kernel="linear", C=100, gamma="auto")
#svr_poly = svm.SVR(kernel="poly", C=100, gamma="auto", degree=3, epsilon=0.1, coef0=1)

# Leftover scaffolding for looping over and plotting several kernels:
#svrs = [svr_rbf]#[svr_rbf, svr_lin, svr_poly]
#kernel_label = ["RBF", "Linear", "Polynomial"]
#model_color = ["m", "c", "g"]

In [None]:
# Train the RBF support-vector regressor on the training split.
svr_rbf.fit(X_train, y_train)

In [None]:
# Evaluate the RBF SVR on the held-out test set.
z_guessed = svr_rbf.predict(X_test)

# R^2 computed from the predictions already in hand; this is the same quantity
# svr_rbf.score(X_test, y_test) reports, without a second predict pass.
score = metrics.r2_score(y_test, z_guessed)
print("R^2 score: ", score)

# Fraction of test galaxies whose predicted redshift lies within 0.01
# (absolute tolerance) of the true value.
right = np.isclose(z_guessed, y_test, rtol=0, atol=0.01)
print("Close z fraction: ", np.mean(right))

In [None]:
# Second model for comparison: a decision-tree regressor trained on the same
# training split as the SVR.
from sklearn import tree

# Seeded so the fitted tree is identical on every run (same seed value as the
# train/test split).
clf = tree.DecisionTreeRegressor(random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Evaluate the decision tree on the held-out test set.
print("R^2 score: ", clf.score(X_test, y_test))
tree_results = clf.predict(X_test)

# Fraction of test galaxies whose predicted redshift lies within 0.01
# (absolute tolerance) of the true value.
right = np.isclose(tree_results, y_test, rtol=0, atol=0.01)
print("Close z fraction: ", np.mean(right))

# Use one set of 50 bins spanning both samples so the two overlaid
# distributions are directly comparable bin by bin.
z_lo = min(np.min(y_test), np.min(tree_results))
z_hi = max(np.max(y_test), np.max(tree_results))
z_bins = np.linspace(z_lo, z_hi, 51)

fig, ax = plt.subplots()
ax.hist(y_test, alpha=.5, bins=z_bins, label="true z_obs")
ax.hist(tree_results, alpha=.5, bins=z_bins, label="decision tree prediction")
ax.set_xlabel("redshift")
ax.set_ylabel("number of test galaxies")
ax.legend()
plt.show()


In [None]:
# Absolute redshift error of the decision tree for each test galaxy.
delta = np.abs(tree_results - y_test)

fig, ax = plt.subplots()
ax.hist(delta, bins=50)
ax.set_xlabel("|predicted z - true z|")
ax.set_ylabel("number of test galaxies")
plt.show()