In [1]:
from astropy.io import fits
from astropy.table import Table
import numpy as np
import pylab as plt
import random
from scipy import stats
from sklearn.neighbors import KDTree
import time
from sklearn.metrics import mean_squared_error
from astropy.cosmology import FlatLambdaCDM
from os import listdir
import desispec
import desispec.io
import speclite.filters
import scipy
import pandas as pd

In [2]:
## reading fluxes and equivalent widths

run=2
fluxes_bin=np.load("/global/homes/a/ashodkh/results/fluxes_bin_"+str(run)+".txt.npz")["arr_0"]
zs=np.load("/global/homes/a/ashodkh/results/zs_s_"+str(run)+".txt.npz")["arr_0"]
target_lines=np.load("/global/homes/a/ashodkh/results/target_lines_s_"+str(run)+".txt.npz")["arr_0"]
target_ids=np.load("/global/homes/a/ashodkh/results/target_ids_s_"+str(run)+".txt.npz")["arr_0"]

# number of objects used downstream: the first n rows that survive the flux cut
n=25*10**3
lines=["OII_DOUBLET_EW","HGAMMA_EW","HBETA_EW","OIII_4959_EW","OIII_5007_EW","NII_6548_EW"\
       ,"HALPHA_EW","NII_6584_EW","SII_6716_EW","SII_6731_EW"]
cosmo = FlatLambdaCDM(H0=70., Om0=0.3)

## selecting only positive fluxes and saving them to use in other codes
# a row is kept only if every flux bin is strictly positive (log10 is taken below)
select_fluxes=np.all(fluxes_bin>0,axis=1)
np.savez_compressed("/global/homes/a/ashodkh/results/select_positive_fluxes_"+str(run)+".txt", select_fluxes)
fluxes_bin=fluxes_bin[select_fluxes,:]
# zs must be cut with the same mask as the other per-object arrays, otherwise
# zs[i] no longer refers to the same object as fluxes_bin[i] / target_lines[i].
# NOTE(review): assumes zs has one entry per row of fluxes_bin, like
# target_lines and target_ids -- confirm against the code that wrote these files.
zs=zs[select_fluxes]
target_lines=target_lines[select_fluxes]
target_ids=target_ids[select_fluxes]

# fail early with a clear message instead of an IndexError deep inside a loop
if fluxes_bin.shape[0]<n:
    raise ValueError("only "+str(fluxes_bin.shape[0])+" objects have all-positive fluxes, but n="+str(n))

# magnitudes from fluxes. The distance-modulus term is currently disabled:
#   Dl=10**6*cosmo.luminosity_distance(zs[:n]).value ;  magnitudes -= 5*np.log10(Dl/10)
# The float64 buffer is filled in place so the dtype does not depend on fluxes_bin.
magnitudes=np.zeros([n,fluxes_bin.shape[1]])
magnitudes[:,:]=-2.5*np.log10(fluxes_bin[:n,:])

# equivalent widths: entry j of each target_lines row goes to column j
# (same order as the names in `lines`)
EWs=np.zeros([n,len(lines)])
for i in range(n):
    EWs[i,:]=[target_lines[i][j] for j in range(len(lines))]
    

In [3]:
def LLR_inverse_distance(x1,x1_train,y1_train,nn):
    '''
    Local linear regression with inverse distance weight and nn number of nearest neighbors.
    
    For every row of x1 the nn nearest training points are found with a KD-tree,
    a weighted least-squares linear model (weights = 1/distance) is fitted to
    those neighbors, and the model is evaluated at that row of x1.
    
    Input
    -----
    
    x1: Matrix of features in the shape (number of data points, number of features). Outcome will be evaluated at these points.
    
    x1_train: Matrix of features used for training in the shape of (number of data points, number of features).
    
    y1_train: 1-D array of outcomes used for training, one value per row of x1_train.
    
    nn: Number of nearest neighbors to include for each point.
    
    
    Output
    ------
    
    EW_fit: 1-D array of predicted outcomes, one per row of x1 that was kept
            (rows listed in zeros are removed, the remaining order is preserved).
    
    zeros: sorted unique row indices of x1 that have at least one neighbor at zero
           distance. These rows are skipped because their weight 1/distance is
           infinite; callers should delete the same rows from their targets.
    
    '''
    
    x1=np.asarray(x1)
    x1_train=np.asarray(x1_train)
    y1_train=np.asarray(y1_train)

    tree=KDTree(x1_train)
    dist, ind=tree.query(x1,k=nn)

    # removing points on top of each other
    zeros=np.unique(np.where(dist==0)[0])
    print(zeros)
    keep=np.ones(x1.shape[0],dtype=bool)
    keep[zeros]=False
    dist=dist[keep]
    ind=ind[keep]
    x1=x1[keep]

    weights=1/dist            # (n_valid, nn)
    X=x1_train[ind]           # (n_valid, nn, n_features)
    Y=y1_train[ind]           # (n_valid, nn)

    # X^T W with W = diag(weights), without materialising the (nn, nn) diagonal
    XtW=np.transpose(X,(0,2,1))*weights[:,None,:]   # (n_valid, n_features, nn)
    a1=np.matmul(XtW,Y[:,:,None])                   # X^T W Y, (n_valid, n_features, 1)
    a2=np.matmul(XtW,X)                             # X^T W X, (n_valid, n_features, n_features)

    # weighted normal equations, solved per point (raises LinAlgError if singular)
    theta=np.linalg.solve(a2,a1)                    # (n_valid, n_features, 1)

    # evaluate each local model at its own query point; this must use the
    # filtered x1, not a global array, so rows stay aligned with theta
    EW_fit=np.einsum('ij,ij->i',theta[:,:,0],x1)

    return EW_fit,zeros

In [4]:


# Feature matrix: a leading column of ones (intercept term for the linear fit)
# followed by the differences between adjacent magnitude bins.
ones=np.ones([n,1])
x=np.hstack((ones,magnitudes[:n,:-1]-magnitudes[:n,1:]))

# Optional standardisation of the colour columns (disabled):
# av_x=np.zeros(x.shape[1]-1)
# std_x=np.zeros(x.shape[1]-1)
# for i in range(1,x.shape[1]):
#     av_x[i-1]=np.average(x[:,i])
#     std_x[i-1]=np.std(x[:,i])
#     x[:,i]=(x[:,i]-av_x[i-1])/std_x[i-1]

# Only one line is analysed per run; 6 indexes "HALPHA_EW" in `lines`.
for l in (6,):
    EW=np.log10(EWs[:,l])

    # N_cv equal-sized folds (np.split requires n to be divisible by N_cv)
    N_cv=10
    x_split=np.split(x,N_cv)
    EW_split=np.split(EW,N_cv)

    EW_fit_all,EW_obs_all=[],[]
    spearman_all,rms_all,nmad_all,nmad2_all=[],[],[],[]

    for i in range(N_cv):
        # fold i is held out for validation, every other fold is training data
        x_valid=x_split[i]
        EW_valid=EW_split[i]
        x_train=np.concatenate(x_split[:i]+x_split[i+1:],axis=0)
        EW_train=np.concatenate(EW_split[:i]+EW_split[i+1:],axis=0)

        # local linear regression on the 100 nearest training neighbours
        EW_fit,zeros=LLR_inverse_distance(x_valid,x_train,EW_train,100)

        # drop the validation rows the regression skipped so both arrays align
        EW_valid=np.delete(EW_valid,obj=zeros,axis=0)

        # absolute residuals; nmad2 is computed exactly like nmad here
        # (no error term is applied in this cell)
        nmad=np.abs(EW_fit-EW_valid)
        nmad2=np.abs(EW_fit-EW_valid)

        EW_fit_all.append(EW_fit)
        EW_obs_all.append(EW_valid)

        # per-fold scores: Spearman rank coefficient, RMS error,
        # and 1.48 * median absolute residual
        spearman_all.append(stats.spearmanr(EW_fit,EW_valid)[0])
        rms_all.append(np.sqrt(mean_squared_error(EW_fit,EW_valid)))
        nmad_all.append(1.48*np.median(nmad))
        nmad2_all.append(1.48*np.median(nmad2))

    # report the per-fold values followed by their mean
    # (rms_all is collected but intentionally not printed)
    print(lines[l])
    for scores in (spearman_all,nmad_all,nmad2_all):
        print(scores)
        print(np.average(scores))
    print("\n")

    #np.savez_compressed("/global/homes/a/ashodkh/results/logEW_fit_bins_selection"+str(run)+"_line"+str(lines[l])+".txt",EW_fit_all)
    #np.savez_compressed("/global/homes/a/ashodkh/results/logEW_obs_bins_selection"+str(run)+"_line"+str(lines[l])+".txt",EW_obs_all)

    
    



[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
HALPHA_EW
[0.8762985642717703, 0.8833131348661016, 0.8729118949299033, 0.8684810956289755, 0.8625146854583498, 0.8621055963688955, 0.8618396118143381, 0.871363555226169, 0.8739042908646867, 0.8817880217900836]
0.8714520451219274
[0.19580962345535483, 0.19735639981285627, 0.1993750298475247, 0.20175721186299353, 0.20148762271764326, 0.19443759553261505, 0.2007357327399384, 0.20099639537097472, 0.19661135898719315, 0.19979457074705226]
0.19883615410741462
[0.19580962345535483, 0.19735639981285627, 0.1993750298475247, 0.20175721186299353, 0.20148762271764326, 0.19443759553261505, 0.2007357327399384, 0.20099639537097472, 0.19661135898719315, 0.19979457074705226]
0.19883615410741462


