## Canon Residuals

This notebook calculates and pickles residuals for spectral fits. These results are then visualized in notebook 04 and can be used to reproduce a figure in the paper.

In [None]:
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA

import sys
import pickle

from tagging.src.datasets import ApogeeDataset
from tagging.src.networks import ConditioningAutoencoder,Embedding_Decoder,Feedforward
from tagging.paths import path_dataset
# Run on the first CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")



In [None]:
# Degree of the polynomial expansion of the stellar parameters
# (passed to PolynomialFeatures in the fitting cell).
n_degree = 4

# The settings below are not referenced by the cells visible in this notebook.
# NOTE(review): presumably carried over from the neural-network notebooks
# (batch size, number of spectral bins, number of conditioning parameters) -- confirm.
n_conditioned = 2
n_bins = 7751
n_batch = 9

In [None]:
# Load the pickled dataset from the project-configured path.
# Unpickling can execute arbitrary code, so only point `path_dataset` at a trusted file.
# NOTE(review): presumably a DataFrame with "spectra" and "params" columns, which is
# how the fitting cell accesses it -- confirm.
overall_data = pd.read_pickle(path_dataset)

# `data` is kept as a second name for the same object.
data = overall_data

In [None]:

# Fit every spectral bin as a polynomial in the stellar parameters by linear
# least squares, then keep what the fit cannot explain as the residuals.

# Number of leading dataset rows used for the fit.
n_stars = 50000

# Slice the Python lists first so only the rows that are actually used get
# converted, and build a plain ndarray (np.matrix is discouraged by NumPy).
spectra_matrix = np.asarray(overall_data["spectra"].tolist()[:n_stars])
params_list = overall_data.params.tolist()[:n_stars]

print("shape:{}".format(np.shape(spectra_matrix)))

# Design matrix: all polynomial terms of the parameters up to n_degree.
polynomial = PolynomialFeatures(degree=n_degree)
params_matrix = polynomial.fit_transform(np.array(params_list))

# d is the Moore-Penrose pseudo-inverse of the design matrix. It equals
# inv(X^T X) X^T when X has full column rank, but is computed through an SVD,
# which stays stable when high-degree polynomial terms make X^T X badly
# conditioned (explicitly inverting X^T X squares the condition number).
d = np.linalg.pinv(params_matrix)

# s holds the least-squares coefficients: one column per spectral bin.
s = np.dot(d, spectra_matrix)

# Polynomial prediction for every star, and what is left over.
fit_matrix = np.dot(params_matrix, s)
residual_matrix = spectra_matrix - fit_matrix

# Report the size of the fit instead of dumping the whole matrix.
print("fit shape:{}".format(np.shape(fit_matrix)))

The dataset is designed such that the i-th star is matched with the (25000+i)-th star in the dataset (where matched stars are stars with identical chemical composition but differing physical parameters).

We can then create our stars with swapped physical parameters by adding the fit of a star to the residuals of its matched star (since the residuals only contain chemical information). This is exactly what `swapped_matrix_1` and `swapped_matrix_2` are.

In [None]:
# Split the fit and the residuals at row 25000, the offset between a star and
# its matched partner.
fit_first, fit_second = fit_matrix[0:25000], fit_matrix[25000:]
residual_first, residual_second = residual_matrix[0:25000], residual_matrix[25000:]

# Combine each star's polynomial fit with the residuals of its matched partner.
swapped_matrix_1 = fit_first + residual_second
swapped_matrix_2 = fit_second + residual_first


In [None]:
# Half-open range [n_start, n_end) of spectral columns shown in the plots.
n_start, n_end = 0, 256

## Visualizing fit stars

In [None]:
# Build the wavelength axis used for plotting: nlam points equally spaced in
# log10(wavelength), starting at 10**start_wl with a log-step of diff_wl.
# NOTE(review): the config cell sets n_bins = 7751 while this grid has 8575
# points -- confirm that column j of the spectra really corresponds to xdata[j].
nlam = 8575
start_wl = 4.179
diff_wl = 6e-06
val = diff_wl*(nlam) + start_wl  # exclusive upper end of the log10 grid

# np.arange with a non-integer step can return one element more or fewer than
# intended because of floating-point rounding; scaling an integer range
# guarantees exactly nlam points.
wl_full_log = start_wl + diff_wl * np.arange(nlam)

# Convert from log10(wavelength) to wavelength in one vectorised step.
xdata = 10.0 ** wl_full_log
wl_full = list(xdata)



In [None]:
"""invert the scaling used by the neural network"""
def invert_x(x):
    """Undo the scaling applied to the neural-network inputs.

    Maps ``x`` to ``(x + 3.5) / 4``, i.e. the inverse of ``x -> 4 * x - 3.5``.
    Works on anything that supports ``+`` and ``/`` with floats.
    """
    shifted = x + 3.5
    return shifted / 4

#x_test1 = invert_x(x_test1)
#x_test2 = invert_x(x_test2)
#x1_pred = invert_x(x1_pred)
#x1_pred_swp = invert_x(x1_pred_swp)


##### Plotting ###################

# Three stacked panels for one matched pair of stars: the two observed spectra,
# the first spectrum against its swapped reconstruction, and their difference.

# Palette and dash style are defined for reference; this cell does not use them.
colors = ['#377eb8', '#ff7f00', '#4daf4a',
          '#f781bf', '#a65628', '#984ea3',
          '#999999', '#e41a1c', '#dede00']

lw = 1
ls = (0, (5, 5))

i = 1          # which matched pair to plot (row index within the first ten)
n_start = 0    # first spectral column shown
n_end = 256    # one past the last spectral column shown

# Apply the column window exactly once. Re-slicing already-windowed arrays
# with n_start:n_end only works when n_start == 0.
window = slice(n_start, n_end)
wavelengths = xdata[window]

# np.asarray also turns np.matrix inputs into plain 2-D arrays, so row
# indexing below yields 1-D spectra either way.
swapped_1 = np.asarray(swapped_matrix_1[:, window])
swapped_2 = np.asarray(swapped_matrix_2[:, window])

# First ten stars, and the ten rows starting at 25000 (the matched-pair offset).
original_1 = np.asarray(spectra_matrix[:10, window])
original_2 = np.asarray(spectra_matrix[25000:25010, window])

fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, gridspec_kw={'hspace': 0, 'wspace': 0})

# Top panel: the two observed spectra of the matched pair.
ax1.plot(wavelengths, original_1[i], linewidth=lw, label="$x_{1}$", c="b")
ax1.plot(wavelengths, original_2[i], linewidth=lw, label="$x_{2}$", c="darkorange")

# Middle panel: observed spectrum of star 1 (blue) against swapped_1 (orange).
# NOTE(review): the legend labels the observed spectrum as the reconstruction
# and swapped_1 as $x_{2}$; the strings look copied from another notebook --
# confirm they say what the figure is meant to show.
ax2.plot(wavelengths, original_1[i], linewidth=lw, label="$D(E(x_{1},u_{1}),u_{2})$", c="b")
ax2.plot(wavelengths, swapped_1[i], linewidth=lw, label="$x_{2}$", c="darkorange")

fig.text(0.05, 0.62, 'flux', va='center', rotation='vertical', fontsize=20)

# Bottom panel: difference between the two curves of the middle panel.
res1 = original_1[i] - swapped_1[i]
ax3.plot(wavelengths, res1, linewidth=lw, label="$D(E(x_{1},u_{1}),u_{2})-x_{2}$", c="b")

fig.text(0.05, 0.25, 'residuals', va='center', rotation='vertical', fontsize=16)
fig.text(0.13, 0.92, "c) PolyDis", va='center', rotation='horizontal', fontsize=16)

ax1.set_ylim(0.6, 1.0)
ax2.set_ylim(0.6, 1.0)
ax3.set_ylim(-0.025, 0.025)

fig.set_size_inches(14.5, 6.5)

# The x axis is shared, so setting it on the bottom panel applies to all three.
# Limits come from the plotted window itself, so the last index is never
# out of range.
ax3.set_xlim(wavelengths[0], wavelengths[-1])
ax3.set_xlabel(r"Wavelength($\AA$)", fontsize=24)

# Hide the lowest y tick of the two upper panels so it does not collide with
# the top tick of the panel directly below (the panels touch: hspace is 0).
for upper_ax in (ax1, ax2):
    upper_ax.yaxis.get_major_ticks()[0].set_visible(False)

for panel in (ax1, ax2, ax3):
    panel.legend()