In [103]:
import sys, os, pdb, glob, time
import numpy as np
from astropy.io import fits
from astropy.table import Table, Column
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import math
from tqdm import tqdm_notebook as tqdm

In [104]:
### LOAD THE LABELLED TCE TABLE (CSV FILE PRODUCED BY dr25_labels.ipynb)
tce_table = Table.read(
    "dr25_labels.txt",
    format="ascii.csv",
)

In [105]:
### TRAIN / VALIDATION / TEST PARTITIONING

### SHUFFLE THE ROWS WITH A FIXED SEED SO THE SAME ORDER IS DRAWN ON EVERY RUN
num_tces = len(tce_table)
np.random.seed(222)
tce_table = tce_table[np.random.permutation(num_tces)]

### ROW INDICES AT WHICH THE TRAINING (FIRST 80%) AND VALIDATION (NEXT 10%) PARTITIONS END
train_cutoff, val_cutoff = int(0.80 * num_tces), int(0.90 * num_tces)

### CUT THE SHUFFLED TABLE INTO THREE CONTIGUOUS PIECES (TEST GETS THE REMAINING ROWS)
train_tces, val_tces, test_tces = (
    tce_table[:train_cutoff],
    tce_table[train_cutoff:val_cutoff],
    tce_table[val_cutoff:],
)

In [106]:
### NORMALIZE STELLAR PARAMETERS
cols = ['teff', 'radius', 'mass', 'logg', 'feh', 'density']


def _add_normalized_column(table, col, norm_med, norm_std):
    """Attach `(table[col] - norm_med) / norm_std` to `table` as column `col + '_n'`.

    Parameters
    ----------
    table : table-like object supporting `table[name]` get/set (here: the
        train / validation / test tables)
    col : str
        Name of the column to normalize.
    norm_med, norm_std : float
        Offset and scale to apply (taken from the training set by the caller).

    Returns
    -------
    numpy.ndarray
        The normalized values that were stored in the new column.
    """
    ### COPY AS FLOAT: THE IN-PLACE OPERATIONS BELOW WOULD RAISE A CASTING
    ### ERROR IF THE COLUMN HAD BEEN PARSED AS AN INTEGER COLUMN
    vals = np.array(table[col].data, dtype=float)
    vals -= norm_med
    vals /= norm_std
    table[col + '_n'] = vals
    table[col + '_n'].format = "%0.5f"
    return vals


for col in cols:

    ### GET NORMALIZATION VALUES FOR THIS STELLAR PARAMETER
    ### (MEDIAN AND STANDARD DEVIATION FROM TRAINING SET ONLY; THE SAME TWO
    ### NUMBERS ARE THEN APPLIED TO ALL THREE PARTITIONS)
    norm_med = np.median(train_tces[col].data)
    norm_std = np.std(train_tces[col].data)
    print("Normalization values for {}:\n".format(col))
    print("   Median = {0:0.5f}".format(norm_med))
    print("   Standard Deviation = {0:0.5f}\n".format(norm_std))

    ### A ZERO OR NON-FINITE SCALE WOULD SILENTLY FILL THE NORMALIZED COLUMNS
    ### WITH NaN/inf, SO STOP HERE WITH AN EXPLICIT ERROR INSTEAD
    if not np.isfinite(norm_std) or norm_std == 0:
        raise ValueError(
            "Cannot normalize '{}': training-set standard deviation is {}".format(col, norm_std))

    ### APPLY TO TRAINING SET
    train_vals = _add_normalized_column(train_tces, col, norm_med, norm_std)
    print("   Training set median = {0:0.3f}, stdev = {1:0.3f}".format(np.median(train_vals), np.std(train_vals)))

    ### APPLY TO VALIDATION SET
    val_vals = _add_normalized_column(val_tces, col, norm_med, norm_std)
    print("   Validation set median = {0:0.3f}, stdev = {1:0.3f}".format(np.median(val_vals), np.std(val_vals)))

    ### APPLY TO TEST SET
    test_vals = _add_normalized_column(test_tces, col, norm_med, norm_std)
    print("   Test set median = {0:0.3f}, stdev = {1:0.3f}\n\n".format(np.median(test_vals), np.std(test_vals)))



Normalization values for teff:

   Median = 6033.00000
   Standard Deviation = 1234.50694

   Training set median = 0.000, stdev = 1.000
   Validation set median = 0.000, stdev = 1.021
   Test set median = 0.020, stdev = 0.993


Normalization values for radius:

   Median = 1.24900
   Standard Deviation = 14.20059

   Training set median = 0.000, stdev = 1.000
   Validation set median = 0.001, stdev = 0.729
   Test set median = 0.001, stdev = 0.916


Normalization values for mass:

   Median = 1.06500
   Standard Deviation = 0.53075

   Training set median = 0.000, stdev = 1.000
   Validation set median = 0.002, stdev = 0.986
   Test set median = 0.010, stdev = 0.986


Normalization values for logg:

   Median = 4.28500
   Standard Deviation = 0.60327

   Training set median = 0.000, stdev = 1.000
   Validation set median = -0.015, stdev = 0.941
   Test set median = -0.002, stdev = 0.970


Normalization values for feh:

   Median = -0.12000
   Standard Deviation = 0.29945

   Training 