# Initialization

In [1]:
# `display` and `HTML` come from the public `IPython.display` module.
# Importing `display` from `IPython.core.display` is deprecated since IPython 7.14.
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# imports
import pandas as pd
import numpy as np
import os
import sys
import pickle
from matplotlib import pyplot as plt
import matplotlib
import boto3

# random seed (seeds numpy's global random generator)
seed = 42
np.random.seed(seed)

# local files paths
# NOTE: "__file__" is a plain string literal here, so realpath() resolves it
# relative to the current working directory. The work dir is therefore the
# directory two levels above the directory the kernel was started in.
local_work_dir_path = os.path.dirname(
    os.path.dirname(
        os.path.dirname(os.path.realpath("__file__"))
    )
)
local_code_dir_path = os.path.join(local_work_dir_path, 'code')

# S3 file paths
# S3 object keys always use '/' as their separator, so the keys are joined
# explicitly with '/' rather than with os.path.join, which would insert '\\'
# when the notebook is run on Windows.
endpoint_url = 'https://s3-west.nrp-nautilus.io'
bucket_name = 'tau-astro'
prefix = 'almogh'
s3_work_dir_path = '/'.join((prefix, 'workdir3'))
s3_saves_dir_path = '/'.join((s3_work_dir_path, 'model_saves'))
s3_data_dir_path = '/'.join((s3_work_dir_path, 'data'))
s3_data_ver_dir_path = '/'.join((s3_data_dir_path, '100K_V1'))

# S3 client that talks to the custom endpoint configured in `endpoint_url`
s3_client = boto3.client("s3", endpoint_url=endpoint_url)

# adding code folder to path
# (inserted at position 1, i.e. right after the first sys.path entry)
sys.path.insert(1, local_code_dir_path)
# S3 load/save helpers; presumably the project-local `s3` module found in the code folder added above
from s3 import to_s3_npy, to_s3_pkl, from_s3_npy, from_s3_pkl, to_s3_fig

# Load

In [3]:
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
# t-SNE perplexity; only referenced by the commented-out TSNE(...) call in the RF cell
perplexity = 25

## Common

In [4]:
#X = from_s3_npy(s3_client, bucket_name, 'almogh/workdir3/data/100K_V1/spec.npy')

loading from uri: s3://tau-astro/almogh/workdir3/data/100K_V1/spec.npy


In [5]:
# Load I_train from the saved RF run directory; it is later used to select rows of gs_train (gs_train.loc[I_train])
I_train = from_s3_npy(s3_client, bucket_name, 'almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/I_train.npy')

loading from uri: s3://tau-astro/almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/I_train.npy


In [6]:
#X_train_real = X[I_train]

In [7]:
#to_s3_npy(X_train_real, s3_client, bucket_name, 'almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/X_train_real.npy')

saving to uri: s3://tau-astro/almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/X_train_real.npy


True

In [None]:
# Load the cached X_train_real array (the commented-out cells above show it being built as X[I_train] and uploaded to this key)
X_train_real = from_s3_npy(s3_client, bucket_name, 'almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/X_train_real.npy')

In [9]:
# Load wl_grid; it is used as the x axis ('w') of the spectra plot
wl_grid = from_s3_npy(s3_client, bucket_name, 'almogh/workdir3/data/100K_V1/wl_grid.npy')

loading from uri: s3://tau-astro/almogh/workdir3/data/100K_V1/wl_grid.npy


In [10]:
# Load gs_train; it is later indexed with .loc[I_train] and its snMedian attribute is read.
# NOTE(review): from_s3_pkl presumably unpickles the object - only load pickles from a trusted bucket.
gs_train = from_s3_pkl(s3_client, bucket_name, 'almogh/workdir3/data/100K_V1/gs_train.pkl')

loading from uri: s3://tau-astro/almogh/workdir3/data/100K_V1/gs_train.pkl


## RF

In [11]:
# RF dir path
# (kept with its trailing '/', so file names are appended to it directly)
s3_rf_save_dir_path = 'almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/'

# loads - RF
# The t-SNE embedding and the weirdness scores are read from their saved copies.
# All keys are derived from s3_rf_save_dir_path, so switching to another run
# only requires changing that single constant.
# They can alternatively be recomputed from the RF dissimilarity matrix:
#D_RF = from_s3_npy(s3_client, bucket_name, s3_rf_save_dir_path + 'dis_mat.npy')
#RF_weird_scores = np.mean(D_RF, axis=1)
#RF_sne = TSNE(n_components=2, perplexity=perplexity, metric='precomputed', verbose=1, random_state=seed).fit_transform(D_RF)
RF_sne = from_s3_npy(s3_client, bucket_name, s3_rf_save_dir_path + 'tsne.npy')
RF_weird_scores = from_s3_npy(s3_client, bucket_name, s3_rf_save_dir_path + 'weird_scores.npy')

loading from uri: s3://tau-astro/almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/tsne.npy
loading from uri: s3://tau-astro/almogh/workdir3/model_saves/RF/small_URF_10K_train_set__2022_03_27___13_00_39/weird_scores.npy


In [18]:
# snMedian values of the gs_train rows selected by I_train, as a plain Python list
# (presumably the median signal-to-noise ratio - TODO confirm); used as the 'snr' column and in the spectra label
snr = gs_train.loc[I_train].snMedian.tolist()

# Plot

## Definitions

In [13]:
# Imports
from datetime import datetime
import traceback
import holoviews as hv
from holoviews import opts
from holoviews.streams import Selection1D
from bokeh.models import HoverTool
from scipy import stats
import panel as pn
from holoviews.plotting.links import DataLink
# Load the bokeh plotting backend for HoloViews
hv.extension('bokeh')



In [14]:
# creating the dataframe for RF
RF_df = pd.DataFrame({
    'feature_1': RF_sne[:, 0],
    'feature_2': RF_sne[:, 1],
    'score': RF_weird_scores,
    'snr': snr,
})
RF_df['index'] = np.arange(len(RF_df))

# full dataframe (same data, with the embedding columns prefixed by 'RF_';
# this is the frame the scatter plot reads from)
full_df = pd.DataFrame({
    'RF_feature_1': RF_sne[:, 0],
    'RF_feature_2': RF_sne[:, 1],
    'score': RF_weird_scores,
    'snr': snr,
    'index': np.arange(len(RF_df)),
})

In [15]:
def points_dmap_callable_inner(src, color_src):
    """
    Build the selectable scatter plot of one embedding stored in `full_df`.

    src       - prefix of the two embedding columns; the key dimensions are
                src+'_feature_1' and src+'_feature_2'.
    color_src - value passed to the `color` option (the notebook passes the
                column name 'score'); rendered with the 'jet' colormap.

    Returns the configured hv.Points element.
    """
    embedding_dims = [src + suffix for suffix in ('_feature_1', '_feature_2')]
    scatter = hv.Points(full_df, kdims=embedding_dims)
    # colouring
    scatter = scatter.opts(color=color_src, cmap='jet')
    # selection tools and how selected / unselected points are drawn
    scatter = scatter.opts(tools=['tap','box_select','lasso_select'])
    scatter = scatter.opts(selection_line_color='black', selection_alpha=0.7, nonselection_alpha=0.1)
    # frame and size
    scatter = scatter.opts(framewise=True, width=700, height=500, colorbar=True)
    return scatter

def points_dmap_callable(color_src):
    """
    The callable function for the points DynamicMap.

    Returns the RF scatter plot coloured by `color_src`.
    The NN / NN_AWGN panels and their DataLinks to the RF points are
    currently disabled, so only the RF points are returned.
    """
    return points_dmap_callable_inner('RF', color_src)

def spectra_dmap_callable(index):
    """
    The callable function for the spectra DynamicMap.

    Builds the spectrum panel for the current selection:
      * no selection   - a flat zero curve labelled 'No Selection';
      * a single point - that row of X_train_real, labelled with its index,
                         snr and score;
      * several points - the nan-mean of the selected rows, with a spread
                         reaching the per-wavelength nan-min / nan-max of the
                         selected rows.

    Parameters
    ----------
    index : list of int
        Positions of the selected points (supplied by the Selection1D stream);
        used to index X_train_real, snr and RF_weird_scores.

    Returns
    -------
    The HoloViews overlay `Curve * Spread`, relabelled with a description of
    the selection.

    Notes
    -----
    Every call overwrites <local_work_dir_path>/debug.txt with a short trace.
    If building the plot fails, the traceback is written to that file and an
    empty placeholder plot labelled with the exception type is returned.
    Previously the function failed at that point with an UnboundLocalError on
    `flux`, which hid the original error.
    """
    # decimating max and min by 2 (for some reason, spread is not showing from over ~5000 points)
    D = 2

    def build_overlay(w, x, x_min_err, x_max_err):
        # Curve over the full grid, overlaid with the decimated min/max spread.
        w_spread = w[::D].reshape(-1)
        x_spread = x[::D].reshape(-1)
        x_max_err = x_max_err[::D].reshape(-1)
        x_min_err = x_min_err[::D].reshape(-1)
        assert len(w_spread)==len(x_spread)==len(x_max_err)==len(x_min_err), 'length must be equal! shapes are {0}, {1}, {2}, {3}.'.format(w_spread.shape, x_spread.shape,x_max_err.shape, x_min_err.shape)
        curve = hv.Curve((w,x), kdims=['w'],vdims=['flux']).opts(color='black').opts(norm=dict(framewise=True))
        spread = hv.Spread((w_spread,x_spread,x_min_err,x_max_err), kdims=['w'],vdims=['flux', 'yerrneg', 'yerrpos']).opts(fill_alpha=0.5, line_alpha=0).opts(norm=dict(framewise=True))
        return curve * spread

    with open(os.path.join(local_work_dir_path,'debug.txt'),'w') as f:
        f.write('in spectra_dmap_callable - '+datetime.now().strftime("%d/%m/%Y %H:%M:%S")+'\n')
        try:
            w = wl_grid
            if len(index)==0:
                f.write('len==0\n')
                # No Selection - flat zero spectrum with a zero-width spread
                x = np.zeros(shape=wl_grid.shape)
                label = 'No Selection'
                x_max_err = x
                x_min_err = x
            else:
                f.write('len!=0\n')
                x = np.nanmean(X_train_real[index], axis=0)
                if len(index)==1:
                    f.write('len==1\n')
                    # a single point - plotting its spectrum, no spread
                    label = 'index=%s, snr=%f, score=%f' % (index[0], snr[index[0]], RF_weird_scores[index[0]])
                    x_max_err = np.zeros_like(x)
                    x_min_err = np.zeros_like(x)
                else:
                    f.write('len>1\n')
                    # Multiple points - plotting the average spectrum with a min/max spread
                    label = '%d points selected - plotting the average' % len(index)
                    x_max_err = np.nanmax(X_train_real[index], axis=0)-x
                    x_min_err = x-np.nanmin(X_train_real[index], axis=0)

            f.write('x_max_err type = {0}\n'.format(str(type(x_max_err))))
            f.write('x_max_err shape = {0}\n'.format(str(x_max_err.shape)))
            flux = build_overlay(w, x, x_min_err, x_max_err)

        except Exception as e:
            f.write('exception!\n')
            f.write(str(e)+'\n')
            tb = traceback.format_exc()
            f.write(tb)
            # Fall back to an empty plot of the same kind, so that `flux` and
            # `label` are always bound and the DynamicMap still gets a valid result.
            label = 'Error (%s) - see debug.txt' % type(e).__name__
            blank = np.zeros(shape=wl_grid.shape)
            flux = build_overlay(wl_grid, blank, blank, blank)

        flux = flux.opts(tools=['hover']).relabel(label).opts(width=800, height=300, show_grid=True).opts(norm=dict(framewise=True))

    return flux

## Interactive plot

In [16]:
# Colour the points by the 'score' column of full_df
color_src = 'score'
RF_points = points_dmap_callable_inner('RF', color_src)
#NN_points = points_dmap_callable_inner('NN', color_src)
#NN_AWGN_points = points_dmap_callable_inner('NN_AWGN', color_src)

#NN_dlink = DataLink(RF_points, NN_points)
#NN_AWGN_dlink = DataLink(RF_points, NN_AWGN_points)

selection = Selection1D(source=RF_points) # creating a selection from the points
# The selection stream is attached to the DynamicMap, which calls spectra_dmap_callable to build the spectrum plot
spectra_dmap = hv.DynamicMap(spectra_dmap_callable, kdims=[], streams=[selection])
spectra_dmap.opts(norm=dict(framewise=True))

# Building the full layout
#layout = (RF_points+NN_points+NN_AWGN_points+spectra_dmap).opts(merge_tools=False)
#layout = (RF_points+NN_AWGN_points+spectra_dmap).opts(merge_tools=False)
layout = (RF_points+spectra_dmap).opts(merge_tools=False)
# Last expression of the cell: show the layout in a single column
layout.cols(1)