# Pip Install

In [1]:
!pip install boto3 astropy sfdmap progressbar2

Collecting boto3
  Using cached boto3-1.26.78-py3-none-any.whl (132 kB)
Collecting astropy
  Using cached astropy-5.2.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (11.2 MB)
Collecting sfdmap
  Using cached sfdmap-0.1.1-py3-none-any.whl
Collecting progressbar2
  Using cached progressbar2-4.2.0-py2.py3-none-any.whl (27 kB)
Collecting jmespath<2.0.0,>=0.7.1
  Using cached jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting botocore<1.30.0,>=1.29.78
  Using cached botocore-1.29.78-py3-none-any.whl (10.4 MB)
Collecting s3transfer<0.7.0,>=0.6.0
  Using cached s3transfer-0.6.0-py3-none-any.whl (79 kB)
Collecting pyerfa>=2.0
  Using cached pyerfa-2.0.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (743 kB)
Collecting python-utils>=3.0.0
  Using cached python_utils-3.5.2-py2.py3-none-any.whl (24 kB)
Installing collected packages: sfdmap, python-utils, pyerfa, jmespath, progressbar2, botocore, astropy, s3transfer, boto3
Success

# Initialization

In [2]:
# imports
import pandas as pd
import numpy as np
import os
import sys
import pickle
import boto3
from matplotlib import pyplot as plt
import time

# random seed (seeds NumPy's legacy global RNG)
seed = 42
np.random.seed(seed)

# local files paths (real filesystem paths, so os.path is the right tool)
local_home_dir_path = os.path.expanduser("~")
local_work_dir_path = os.path.join(local_home_dir_path, 'thesis2')
local_code_dir_path = os.path.join(local_work_dir_path, 'code')

# S3 file paths
# S3 object keys use '/' as their delimiter regardless of the local OS, so the
# keys are joined with '/' explicitly. os.path.join would insert the OS
# separator (a backslash on Windows) and yield keys that do not match the
# bucket layout. On POSIX the resulting strings are unchanged.
endpoint_url = 'https://s3-west.nrp-nautilus.io'
bucket_name = 'tau-astro'
prefix = 'almogh'
s3_work_dir_path = '/'.join((prefix, 'thesis2'))
s3_data_dir_path = '/'.join((s3_work_dir_path, 'data'))
s3_models_dir_path = '/'.join((s3_work_dir_path, 'models'))
s3_final_table_csv_path = '/'.join((s3_data_dir_path, 'SDSS_DR16_all.csv'))

# S3 client bound to the custom endpoint above
s3_client = boto3.client("s3", endpoint_url=endpoint_url)

# adding code folder to path
# The import below must stay after this insertion: `s3` is presumably the
# project-local helper module living in local_code_dir_path (TODO confirm).
sys.path.insert(1, local_code_dir_path)
from s3 import to_s3_npy, to_s3_pkl, from_s3_npy, from_s3_pkl, to_s3_fig

# Load Data

In [3]:
# Number of slices. It appears in the S3 object names (Z_NN_i{i}_n{n_slices}.npy),
# sets how many slice files are loaded, and sets how many contiguous segments the
# list of upper-triangle pair indices is divided into when filling the matrix.
n_slices = 16

In [4]:
# Key template of the per-slice arrays: {0} = slice index, {1} = number of slices.
data_path_in_bucket = 'almogh/thesis2/eval/inference/Z_NN_i{0}_n{1}.npy'

# Resolve every slice key first, then download them in increasing slice order
# so that Z[k] holds the array of slice k.
slice_keys = [data_path_in_bucket.format(i, n_slices) for i in range(n_slices)]
Z = [
    from_s3_npy(s3_client=s3_client, bucket_name=bucket_name, path_in_bucket=key)
    for key in slice_keys
]

loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i0_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i1_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i2_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i3_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i4_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i5_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i6_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i7_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i8_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i9_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i10_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/eval/inference/Z_NN_i11_n16.npy
loading from uri: s3://tau-astro/almogh/thesis2/ev

# Build the distance matrix

In [5]:
# The slices together hold one value per upper-triangle entry (diagonal
# included) of an N x N matrix, i.e. N*(N+1)/2 values in total.
# Solve n_pairs = N*(N+1)/2 for N.
n_pairs = sum(len(z) for z in Z)
N = int((-1 + np.sqrt(1 + 8 * n_pairs)) / 2)

# Fail early if the loaded element count is not a triangular number (e.g. a
# slice is missing or truncated, or the float sqrt was rounded). Otherwise N
# would be silently wrong and the matrix would be built with the wrong shape.
if N * (N + 1) // 2 != n_pairs:
    raise ValueError(
        'loaded {0} values in total, which is not N*(N+1)/2 for any integer N '
        '(closest N = {1}); check that all {2} slices were loaded completely'
        .format(n_pairs, N, len(Z))
    )

In [6]:
# Dense, zero-initialised N x N matrix stored in half precision (float16).
matrix_shape = (N, N)
D = np.zeros(matrix_shape, dtype=np.float16)

In [7]:
# (row, col) index arrays of the upper triangle of an N x N matrix, diagonal included.
triu = np.triu_indices(N)

# n_slices + 1 evenly spaced boundaries over the flattened pair list:
# slice k covers positions tmp[k] (inclusive) to tmp[k+1] (exclusive).
# The cast uses an explicit 64-bit integer. The boundaries go up to the total
# pair count (about 5e9 in the recorded run), which does not fit in 32 bits,
# and the width of plain `int` is platform dependent in NumPy.
tmp = np.linspace(0, len(triu[0]), n_slices + 1).astype(np.int64)

In [8]:
# Write every slice into its segment of the upper triangle of D.
# Slice k owns the pair positions tmp[k] .. tmp[k+1]-1 of the flattened
# upper-triangle index list `triu`.
n_couples_total = len(triu[0])
for i_slice, z in enumerate(Z):
    print('i_slice = {0}'.format(i_slice))
    i_start, i_end = tmp[i_slice], tmp[i_slice + 1]
    print('slice index {0} takes couples {1}-{2} (non-inclusive) out of {3}'.format(i_slice, i_start, i_end, n_couples_total))
    # Take this slice's window of row/column indices, down-cast to int32 copies.
    segment = slice(i_start, i_end)
    rows = triu[0][segment].astype(np.int32)
    cols = triu[1][segment].astype(np.int32)
    D[rows, cols] = z

i_slice = 0
slice index 0 takes couples 0-312503125 (non-inclusive) out of 5000050000
i_slice = 1
slice index 1 takes couples 312503125-625006250 (non-inclusive) out of 5000050000
i_slice = 2
slice index 2 takes couples 625006250-937509375 (non-inclusive) out of 5000050000
i_slice = 3
slice index 3 takes couples 937509375-1250012500 (non-inclusive) out of 5000050000
i_slice = 4
slice index 4 takes couples 1250012500-1562515625 (non-inclusive) out of 5000050000
i_slice = 5
slice index 5 takes couples 1562515625-1875018750 (non-inclusive) out of 5000050000
i_slice = 6
slice index 6 takes couples 1875018750-2187521875 (non-inclusive) out of 5000050000
i_slice = 7
slice index 7 takes couples 2187521875-2500025000 (non-inclusive) out of 5000050000
i_slice = 8
slice index 8 takes couples 2500025000-2812528125 (non-inclusive) out of 5000050000
i_slice = 9
slice index 9 takes couples 2812528125-3125031250 (non-inclusive) out of 5000050000
i_slice = 10
slice index 10 takes couples 3125031250-34

In [9]:
# Symmetrise D. Before this cell only upper-triangle entries (diagonal included)
# have been assigned; everything else is still the initial zero.
# Step 1: rebind D to its transpose. `.T` is a view on the same buffer (no copy),
#         so through the new D the filled values sit in the lower triangle.
D = D.T
# Step 2: D.T is the original orientation again. D.T[triu] reads its filled
#         upper-triangle values (advanced indexing returns a copy), and they are
#         written into the upper triangle of D. Both triangles are now filled.
#         The statement order matters: step 2 relies on the rebinding in step 1.
# NOTE: D remains a transposed view afterwards (not C-contiguous).
D[triu] = D.T[triu]

# Save to S3

In [10]:
# Upload the full symmetric matrix next to the input slices.
# The call is kept as the cell's last expression so its return value is displayed.
d_nn_path_in_bucket = 'almogh/thesis2/eval/inference/D_NN.npy'
to_s3_npy(D, s3_client=s3_client, bucket_name=bucket_name, path_in_bucket=d_nn_path_in_bucket)

saving to uri: s3://tau-astro/almogh/thesis2/eval/inference/D_NN.npy


True