In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from anomaly.constants import GALAXY_LINES
from anomaly.utils import specobjid_to_idx
from sdss.metadata import MetaData
from sdss.utils.managefiles import FileDirectory

# Instantiate the project's MetaData helper (imported from sdss.metadata).
# NOTE(review): `meta` is not used in the cells visible here — confirm it is
# needed further down before keeping it.
meta = MetaData()
# %matplotlib inline

# Directories

In [3]:
# Run selectors: which project directory, which bin, and which VAE
# architecture sub-path the explanation files are read from.
project_dir = '/home/elom/onedrive/phd/spectra/0_01_z_0_5_4_0_snr_inf'
bin_id = 'bin_03'
vae_architecture = '256_128_64/latent_12'

# <project_dir>/<bin_id>/explanation/<vae_architecture>
explanations_dir = '/'.join(
    [project_dir, bin_id, 'explanation', vae_architecture]
)

# Score names, one sub-directory of `explanations_dir` each.
# Expands, in this order, to:
#   mse_filter_250kms_noRel100, mse_filter_250kms_noRel97,
#   mse_filter_250kms_rel100,   mse_filter_250kms_rel97,
#   mse_noRel100, mse_noRel97, mse_rel100, mse_rel97
scores_list = [
    f'mse{prefix}_{rel}{suffix}'
    for prefix in ('_filter_250kms', '')
    for rel in ('noRel', 'rel')
    for suffix in ('100', '97')
]

# Load data

In [4]:
# Project-level array stored next to the spectra; presumably the wavelength
# grid (inferred from the file name — TODO confirm).
wave = np.load(f"{project_dir}/wave_spectra_imputed.npy")

# Metadata table for the whole sample, indexed by specobjid.
meta_data_df = pd.read_csv(
    f"{project_dir}/drop_0_01_z_0_5_4_0_snr_inf.csv.gz", index_col="specobjid"
)

# mmap_mode="r" opens the file as a read-only memory map instead of reading
# the whole array into memory.
spectra = np.load(f"{project_dir}/spectra_imputed.npy", mmap_mode="r")

# Per-bin array stored inside the bin's own directory.
idx_id = np.load(f"{project_dir}/{bin_id}/{bin_id}_index_specobjid.npy")

# MSE score tables for the mse_noRel100 score, both indexed by specobjid:
# first the most normal galaxies (AKA the best reconstructed ones), then the
# top anomalies.
normal_mse_df, anomalous_mse_df = (
    pd.read_csv(
        f"{explanations_dir}/mse_noRel100/{table}.csv.gz",
        index_col="specobjid",
    )
    for table in ("top_normal", "top_anomalies")
)

In [6]:
# Read the top-anomalies table of every score in `scores_list` and tag each
# row with the score it was read for. No index_col is passed, so every frame
# keeps pandas' default integer index.
# Using a comprehension with `.assign` (which returns a new frame) keeps loop
# temporaries out of the notebook's global namespace.
anomalies_scores_df_list = [
    pd.read_csv(f"{explanations_dir}/{score}/top_anomalies.csv.gz").assign(
        score=score
    )
    for score in scores_list
]

# Stack all per-score tables into one long frame; ignore_index=True replaces
# the repeated per-file indices with a single fresh 0..n-1 index.
anomalies_scores_df = pd.concat(anomalies_scores_df_list, ignore_index=True)

In [7]:
# Sanity check: number of rows loaded per score label.
anomalies_scores_df["score"].value_counts()

score
mse_filter_250kms_noRel100    10000
mse_filter_250kms_noRel97     10000
mse_filter_250kms_rel100      10000
mse_filter_250kms_rel97       10000
mse_noRel100                  10000
mse_noRel97                   10000
mse_rel100                    10000
mse_rel97                     10000
Name: count, dtype: int64