# AMP Feature For b278 VVV Tile

- **author:** JB Cabral (<jbc.develop@gmail.com>)

In [1]:
% matplotlib inline

from __future__ import print_function

import numpy as np

import pandas as pd

from matplotlib import pyplot as plt
import seaborn as sns
sns.set()

from sklearn import feature_selection as fs
from sklearn import preprocessing as prp

from skcriteria import Data, MAX, MIN
from skcriteria.madm import topsis, electre
from skcriteria.weights import critic

from libs import fourier_help

# Turn off pandas' chained-assignment check for the whole notebook
# (None disables both the warning and the error mode).
pd.options.mode.chained_assignment = None 

# Lookup used to collapse the original class labels into two groups:
# -1 stays -1, while 1, 2 and 3 all become 1.
TWO_LABELS = {-1: -1, 1: 1, 2: 1, 3: 1}

## Load Data

In [2]:
# Open the tile store read-only: this notebook never writes to it, and the
# default append mode would silently create an empty file if the path is wrong.
# NOTE(review): the path is an absolute, machine-specific location.
store = pd.HDFStore(
    "/home/juan/proyectos/phd/src/astroestadistica/work/data/b278_full.h5",
    mode="r")
vvv_x_ogle = store["vvv_x_ogle"]
srcs = store["sources"]

# obs helper: raw table node queried per source by get_amp()
obs_table = store.get_node("observations")
def get_amp(vvv_id):
    """Compute the "amp" feature for one source from its observations.

    Every row of ``obs_table`` whose ``source_id`` equals ``int(vvv_id)`` is
    collected, ordered by ``hjd``, and its ``mag`` values are converted to
    fluxes with ``10 ** (mag / -2.5)``.

    Parameters
    ----------
    vvv_id
        Source identifier; it is cast to ``int`` before building the query.

    Returns
    -------
    float
        ``log10(sqrt(N * std(flux) / mean(flux)))`` where ``N`` is the number
        of matching observations.
    """
    condition = "source_id == " + str(int(vvv_id))

    # Copy every column out of each matching row while iterating the query.
    records = [
        {name: row[name] for name in obs_table.colnames}
        for row in obs_table.where(condition)
    ]

    observations = pd.DataFrame(records)[obs_table.colnames]
    magnitudes = observations.sort_values("hjd").mag.values
    fluxes = 10 ** (magnitudes / -2.5)

    n_obs = len(fluxes)
    flux_std = np.std(fluxes)
    flux_mean = np.mean(fluxes)
    return np.log10(np.sqrt(n_obs * flux_std / flux_mean))

In [4]:
# Fixed seed so that a fresh kernel draws the same comparison sample.
NV_SAMPLE_SEED = 42

# Ids of the cross-matched sources whose star_type starts with "RRLy".
# na=False treats a missing star_type as "no match" instead of putting NaN
# into the boolean mask.
is_rrlyr = vvv_x_ogle.star_type.str.startswith("RRLy", na=False)
rr_ids = vvv_x_ogle[is_rrlyr].vvv_id.values

# 20 sources that are not in rr_ids and have more than 44 observations
# (presumably the non-variable comparison set -- confirm).
nv_candidates = srcs[(~srcs.id.isin(rr_ids)) & (srcs.obs_number > 44)]
nv_ids = nv_candidates.sample(20, random_state=NV_SAMPLE_SEED).id.values

In [5]:
# One record per source: first every id in rr_ids, then every id in nv_ids.
data = []
for id_group in (rr_ids, nv_ids):
    for src_id in id_group:
        data.append({"vvv_id": src_id, "amp": get_amp(src_id)})

In [9]:
# Tabulate the collected records with a fixed column order and cache them on disk.
amp_columns = ["vvv_id", "amp"]
amp = pd.DataFrame(data)[amp_columns]
amp.to_pickle("amp.pkl")

In [10]:
# Load the feature table stored under the "b278" key.
df = pd.read_hdf("data/features.h5", "b278")

# Absolute difference between the two period columns.
df["period_diff"] = np.abs(df.PeriodLS - df.gatspy_period)

# Collapse the class labels through TWO_LABELS with one vectorized lookup
# instead of a Python-level call per row.
two_class = df["cls"].map(TWO_LABELS)

# Series.map yields NaN for labels that are not in the mapping; keep the
# KeyError that a direct dict lookup would have raised.
unmapped = df.loc[two_class.isnull(), "cls"]
if len(unmapped):
    raise KeyError(
        "cls values missing from TWO_LABELS: %r" % (unmapped.unique().tolist(),))

df["cls"] = df["cls"].astype("category")
df["scls"] = two_class.astype("category")

In [11]:
# Keep only the feature rows whose vvv_id also appears in amp.
has_amp = df["vvv_id"].isin(amp["vvv_id"])
df = df[has_amp]

In [18]:
# DataFrame.join(on=...) matches the caller's column against the *index* of the
# other frame. amp carries a plain positional index, so it has to be indexed by
# vvv_id first; otherwise amp values are looked up by row position and come out
# missing or attached to the wrong source.
# rsuffix only takes effect if df already has a column named "amp".
df = df.join(amp.set_index("vvv_id"), on="vvv_id", rsuffix="_amp")

In [22]:
# Keep every column except "vvv_id_amp".
kept_columns = [name for name in df.columns if name != "vvv_id_amp"]
df = df[kept_columns]

In [23]:
# Export the final table (index included) as CSV.
output_csv = "data/to_mariano.csv"
df.to_csv(output_csv)