In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#| code-fold: false
# IMPORTING LIBRARIES
# --------------------

# AstroML dataset loader (imported once; the original cell repeated this line)
from astroML.datasets import fetch_LINEAR_sample

# Basic libraries
import pickle
import os
import sys

# Plotting
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.font_manager import FontProperties
import ipywidgets as widgets
from IPython.display import display, clear_output

# DataFrame analysis
import pandas as pd

# Math libraries
import numpy as np

In [3]:
#| code-fold: false
# CONFIG
# Global, notebook-wide settings: plotting theme, random seed, colour palette and font.
sns.set_theme() # apply seaborn's default theme to all subsequent matplotlib figures
np.random.seed(42) # seed NumPy's global random generator with a fixed value

# Five-colour palette (hex codes) and a discrete colormap built from it.
colors = ['#1A090D', '#D8C99B', '#D8973C', '#BD632F', '#273E47']
cmap = mcolors.ListedColormap(colors)

# Shared font description for figure text.
# NOTE(review): both set_family() and set_name() are called; in matplotlib
# set_name looks like an alias of set_family, so the second call presumably
# supersedes the first — confirm and drop one of them.
font = FontProperties()
font.set_family('avenir')
font.set_name('Avenir')
font.set_style('normal')
font.set_size('xx-large')

%matplotlib inline  

In [4]:
#| code-fold: false
# Importing custom libraries
# ----------------------------
# Put '../src/' (relative to the working directory) first on the module search
# path so that `blazhko_analysis` below is resolved from there.
sys.path.insert(0,'../src/')
# Disabled wildcard imports, kept commented out; only the explicit import
# below is active.
#from config import*
#from descriptive_stats import *
#from plots import *

#from selection import *
from blazhko_analysis import blazhko_determine, sort3arr, sort4arr, sigG, plotAll, makeLCplot_info, BE_analyzer

In [5]:
#| code-fold: false
# DATA
# Fetch the LINEAR sample through astroML, using '../inputs' as its data_home
# directory. The resulting object is later handed to BE_analyzer.
dataLINEAR = fetch_LINEAR_sample(data_home='../inputs') # fetching the data from astroML data library

In [6]:
# File-name suffix used when building the paths of the periodogram, fit and
# ZTF-data files loaded in the next cell.
end = 'FINAL_FINAL_FULL'

In [7]:
#| code-fold: false
# DATA from previous notebook
# Load the light-curve analysis table written by the previous notebook.
lc_analysis = pd.read_csv('../outputs/LC_ANALYSIS_FINALFINAL_FINAL_FULL.csv')

# Periodogram summary columns; the 'L' / 'Z' suffixes presumably denote the
# LINEAR and ZTF surveys. Entries that cannot be parsed as numbers are coerced
# to NaN and then replaced by 0.
periodogram_cols = [
    name + survey
    for survey in ('L', 'Z')
    for name in ('MainPeak', 'BlazhkoPeak', 'BlazhkoPeriod', 'BpowerRatio', 'Bsignificance')
]
for col in periodogram_cols:
    lc_analysis[col] = pd.to_numeric(lc_analysis[col], errors='coerce').fillna(0)

# Number of decimals kept per column.
round_decimals = {
    'Pratio': 4,
    'Plinear': 6, 'Pztf': 6,
    'Lampl': 2, 'Zampl': 2,
    'L_chi2dofR': 1, 'Lmean_chi2dofR': 1, 'Zchi2dofR': 1, 'Zmean_chi2dofR': 1,
    'L_chi2dof': 1, 'Lmean_chi2dof': 1, 'Zchi2dof': 1, 'Zmean_chi2dof': 1,
}
round_decimals.update({col: 4 for col in periodogram_cols})
for col, decimals in round_decimals.items():
    lc_analysis[col] = lc_analysis[col].round(decimals)

# Derived columns. They are computed AFTER the rounding above on purpose, so
# they use the already-rounded amplitudes and periods ('Pmean' is not rounded).
lc_analysis['Ampl_diff'] = (lc_analysis['Lampl'] - lc_analysis['Zampl']).abs().round(2)
lc_analysis['dP'] = (
    (lc_analysis['Plinear'] - lc_analysis['Pztf']) / lc_analysis['Pmean']
).abs().round(5)

# Placeholder columns initialised with the *string* 'NaN' (not a float NaN);
# presumably filled in later by blazhko_determine / the visual inspection — confirm.
for col in ('IndicatorType', 'ChiType', 'BE_score'):
    lc_analysis[col] = 'NaN'

# Cached artefacts of the previous notebook, selected by the `end` suffix.
# SECURITY: pickle.load and np.load(allow_pickle=True) can execute arbitrary
# code while unpickling; only load files you produced yourself.
with open("../outputs/periodograms_"+end+".pkl", "rb") as pkl_file:
    # context manager guarantees the file handle is closed after loading
    periodogr = pickle.load(pkl_file)
fits = np.load("../outputs/fits_"+end+".npy", allow_pickle=True)
ztf_data = np.load("../outputs/ztf_data_filter"+end+".npy", allow_pickle=True)

In [8]:
# Preview the first rows of the cleaned light-curve analysis table.
lc_analysis.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,29848,0.55702,0.557035,0.557,1.0,301,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,0.0,3e-05,,,
1,50402,0.643303,0.643294,0.6433,1.0,284,0.105976,0.6,0.7,0.107611,...,1.9161,303.4901,0.0146,2.0748,1,0.1,1e-05,,,
2,62892,0.530776,0.530786,0.5308,1.0,276,0.108535,1.1,0.9,0.131104,...,1.0256,42.9553,0.0618,4.7729,1,0.2,2e-05,,,
3,91437,0.674733,0.674737,0.6747,1.0,177,0.040739,2.8,1.3,0.073285,...,1.541,16.9635,0.0328,5.4267,1,0.16,1e-05,,,
4,95250,0.31387,0.313877,0.3139,1.0,222,0.162522,0.8,0.8,0.865595,...,1.0929,32.3729,0.0969,8.2018,2,0.1,2e-05,,,


In [9]:
# Build the table of Blazhko candidates: blazhko_determine receives the full
# analysis table plus an empty frame and returns the candidate table.
blazhko_can = blazhko_determine(lc_analysis, pd.DataFrame(()))

# NOTE(review): this rebinds the global `end`, which the data-loading cell
# also uses to build its input file names; re-running that cell after this one
# would look for different files. Consider a dedicated variable for this suffix.
end = 'final'
blazhko_can.to_csv("../outputs/blazhko_can" + end + ".csv", index=False)

In [10]:
# Preview the first rows of the candidate table.
blazhko_can.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,227.7904,0.0536,5.3214,1,0.08,0.0,Z,,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,277.0083,0.0826,9.4402,1,0.16,4e-05,Z,,,
2,798477,0.651627,0.651611,0.6516,1.0,294,0.0,0.0,0.0,0.0,...,309.119,143.7553,35.9709,1,0.0,2e-05,Z,,,
3,843294,0.374216,0.748404,0.5613,1.9999,290,0.123428,3.3,1.4,0.933846,...,314.4654,0.1448,9.3913,2,0.06,0.66665,Z,,,
4,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,350.8772,0.1043,15.1599,1,0.2,0.0,L,,,


In [11]:
# (number of candidate rows, number of columns)
blazhko_can.shape

(203, 40)

# Creating a visual interface

In [12]:
# For every entry of `fits`, take the mapping stored at position [1][2] and
# replace each pandas Series value in it with the equivalent NumPy array
# (the mappings are modified in place).
for fit_entry in fits:
    fit_dict = fit_entry[1][2]
    # Snapshot the items first so values can be reassigned while looping.
    for name, value in list(fit_dict.items()):
        if isinstance(value, pd.Series):
            fit_dict[name] = value.to_numpy()

In [13]:
# 'LINEAR id' values of ALL rows of the candidate table, as a NumPy array.
# This same array is passed to every BE_analyzer call further down.
Lids = blazhko_can['LINEAR id'].to_numpy()

In [14]:
# Names of the numeric summary columns of the analysis table: periods, number
# of data points, rms / chi2 statistics, amplitudes and periodogram quantities
# for both the 'L…' and 'Z…' column families.
# NOTE(review): `cols` is not referenced by any other cell shown here — confirm
# it is still needed.
cols = ['Plinear', 'Pztf', 'Pmean', 'Pratio', 'NdataLINEAR', 
        'L_rms', 'L_chi2dof', 'L_chi2dofR', 
        'Lmean_rms', 'Lmean_chi2dof', 'Lmean_chi2dofR', 'Lmmax', 'Lampl', 
        'NdataZTF', 'Z_rms', 'Zchi2dof', 
        'Zchi2dofR', 'Zmean_rms', 'Zmean_chi2dof', 'Zmean_chi2dofR', 
        'Zmmax', 'Zampl',
        'MainPeakL', 'BlazhkoPeakL', 'BlazhkoPeriodL','BpowerRatioL','BsignificanceL',
        'MainPeakZ', 'BlazhkoPeakZ', 'BlazhkoPeriodZ', 'BpowerRatioZ', 'BsignificanceZ']

In [19]:
# Preview the candidate table again before splitting it into categories.
blazhko_can.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,227.7904,0.0536,5.3214,1,0.08,0.0,Z,,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,277.0083,0.0826,9.4402,1,0.16,4e-05,Z,,,
2,798477,0.651627,0.651611,0.6516,1.0,294,0.0,0.0,0.0,0.0,...,309.119,143.7553,35.9709,1,0.0,2e-05,Z,,,
3,843294,0.374216,0.748404,0.5613,1.9999,290,0.123428,3.3,1.4,0.933846,...,314.4654,0.1448,9.3913,2,0.06,0.66665,Z,,,
4,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,350.8772,0.1043,15.1599,1,0.2,0.0,L,,,


# Separating the original dataset into robust smaller datasets

In order to make visual interpretation easier and more robust, we are dividing our BE candidates into smaller tables. The categories are:
- LINEAR periodogram parameter satisfied
- ZTF periodogram parameter satisfied
- Chi2 parameter satisfied
    - ZTF Chi2 parameter
    - LINEAR Chi2 parameter

The two Chi2 sub-categories are selected with the `ChiType` column (`Z` for ZTF, `L` for LINEAR). The two periodogram categories are selected in the same way with the `IndicatorType` column (`Z` for ZTF, `L` for LINEAR).

In [25]:
# ZTF data periodogram
# Keep the candidates whose IndicatorType is 'Z', then report (rows, columns).
blazhko_can_ztf_per = blazhko_can.loc[blazhko_can['IndicatorType'].eq('Z')]
print(blazhko_can_ztf_per.shape)

(111, 39)


In [26]:
# LINEAR data periodogram
# Keep the candidates whose IndicatorType is 'L', then report (rows, columns).
blazhko_can_linear_per = blazhko_can.loc[blazhko_can['IndicatorType'].eq('L')]
print(blazhko_can_linear_per.shape)

(53, 39)


In [27]:
# Chi2 ZTF parameter
# Keep the candidates whose ChiType is 'Z', then report (rows, columns).
blazhko_can_ztf_chi = blazhko_can.loc[blazhko_can['ChiType'].eq('Z')]
print(blazhko_can_ztf_chi.shape)

(28, 39)


In [28]:
# Chi2 LINEAR parameter
# Keep the candidates whose ChiType is 'L', then report (rows, columns).
blazhko_can_linear_chi = blazhko_can.loc[blazhko_can['ChiType'].eq('L')]
print(blazhko_can_linear_chi.shape)

(9, 39)


# Visual inspection of every category

### ZTF periodogram

In [None]:
# Interactive inspection of the ZTF-periodogram candidates.
# NOTE(review): `Lids` holds the ids of the FULL `blazhko_can` table, while
# `length` and the candidate frame describe only this subset — confirm that
# BE_analyzer does not pair them up by position.
BE_candidates_ztf_per = pd.DataFrame(())  # empty frame handed to the analyzer
length = len(blazhko_can_ztf_per)         # number of rows in this category

analysis = BE_analyzer(
    Lids,
    length,
    blazhko_can_ztf_per,
    BE_candidates_ztf_per,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [19]:
# Retrieve the table held by the current `analysis` object (presumably the
# rows accepted during the interactive session — confirm in BE_analyzer).
# `analysis` is rebound by every inspection section, so run this cell right
# after the ZTF-periodogram interface cell.
BE_candidates_ztf_per = analysis.get_save_data()
BE_candidates_ztf_per.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,1.4143,227.7904,0.0536,5.3214,1,0.08,0.0,Z,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,1.6618,277.0083,0.0826,9.4402,1,0.16,4e-05,Z,,
2,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,1.6691,350.8772,0.1043,15.1599,1,0.2,0.0,L,,


### LINEAR periodogram

In [None]:
# Interactive inspection of the LINEAR-periodogram candidates.
# NOTE(review): `Lids` holds the ids of the FULL `blazhko_can` table, while
# `length` and the candidate frame describe only this subset — confirm that
# BE_analyzer does not pair them up by position.
BE_candidates_linear_per = pd.DataFrame(())  # empty frame handed to the analyzer
length = len(blazhko_can_linear_per)         # number of rows in this category

analysis = BE_analyzer(
    Lids,
    length,
    blazhko_can_linear_per,
    BE_candidates_linear_per,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Retrieve the table held by the current `analysis` object (presumably the
# rows accepted during the interactive session — confirm in BE_analyzer).
# `analysis` is rebound by every inspection section, so run this cell right
# after the LINEAR-periodogram interface cell.
BE_candidates_linear_per = analysis.get_save_data()
BE_candidates_linear_per.head()

### LINEAR Chi2

In [None]:
# Interactive inspection of the LINEAR Chi2 candidates.
# NOTE(review): `Lids` holds the ids of the FULL `blazhko_can` table, while
# `length` and the candidate frame describe only this subset — confirm that
# BE_analyzer does not pair them up by position.
BE_candidates_linear_chi = pd.DataFrame(())  # empty frame handed to the analyzer
length = len(blazhko_can_linear_chi)         # number of rows in this category

analysis = BE_analyzer(
    Lids,
    length,
    blazhko_can_linear_chi,
    BE_candidates_linear_chi,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Retrieve the table held by the current `analysis` object (presumably the
# rows accepted during the interactive session — confirm in BE_analyzer).
# `analysis` is rebound by every inspection section, so run this cell right
# after the LINEAR Chi2 interface cell.
BE_candidates_linear_chi = analysis.get_save_data()
BE_candidates_linear_chi.head()

### ZTF Chi2

In [None]:
# Interactive inspection of the ZTF Chi2 candidates.
# NOTE(review): `Lids` holds the ids of the FULL `blazhko_can` table, while
# `length` and the candidate frame describe only this subset — confirm that
# BE_analyzer does not pair them up by position.
BE_candidates_ztf_chi = pd.DataFrame(())  # empty frame handed to the analyzer
length = len(blazhko_can_ztf_chi)         # number of rows in this category

analysis = BE_analyzer(
    Lids,
    length,
    blazhko_can_ztf_chi,
    BE_candidates_ztf_chi,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Retrieve the table held by the current `analysis` object (presumably the
# rows accepted during the interactive session — confirm in BE_analyzer).
# `analysis` is rebound by every inspection section, so run this cell right
# after the ZTF Chi2 interface cell.
BE_candidates_ztf_chi = analysis.get_save_data()
BE_candidates_ztf_chi.head()