In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
#| code-fold: false
# IMPORTING LIBRARIES
# --------------------

# AstroML
from astroML.datasets import fetch_LINEAR_sample
from astropy.coordinates import SkyCoord
import astropy.units as u

# Basic libraries
import pickle
import os
import sys
from tqdm import tqdm

# Plotting
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.font_manager import FontProperties

# DataFrame analysis
import pandas as pd

# Math libraries
import numpy as np
import scipy as sc
from scipy.stats import norm

# CONFIGURATION
# -------------
sns.set_theme()  # seaborn theme applied to every figure in this notebook

# Make the project modules in ../src importable. Guarded so that re-running this
# cell does not keep prepending duplicate entries to sys.path.
if '../src/' not in sys.path:
    sys.path.insert(0, '../src/')

np.random.seed(42)  # fixed seed for NumPy's global random generator

# configuring plotting colors
# Named colours are the single source of truth; the palette list is built from
# them so the hex codes are not duplicated.
blue = '#5F6372'
turqoise = '#79A8A4'
light_green = '#B2AD8F'
green = '#92A186'
pink = '#AD8082'
colors = [blue, turqoise, light_green, green, pink]

# sns.set_palette() returns None, so build the palette object first and keep a
# usable reference to it in `muted_colors` before activating it.
muted_colors = sns.color_palette(colors)
sns.set_palette(muted_colors)

# configuring fonts for plotting
font = FontProperties()
font.set_family('avenir')
# NOTE(review): in matplotlib `set_name` appears to be an alias of `set_family`,
# so this call presumably replaces 'avenir' set on the line above — confirm
# which family is actually intended.
font.set_name('Big Caslon')
font.set_style('normal')
font.set_size('xx-large')


# Select matplotlib's inline backend so figures are rendered in the cell output.
%matplotlib inline  

In [3]:
#| code-fold: false
# Importing custom libraries
# ----------------------------
# Put ../src/ at the front of the module search path so the project modules
# below can be imported.
sys.path.insert(0,'../src/')
# NOTE(review): wildcard imports hide where names come from. `blazhko_determine`
# and `BE_analyzer`, used later in this notebook, presumably come from these
# two modules — consider importing them explicitly.
from BE_plotting import*
#from RR_lyrae_params import*
from blazhko_analysis import*

## Importing data

The first step is to import the `LINEAR` and `ZTF` data, as well as the data calculated in the previous notebook.

In [4]:
#| code-fold: false
# DATA
dataLINEAR = fetch_LINEAR_sample(data_home='../inputs') # fetching the data from astroML data library
end = 'FINAL_FINAL_FULL'  # suffix of the artifact files loaded at the bottom of this cell

# DATA from previous notebook
lc_analysis = pd.read_csv('../outputs/LC_ANALYSIS_FINALFINAL_FINAL_FULL.csv')

# Periodogram-derived columns: coerce non-numeric entries to NaN, replace NaN
# with 0, then round to 4 decimals.
PERIODOGRAM_COLUMNS = [
    'MainPeakL', 'BlazhkoPeakL', 'BlazhkoPeriodL', 'BpowerRatioL', 'BsignificanceL',
    'MainPeakZ', 'BlazhkoPeakZ', 'BlazhkoPeriodZ', 'BpowerRatioZ', 'BsignificanceZ',
]
for _col in PERIODOGRAM_COLUMNS:
    lc_analysis[_col] = (
        pd.to_numeric(lc_analysis[_col], errors='coerce').fillna(0).round(4)
    )

# Number of decimals kept for the period, amplitude and chi2 columns.
ROUND_DECIMALS = {
    'Pratio': 4,
    'Plinear': 6,
    'Pztf': 6,
    'Lampl': 2,
    'Zampl': 2,
    'L_chi2dofR': 1,
    'Lmean_chi2dofR': 1,
    'Zchi2dofR': 1,
    'Zmean_chi2dofR': 1,
    'L_chi2dof': 1,
    'Lmean_chi2dof': 1,
    'Zchi2dof': 1,
    'Zmean_chi2dof': 1,
}
for _col, _ndigits in ROUND_DECIMALS.items():
    lc_analysis[_col] = lc_analysis[_col].round(_ndigits)

# Derived columns. These must come AFTER the rounding above: they are computed
# from the already-rounded amplitudes and periods (Pmean is used as loaded).
lc_analysis['Ampl_diff'] = (lc_analysis['Lampl'] - lc_analysis['Zampl']).abs().round(2)
lc_analysis['dP'] = (
    ((lc_analysis['Plinear'] - lc_analysis['Pztf']) / lc_analysis['Pmean']).abs().round(5)
)

# Label columns initialised with the placeholder string 'NaN' (a str, not np.nan).
for _col in ('IndicatorType', 'ChiType', 'BE_score'):
    lc_analysis[_col] = 'NaN'

# Artifacts saved by the previous notebook.
# SECURITY: pickle.load and np.load(allow_pickle=True) can execute arbitrary code
# while loading; only open files produced by this project's own pipeline.
with open("../outputs/periodograms_" + end + ".pkl", "rb") as _fh:  # closes the handle
    periodogr = pickle.load(_fh)
fits = np.load("../outputs/fits_" + end + ".npy", allow_pickle=True)
# NOTE(review): unlike the two files above there is no underscore between
# "ztf_data_filter" and the suffix — confirm this matches the file name on disk.
ztf_data = np.load("../outputs/ztf_data_filter" + end + ".npy", allow_pickle=True)

In [5]:
# Preview the first rows of the analysis table after the clean-up above.
lc_analysis.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,29848,0.55702,0.557035,0.557,1.0,301,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1,0.0,3e-05,,,
1,50402,0.643303,0.643294,0.6433,1.0,284,0.105976,0.6,0.7,0.107611,...,1.9161,303.4901,0.0146,2.0748,1,0.1,1e-05,,,
2,62892,0.530776,0.530786,0.5308,1.0,276,0.108535,1.1,0.9,0.131104,...,1.0256,42.9553,0.0618,4.7729,1,0.2,2e-05,,,
3,91437,0.674733,0.674737,0.6747,1.0,177,0.040739,2.8,1.3,0.073285,...,1.541,16.9635,0.0328,5.4267,1,0.16,1e-05,,,
4,95250,0.31387,0.313877,0.3139,1.0,222,0.162522,0.8,0.8,0.865595,...,1.0929,32.3729,0.0969,8.2018,2,0.1,2e-05,,,


# Determination of BE candidates

**TODO:** write up the entire explanation of the algorithm implemented by `blazhko_determine`.

In [6]:
# Run the candidate selection on the analysis table, handing it an empty frame
# as the second argument, and write the returned table to ../outputs.
end = 'final'
blazhko_can = blazhko_determine(lc_analysis, pd.DataFrame(()))
blazhko_can.to_csv(
    "../outputs/blazhko_can" + end + ".csv",
    index=False,
)

In [7]:
# Preview the first rows of the candidate table.
blazhko_can.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,227.7904,0.0536,5.3214,1,0.08,0.0,Z,,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,277.0083,0.0826,9.4402,1,0.16,4e-05,Z,,,
2,798477,0.651627,0.651611,0.6516,1.0,294,0.0,0.0,0.0,0.0,...,309.119,143.7553,35.9709,1,0.0,2e-05,Z,,,
3,843294,0.374216,0.748404,0.5613,1.9999,290,0.123428,3.3,1.4,0.933846,...,314.4654,0.1448,9.3913,2,0.06,0.66665,Z,,,
4,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,350.8772,0.1043,15.1599,1,0.2,0.0,L,,,


In [8]:
# (number of rows, number of columns) of the candidate table.
blazhko_can.shape

(208, 40)

# Determining validity of BE candidates

In order to make visual interpretation easier and more robust, we are dividing our BE candidates into smaller tables. The categories are:
- LINEAR periodogram parameter satisfied
- ZTF periodogram parameter satisfied
- Chi2 parameter satisfied
    - ZTF Chi2 parameter
    - LINEAR Chi2 parameter

The periodogram categories are selected with the `IndicatorType` column and the Chi2 categories with the `ChiType` column; in both columns `L` stands for LINEAR and `Z` for ZTF. Candidates flagged in neither column are collected in a separate "other" table.

In [9]:
# ZTF periodogram category: rows whose IndicatorType is 'Z', re-indexed from 0
blazhko_can_ztf_per = (
    blazhko_can[blazhko_can['IndicatorType'] == 'Z']
    .reset_index(drop=True)
)
print(blazhko_can_ztf_per.shape)
blazhko_can_ztf_per.head()

(111, 40)


Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,227.7904,0.0536,5.3214,1,0.08,0.0,Z,,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,277.0083,0.0826,9.4402,1,0.16,4e-05,Z,,,
2,798477,0.651627,0.651611,0.6516,1.0,294,0.0,0.0,0.0,0.0,...,309.119,143.7553,35.9709,1,0.0,2e-05,Z,,,
3,843294,0.374216,0.748404,0.5613,1.9999,290,0.123428,3.3,1.4,0.933846,...,314.4654,0.1448,9.3913,2,0.06,0.66665,Z,,,
4,1005497,0.653607,0.653604,0.6536,1.0,607,0.073102,2.1,1.1,0.076865,...,52.6316,0.0963,7.5464,1,0.09,0.0,Z,,,


In [10]:
# LINEAR periodogram category: rows whose IndicatorType is 'L', re-indexed from 0
blazhko_can_linear_per = (
    blazhko_can[blazhko_can['IndicatorType'] == 'L']
    .reset_index(drop=True)
)
print(blazhko_can_linear_per.shape)
blazhko_can_linear_per.head()

(53, 40)


Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,350.8772,0.1043,15.1599,1,0.2,0.0,L,,,
1,1212611,0.630896,0.630893,0.6309,1.0,297,0.049308,0.9,0.9,0.052035,...,18.5995,0.0277,6.1929,1,0.17,0.0,L,,,
2,1876491,0.760128,0.760123,0.7601,1.0,301,0.110849,2.5,1.2,0.138102,...,202.2245,0.0477,8.5224,1,0.11,1e-05,L,,,
3,1890167,0.651279,0.0,0.3256,0.0,269,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1,0.0,2.00024,L,,,
4,2462460,0.682155,0.0,0.3411,0.0,309,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1,0.0,1.99987,L,,,


In [11]:
# ZTF Chi2 category: rows whose ChiType is 'Z', re-indexed from 0
blazhko_can_ztf_chi = (
    blazhko_can[blazhko_can['ChiType'] == 'Z']
    .reset_index(drop=True)
)
print(blazhko_can_ztf_chi.shape)
blazhko_can_ztf_chi.head()

(33, 40)


Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,924301,0.507503,0.50744,0.5075,0.9999,418,0.095515,13.8,1.9,0.088003,...,47.081,0.0403,7.7536,1,0.28,0.00012,,Z,7,period
1,1092244,0.649496,0.649558,0.6495,1.0001,590,0.106827,2.3,1.2,0.110552,...,13.65,0.0724,1.0003,1,0.25,0.0001,,Z,6,period
2,2041979,0.653694,0.653639,0.6537,0.9999,276,0.134647,1.5,1.2,0.137441,...,207.2539,0.0164,9.8786,1,0.3,8e-05,,Z,6,period
3,2050107,0.686454,0.686466,0.6865,1.0,190,0.089416,16.4,3.9,0.124859,...,350.2627,0.0275,12.8492,1,0.23,2e-05,,Z,10,amp
4,3139572,0.211316,0.366613,0.289,1.7349,275,0.232189,2.0,1.2,1.013054,...,268.4564,0.0236,11.2099,2,0.12,0.53736,,Z,7,period


In [12]:
# LINEAR Chi2 category: rows whose ChiType is 'L', re-indexed from 0
blazhko_can_linear_chi = (
    blazhko_can[blazhko_can['ChiType'] == 'L']
    .reset_index(drop=True)
)
print(blazhko_can_linear_chi.shape)
blazhko_can_linear_chi.head()

(9, 40)


Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,3218931,0.693979,0.693941,0.694,0.9999,274,0.049901,28.8,2.6,0.067145,...,354.6099,0.0791,11.6797,1,0.21,5e-05,,L,6,period
1,4670587,0.357698,1.07312,0.7154,3.0001,555,0.219086,31.8,4.1,0.186476,...,297.619,0.6133,0.5715,2,0.02,1.00003,,L,6,period
2,7555727,0.990143,0.393012,0.6916,0.3969,260,0.393559,7.6,2.9,0.788193,...,287.3563,0.0065,9.7436,2,0.05,0.86341,,L,6,period
3,8640362,0.33272,0.688383,0.5106,2.069,444,0.134056,12.6,4.1,1.143575,...,23.7135,0.0349,6.1598,2,1.0,0.69656,,L,8,period
4,12252043,0.340085,0.704536,0.5223,2.0716,188,0.310593,12.7,3.1,0.435987,...,102.0408,0.002,5.9673,1,0.02,0.69778,,L,6,period


In [13]:
# Remaining candidates: neither ChiType nor IndicatorType is 'L' or 'Z'
blazhko_can_other = blazhko_can.loc[
    ~blazhko_can['ChiType'].isin(['L', 'Z'])
    & ~blazhko_can['IndicatorType'].isin(['L', 'Z'])
].reset_index(drop=True)
print(blazhko_can_other.shape)
blazhko_can_other.head()

(2, 40)


Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,LC_type,Ampl_diff,dP,IndicatorType,ChiType,BE_score,period_vs_amp
0,8428593,0.955946,0.468273,0.7121,0.4899,299,0.133885,51.2,4.6,0.499911,...,357.1429,0.0354,14.3974,1,0.2,0.68484,,,6,period
1,24010683,0.588512,0.370107,0.4793,0.6289,569,0.097862,1.1,0.9,1.48873,...,11.2114,0.0374,9.1061,1,0.22,0.45567,,,6,period


# Visual inspection of every category

In [None]:
# ZTF periodogram: open the inspection interface for this category
Lids_ztf_per = blazhko_can_ztf_per['LINEAR id'].to_numpy()
length = len(blazhko_can_ztf_per)  # number of candidates in this category
print(length)

# Empty frame handed to the analyzer together with the candidate table
BE_candidates_ztf_per = pd.DataFrame(())
analysis = BE_analyzer(
    Lids_ztf_per,
    length,
    blazhko_can_ztf_per,
    BE_candidates_ztf_per,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Save the table returned by the analyzer for the ZTF periodogram category.
# NOTE: `analysis` is rebound by every inspection cell in this notebook, so run
# this cell right after the ZTF periodogram inspection cell.
end = 'ztf_per'
BE_candidates_ztf_per = analysis.get_save_data()
BE_candidates_ztf_per.to_csv(
    "../outputs/blazhko_list" + end + ".csv",
    index=False,
)
BE_candidates_ztf_per.head()

In [None]:
# LINEAR periodogram: open the inspection interface for this category
Lids_linear_per = blazhko_can_linear_per['LINEAR id'].to_numpy()
lengthL = len(blazhko_can_linear_per)  # number of candidates in this category
print(lengthL)

# Empty frame handed to the analyzer together with the candidate table
BE_candidates_linear_per = pd.DataFrame(())
analysis = BE_analyzer(
    Lids_linear_per,
    lengthL,
    blazhko_can_linear_per,
    BE_candidates_linear_per,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Save the table returned by the analyzer for the LINEAR periodogram category.
# NOTE: `analysis` is rebound by every inspection cell in this notebook, so run
# this cell right after the LINEAR periodogram inspection cell.
end = 'linear_per'
BE_candidates_linear_per = analysis.get_save_data()
BE_candidates_linear_per.to_csv(
    "../outputs/blazhko_list" + end + ".csv",
    index=False,
)
BE_candidates_linear_per.head()

In [None]:
# LINEAR chi2: open the inspection interface for this category
Lids_linear_chi = blazhko_can_linear_chi['LINEAR id'].to_numpy()
lengthLch = len(blazhko_can_linear_chi)  # number of candidates in this category
print(lengthLch)

# Empty frame handed to the analyzer together with the candidate table
BE_candidates_linear_chi = pd.DataFrame(())
analysis = BE_analyzer(
    Lids_linear_chi,
    lengthLch,
    blazhko_can_linear_chi,
    BE_candidates_linear_chi,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Save the table returned by the analyzer for the LINEAR chi2 category.
# NOTE: `analysis` is rebound by every inspection cell in this notebook, so run
# this cell right after the LINEAR chi2 inspection cell.
end = 'linear_chi'
BE_candidates_linear_chi = analysis.get_save_data()
BE_candidates_linear_chi.to_csv(
    "../outputs/blazhko_list" + end + ".csv",
    index=False,
)
BE_candidates_linear_chi.head()

In [None]:
# ZTF chi2: open the inspection interface for this category
Lids_ztf_chi = blazhko_can_ztf_chi['LINEAR id'].to_numpy()
lengthZ_chi = len(blazhko_can_ztf_chi)  # number of candidates in this category
print(lengthZ_chi)

# Empty frame handed to the analyzer together with the candidate table
BE_candidates_ztf_chi = pd.DataFrame(())
analysis = BE_analyzer(
    Lids_ztf_chi,
    lengthZ_chi,
    blazhko_can_ztf_chi,
    BE_candidates_ztf_chi,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Save the table returned by the analyzer for the ZTF chi2 category.
# NOTE: `analysis` is rebound by every inspection cell in this notebook, so run
# this cell right after the ZTF chi2 inspection cell.
end = 'ztf_chi'
BE_candidates_ztf_chi = analysis.get_save_data()
BE_candidates_ztf_chi.to_csv(
    "../outputs/blazhko_list" + end + ".csv",
    index=False,
)
BE_candidates_ztf_chi.head()

In [None]:
# other parameters: open the inspection interface for the remaining candidates
lids = blazhko_can_other['LINEAR id'].to_numpy()
length = len(blazhko_can_other)  # number of candidates in this category
print(length)

# Empty frame handed to the analyzer together with the candidate table
BE_other = pd.DataFrame(())
analysis = BE_analyzer(
    lids,
    length,
    blazhko_can_other,
    BE_other,
    fits,
    periodogr,
    ztf_data,
    dataLINEAR,
)
analysis.display_interface()

In [None]:
# Save the table returned by the analyzer for the "other" category.
# NOTE: `analysis` is rebound by every inspection cell in this notebook, so run
# this cell right after the "other parameters" inspection cell.
end = 'other'
BE_other = analysis.get_save_data()
BE_other.to_csv(
    "../outputs/blazhko_list" + end + ".csv",
    index=False,
)
BE_other.head()