In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
#| code-fold: false
# IMPORTING LIBRARIES
# --------------------

# AstroML & Astropy
from astroML.datasets import fetch_LINEAR_sample
from astropy.timeseries import LombScargle
from astroML.datasets import fetch_LINEAR_sample
from astroML.datasets import fetch_LINEAR_geneva
from astropy.timeseries import TimeSeries
from astropy.table import Table
from astroML.time_series import MultiTermFit
from astropy.coordinates import SkyCoord
import astropy.units as u


# ZTF
from ztfquery import lightcurve

# Basic libraries
import random
import pickle
import os
import sys
from tqdm import tqdm

# Plotting
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.font_manager import FontProperties
import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import interact, interactive, fixed, interact_manual

# DataFrame analysis
import pandas as pd
import dask.dataframe as dd 

# Math libraries
import numpy as np
import scipy as sc
from scipy.stats import norm
from scipy.signal import find_peaks
from array import array

In [2]:
#| code-fold: false
# CONFIG
# Global plotting / randomness setup shared by the rest of the notebook.
sns.set_theme() # setting the theme for plotting
# Seed numpy's legacy global RNG.
np.random.seed(42)

# Five-colour palette (hex codes) and the discrete colormap built from it.
colors = ['#1A090D', '#D8C99B', '#D8973C', '#BD632F', '#273E47']
cmap = mcolors.ListedColormap(colors)

# Font properties object configured for Avenir, normal style, xx-large size.
# NOTE(review): set_name may simply override the earlier set_family call in
# matplotlib -- confirm whether both calls are needed.
font = FontProperties()
font.set_family('avenir')
font.set_name('Avenir')
font.set_style('normal')
font.set_size('xx-large')

# Render matplotlib figures inline in the notebook output.
%matplotlib inline  

In [3]:
#| code-fold: false
# Importing custom libraries
# ----------------------------
sys.path.insert(0,'../src/')
from config import*
#from descriptive_stats import *
#from plots import *

from selection import *
from lc_analysis import *
from blazhko_analysis import *

Loading the data!


In [4]:
#| code-fold: false
# DATA
# LINEAR sample obtained through astroML, with '../inputs' passed as data_home.
data = fetch_LINEAR_sample(data_home='../inputs') # fetching the data from astroML data library

In [5]:
# File-name suffix: it is concatenated into the LC_ANALYSIS csv path that is read
# later in this notebook (and into the commented-out periodogram/fit/ztf paths).
# NOTE(review): the resulting name is "LC_ANALYSIS_FINAL" + end with no separator
# in between -- confirm that this matches the file written by the previous notebook.
end = 'FINAL_FINAL_FULL'

In [22]:
#| code-fold: false
# DATA from previous notebook
# Read the light-curve analysis table, trim the numeric columns to a fixed
# number of decimals, then derive the comparison columns used for the
# Blazhko candidate selection.
lc_analysis = pd.read_csv("../outputs/LC_ANALYSIS_FINAL"+end+".csv")

# column -> number of decimals kept (applied in this order)
_ROUND_DIGITS = {
    'Pratio': 4,
    'Plinear': 6,
    'Pztf': 6,
    'Lampl': 2,
    'Zampl': 2,
    'L_chi2dofR': 1,
    'Lmean_chi2dofR': 1,
    'Zchi2dofR': 1,
    'Zmean_chi2dofR': 1,
    'L_chi2dof': 1,
    'Lmean_chi2dof': 1,
    'Zchi2dof': 1,
    'Zmean_chi2dof': 1,
}
for _column, _digits in _ROUND_DIGITS.items():
    lc_analysis[_column] = lc_analysis[_column].round(_digits)

# absolute LINEAR-vs-ZTF amplitude difference (computed from the rounded amplitudes)
_ampl_gap = (lc_analysis['Lampl'] - lc_analysis['Zampl']).abs()
lc_analysis['Ampl_diff'] = _ampl_gap.round(2)

# absolute LINEAR-vs-ZTF period difference relative to the mean period
_period_gap = (lc_analysis['Plinear'] - lc_analysis['Pztf']) / lc_analysis['Pmean']
lc_analysis['dP'] = _period_gap.abs().round(5)

# placeholder columns (the literal string 'NaN') that blazhko_determine fills in
for _flag_column in ('IndicatorType', 'ChiType', 'BE_score'):
    lc_analysis[_flag_column] = 'NaN'

#periodogr = pickle.load(open("../outputs/periodograms_"+end+".pkl", "rb"))
#fits = np.load("../outputs/fits_"+end+".npy", allow_pickle=True)
#ztf_data = np.load("../outputs/ztf_data_"+end+".npy", allow_pickle=True)

In [23]:
# Preview the first rows of the rounded table, including the new placeholder columns.
lc_analysis.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,MainPeakZ,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,29848,0.55702,0.557035,0.557,1.0,301,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3e-05,,,
1,50402,0.643303,0.643294,0.6433,1.0,284,0.105976,0.6,0.7,0.107611,...,1.912758,1.916053,303.490137,0.014553,2.074831,0.1,1e-05,,,
2,62892,0.530776,0.530786,0.5308,1.0,276,0.108535,1.1,0.9,0.131104,...,1.002304,1.025584,42.955326,0.061844,4.772949,0.2,2e-05,,,
3,91437,0.674733,0.674737,0.6747,1.0,177,0.040739,2.8,1.3,0.073285,...,1.482062,1.541012,16.963528,0.032789,5.426679,0.16,1e-05,,,
4,95250,0.31387,0.313877,0.3139,1.0,222,0.162522,0.8,0.8,0.865595,...,1.061979,1.092869,32.372936,0.096901,8.201794,0.1,2e-05,,,


In [24]:
# Names of the numeric light-curve columns: periods, LINEAR fit statistics,
# ZTF fit statistics, and the periodogram peak quantities for LINEAR (…L) and ZTF (…Z).
# NOTE(review): `cols` is not referenced in the visible part of this notebook --
# confirm it is still used further down.
cols = ['Plinear', 'Pztf', 'Pmean', 'Pratio', 'NdataLINEAR', 
        'L_rms', 'L_chi2dof', 'L_chi2dofR', 
        'Lmean_rms', 'Lmean_chi2dof', 'Lmean_chi2dofR', 'Lmmax', 'Lampl', 
        'NdataZTF', 'Z_rms', 'Zchi2dof', 
        'Zchi2dofR', 'Zmean_rms', 'Zmean_chi2dof', 'Zmean_chi2dofR', 
        'Zmmax', 'Zampl',
        'MainPeakL', 'BlazhkoPeakL', 'BlazhkoPeriodL','BpowerRatioL','BsignificanceL',
        'MainPeakZ', 'BlazhkoPeakZ', 'BlazhkoPeriodZ', 'BpowerRatioZ', 'BsignificanceZ']

In [29]:
def _periodogram_flags_blazhko(period, blazhko_period, power_ratio, significance, dPmin=0.01):
    '''
    Decide whether one survey's periodogram quantities indicate the Blazhko Effect.

    Arguments:
        period(float) = main period found for that survey
        blazhko_period(float) = Blazhko period derived from the periodogram
        power_ratio(float) = relative strength of the Blazhko peak
        significance(float) = significance of the Blazhko peak
        dPmin(float) = minimum distance the main period must keep from 0.5, 1.0 and 2.0

    Returns:
        bool = True only when all three criteria hold
    '''
    # no daily alias of main period
    no_alias = all(abs(period - alias) > dPmin for alias in (0.5, 1.0, 2.0))
    # blazhko period must be within RR Lyrae range
    in_range = (blazhko_period > 35) and (blazhko_period < 325)
    # relative strength and significance must be above certain value for it to be noticeable
    noticeable = (power_ratio > 0.05) and (significance > 5)
    return bool(no_alias and in_range and noticeable)


def _blazhko_score(period, chiL, chiZ, ampl):
    '''
    Score a star on period difference, chi2 and amplitude difference.

    Arguments:
        period(float) = relative period difference (column 'dP')
        chiL(float) = LINEAR chi2 value (column 'L_chi2dofR')
        chiZ(float) = ZTF chi2 value (column 'Zchi2dofR')
        ampl(float) = amplitude difference (column 'Ampl_diff')

    Returns:
        (int, str or None) = total score and the survey letter ('L' or 'Z') of the
        last chi2 rule that fired, or None when no chi2 rule fired
    '''
    # NOTE(review): every bucket uses strict inequalities, so values exactly on a
    # boundary (period == 0.001, ampl == 0.15) and chi2 values from 4.5 to 5
    # score nothing. Kept as in the original selection -- confirm this is intended.

    # PERIOD
    p_score = 0
    if period > 4e-5 and period < 0.001:
        p_score += 2
    if period > 0.001:
        p_score += 4

    # CHI (rule order matters: a later rule overrides the survey letter of an earlier one)
    chi_score = 0
    chi_type = None
    if chiL > 2.5 and chiL < 4.5:
        chi_score += 2
        chi_type = 'L'
    if chiZ > 2.5 and chiZ < 4.5:
        chi_score += 2
        chi_type = 'Z'
    if chiL > 5:
        chi_score += 3
        chi_type = 'L'
    if chiZ > 5:
        chi_score += 3
        chi_type = 'Z'

    # AMPL
    amp_score = 0
    if ampl > 0.05 and ampl < 0.15:
        amp_score += 1
    if ampl > 0.15 and ampl < 2:
        amp_score += 2

    return p_score + chi_score + amp_score, chi_type


def blazhko_determine(df, dfnew):
    '''
    This algorithm sorts through a DataFrame of light curve parameters and selects the rows that are
    Blazhko Effect (BE) candidates. A row is selected either because the LINEAR or ZTF periodogram
    indicates BE, or because its score (period difference, chi2 of both LINEAR and ZTF, amplitude
    difference) is greater than 5.

    Rows are addressed by position, so the function also works on frames whose index is not the
    default 0..n-1 range (for example a filtered frame). Index labels are assumed to be unique.

    Side effects: `df` is modified in place. 'IndicatorType' is set to 'L' or 'Z' for rows with a
    periodogram indication, and 'ChiType' / 'BE_score' are set for every row that reaches the
    scoring step, whether or not it ends up selected.

    Arguments:
        df(DataFrame) = input dataframe
        dfnew(DataFrame) = dataframe the selected candidates are appended to

    Returns:
        DataFrame = `dfnew` followed by the selected rows of `df`, with a fresh 0..n-1 index
        (`dfnew` itself is returned when nothing is selected)
    '''
    selected = []  # positions of the rows accepted as candidates

    for pos, label in enumerate(df.index):
        row = df.iloc[pos]

        # STEP 1: getting rid of trash
        # ---------
        if not (row['Ampl_diff'] < 2):
            continue
        # NOTE(review): these cuts are joined with `or`, so a single passing test is
        # enough to keep the row. Kept as in the original -- confirm `and` was not meant.
        if not (row['L_chi2dofR'] < 9 or row['Zchi2dofR'] < 12
                or row['Plinear'] < 4 or row['Pztf'] < 4):
            continue
        if not (row['NdataLINEAR'] > 250 or row['NdataZTF'] > 250):
            continue

        # STEP 2: determine periodogram likelihood of BE
        # ---------
        linear_hit = _periodogram_flags_blazhko(
            row['Plinear'], row['BlazhkoPeriodL'], row['BpowerRatioL'], row['BsignificanceL'])
        ztf_hit = _periodogram_flags_blazhko(
            row['Pztf'], row['BlazhkoPeriodZ'], row['BpowerRatioZ'], row['BsignificanceZ'])

        # NOTE(review): when both surveys indicate BE the 'Z' label overwrites 'L'.
        # Kept for compatibility with code that tests for exactly 'L' or 'Z'.
        if linear_hit:
            df.loc[label, 'IndicatorType'] = 'L'
        if ztf_hit:
            df.loc[label, 'IndicatorType'] = 'Z'

        if linear_hit or ztf_hit:
            selected.append(pos)
            continue

        # STEP 3: determine scorechart for other parameters
        # ---------
        score, chi_type = _blazhko_score(
            row['dP'], row['L_chi2dofR'], row['Zchi2dofR'], row['Ampl_diff'])
        if chi_type is not None:
            df.loc[label, 'ChiType'] = chi_type
        df.loc[label, 'BE_score'] = score

        if score > 5:
            selected.append(pos)

    if not selected:
        return dfnew

    # One concat at the end instead of one per selected row; the rows are taken after
    # all in-place updates, so they carry the labels and scores written above.
    return pd.concat([dfnew, df.iloc[selected]], ignore_index=True, axis=0)

In [30]:
# Start from an empty frame and let blazhko_determine append every candidate row;
# lc_analysis gets its IndicatorType / ChiType / BE_score columns filled in place.
blazhko_can = blazhko_determine(lc_analysis, pd.DataFrame(()))

In [31]:
# Show up to the first 50 selected candidates.
blazhko_can.head(50)

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_rms,L_chi2dof,L_chi2dofR,Lmean_rms,...,MainPeakZ,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,439441,0.709248,0.709248,0.7092,1.0,349,0.171897,2.1,1.3,0.249301,...,1.409944,1.414334,227.790433,0.053623,5.321415,0.08,0.0,Z,,
1,664583,0.602994,0.603021,0.603,1.0,449,0.115904,1.8,1.1,0.12566,...,1.658187,1.661797,277.00831,0.082578,9.440164,0.16,4e-05,Z,,
2,798477,0.651627,0.651611,0.6516,1.0,294,0.0,0.0,0.0,0.0,...,0.163617,0.166852,309.119011,143.755306,35.97094,0.0,2e-05,Z,,
3,843294,0.374216,0.748404,0.5613,1.9999,290,0.123428,3.3,1.4,0.933846,...,1.336177,1.339357,314.465409,0.144796,9.391323,0.06,0.66665,Z,,
4,880588,0.600138,0.60014,0.6001,1.0,295,0.110419,3.2,1.2,0.19646,...,1.666267,1.669117,350.877193,0.10431,15.159875,0.2,0.0,L,,
5,924301,0.507503,0.50744,0.5075,0.9999,418,0.095515,13.8,1.9,0.088003,...,3.982313,4.003553,47.080979,0.040311,7.753614,0.28,0.00012,,Z,7.0
6,1005497,0.653607,0.653604,0.6536,1.0,607,0.073102,2.1,1.1,0.076865,...,1.530006,1.549006,52.631579,0.096277,7.546404,0.09,0.0,Z,,
7,1092244,0.649496,0.649558,0.6495,1.0001,590,0.106827,2.3,1.2,0.110552,...,1.539276,1.612536,13.650014,0.072372,1.000328,0.25,0.0001,,Z,6.0
8,1212611,0.630896,0.630893,0.6309,1.0,297,0.049308,0.9,0.9,0.052035,...,1.585073,1.638838,18.599461,0.027675,6.192928,0.17,0.0,L,,
9,1615764,0.488189,0.488191,0.4882,1.0,334,0.053845,3.0,1.5,0.077205,...,2.04838,2.0712,43.821209,0.064493,11.212751,0.24,0.0,Z,,


In [32]:
# (number of candidates, number of columns)
blazhko_can.shape

(203, 38)