In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
#| code-fold: false
# IMPORTING LIBRARIES
# --------------------

# AstroML
from astroML.datasets import fetch_LINEAR_sample
from astropy.timeseries import LombScargle
from astroML.datasets import fetch_LINEAR_sample
from astroML.datasets import fetch_LINEAR_geneva
from astropy.timeseries import TimeSeries
from astropy.table import Table
from astroML.time_series import MultiTermFit

# ZTF
from ztfquery import lightcurve

# Basic libraries
import pickle
import os
import sys
from tqdm import tqdm

# Plotting
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker
import matplotlib.colors as mcolors
from matplotlib.font_manager import FontProperties

# DataFrame analysis
import pandas as pd

# Math libraries
import numpy as np
import scipy as sc
from scipy.stats import norm

# CONFIGURATION
# -------------
sns.set_theme(style='white') # setting the theme for plotting
sys.path.insert(0,'../src/')
np.random.seed(42)

# configuring plotting colors
colors = ['#465BBB', '#3F8FCE', '#7ABBCE', '#3A3865', '#A82F43', '#612A37', '#DC5433', '#F29457']
b1 = '#465BBB'
b2 = '#3F8FCE'
b3 = '#7ABBCE'
b4 = '#3A3865'
black1 = '#22212A'
black2 = '#2D1E21'
or1 = '#A82F43'
or2 = '#612A37'
or3 = '#DC5433'
or4 = '#F29457'
muted_colors = sns.set_palette(sns.color_palette(colors))

# configuring fonts for plotting
font = FontProperties()
font.set_family('serif')
font.set_name('Andale Mono')
font.set_style('normal')
#font.set_size('normal')


%matplotlib inline 

In [3]:
#| code-fold: false
# Importing custom libraries
# ----------------------------
from data_access import*
from helper import*
from RR_lyrae_params import*
from blazhko_analysis import*
from BE_plotting import*

# Visual analysis

The final step of this project is to visually analyze and confirm the algorithm's selection of Blazhko candidates. We do this with a custom-made interface that displays 4 graphs for each star; the user inspects them and clicks 'KEEP' to confirm that the star is a Blazhko star, or 'CONTINUE' if it is not.

In [4]:
# DATA
# ======
# LINEAR data
dataL = fetch_LINEAR_sample(data_home='../inputs')

# calculated data: suffix shared by the output files loaded below
end = 'ffinal'

# light curve table
lc_analysis = pd.read_csv("../outputs/LC_ANALYSIS_"+end+".csv")
# rr lyrae
rrlyrae = pd.read_csv('../outputs/Lrrlyr_unprocessed.csv')
# blazhko candidates
blazhko_candidates = pd.read_csv('../outputs/blazhko_canmodfinal.csv')
# periodogram data
# The file is opened in a `with` block so the handle is closed after loading.
# NOTE: unpickling can execute arbitrary code -- only load files produced by
# this project.
with open("../outputs/periodograms_"+end+".pkl", "rb") as periodogram_file:
    periodogr = pickle.load(periodogram_file)
# fits data
fits = np.load("../outputs/fits_"+end+".npy", allow_pickle=True)
# ztf data
ztf_data = np.load("../outputs/ztf_data_filter"+end+".npy", allow_pickle=True)

This process was conducted in 4 phases:
1. ### The phased graph
Firstly, the shape and noisiness of the phased light curves were examined. If the light curve had the correct shape, there wasn't too much noise ($\chi^2$ wasn't too high), and the phased light curve appeared smeared (a sign of modulation), the light curve passed the first phase.

2. ### The periodogram
Secondly, we checked whether the algorithm had correctly identified the Blazhko frequency. If the Blazhko peaks were right next to the yearly alias and not statistically significant, or if the algorithm had detected a false signal, the light curve did not pass this phase.

3. ### The full data
Thirdly, the general shape of the light curve was looked at. If the data showed a wave-like pattern, it would indicate the presence of amplitude modulation.

4. ### Seasons of observation
Finally, in the most important phase, we analyzed the light curve fit for each observation season, comparing how the phased and normalized light curve changed from season to season. A translation along the x-coordinate indicates *phase modulation*, while a change in the width of the light curve between seasons indicates *amplitude modulation*.

To be considered a Blazhko star, a star must satisfy all 4 criteria, the first and final criteria, or just the second criterion.

We split up our 409 candidates into 3 categories for more efficient visual analysis:
1. ZTF periodogram group
2. LINEAR periodogram group
3. Other metrics group

## ZTF periodogram group

In [None]:
# ZTF periodogram group
# ---------------------
# Run the analysis with parameter='IndicatorType' and value='Z'.
# NOTE(review): how these two arguments select candidates is defined inside
# category_analysis -- confirm there.
ztf_per = category_analysis(
    blazhko_candidates,
    fits,
    periodogr,
    ztf_data,
    dataL,
    id_list=None,
    parameter='IndicatorType',
    value='Z',
)

In [None]:
# Fetch the table returned by the ZTF-group analysis and write it to CSV.
blazhko_analyzer = ztf_per.get_save_data()
# The group tag is written inline rather than by reassigning `end`: `end` is
# the suffix the data-loading cell uses to build its input paths, so
# overwriting it here would make a re-run of that cell load the wrong files.
blazhko_analyzer.to_csv("../outputs/groupZ_p.csv", index=False)

print(blazhko_analyzer.shape)

## LINEAR periodogram group

In [None]:
# LINEAR periodogram group
# ------------------------
# Run the analysis with parameter='IndicatorType' and value='L'.
# NOTE(review): how these two arguments select candidates is defined inside
# category_analysis -- confirm there.
linear_per = category_analysis(
    blazhko_candidates,
    fits,
    periodogr,
    ztf_data,
    dataL,
    id_list=None,
    parameter='IndicatorType',
    value='L',
)

In [None]:
# Fetch the table returned by the LINEAR-group analysis and write it to CSV.
blazhko_analyzerl = linear_per.get_save_data()
# The group tag is written inline rather than by reassigning `end`: `end` is
# the suffix the data-loading cell uses to build its input paths, so
# overwriting it here would make a re-run of that cell load the wrong files.
blazhko_analyzerl.to_csv("../outputs/groupL_p.csv", index=False)

print(blazhko_analyzerl.shape)

## Score group

In [None]:
# Remaining candidates
# --------------------
# Keep every candidate whose IndicatorType is neither 'Z' nor 'L'
# (rows with a missing IndicatorType are kept as well).
not_periodogram = ~blazhko_candidates['IndicatorType'].isin(['Z', 'L'])
be_other = blazhko_candidates[not_periodogram].reset_index(drop=True)
print(be_other.shape)
be_other.head()

In [None]:
# Run the analysis on the remaining candidates.
# NOTE(review): 'other' is passed positionally in the slot where the other
# calls in this notebook pass id_list=None -- confirm against the signature
# of category_analysis that this is the intended argument.
score = category_analysis(
    be_other,
    fits,
    periodogr,
    ztf_data,
    dataL,
    'other',
)

In [None]:
# Fetch the table returned by the score-group analysis and write it to CSV.
blazhko_analyzerother = score.get_save_data()
# The group tag is written inline rather than by reassigning `end`: `end` is
# the suffix the data-loading cell uses to build its input paths, so
# overwriting it here would make a re-run of that cell load the wrong files.
blazhko_analyzerother.to_csv("../outputs/groupscore.csv", index=False)

print(blazhko_analyzerother.shape)

---

In [None]:
# Stack the three group tables (ZTF, LINEAR, score) into a single catalogue
# with a fresh 0..N-1 row index.
group_tables = [blazhko_analyzer, blazhko_analyzerl, blazhko_analyzerother]
FINAL_BE_CATALOGUE = pd.concat(group_tables, ignore_index=True, axis=0)

print(FINAL_BE_CATALOGUE.shape)
FINAL_BE_CATALOGUE.head()

In [None]:
# Write the combined catalogue to CSV, omitting the row index.
FINAL_BE_CATALOGUE.to_csv('../outputs/FINAL_BE_CATALOGUE.csv', index=False)

---

# Analysis of results

Here, we present the final stylized version of all the data collected so far, including a large table of all RR Lyrae stars with flags marking each one as not a candidate, a candidate, or a confirmed Blazhko star. We also present graphs which further analyze our results.

In [None]:
# Attach the light-curve type ('LCtype') from the RR Lyrae table to the
# analysis table.
# The lookup is keyed on the LINEAR ID, so each row receives the type of its
# own star. A positional list assignment would misalign or fail with a length
# mismatch as soon as an ID is missing from, or repeated in, `rrlyrae`.
# Stars without a match get NaN.
lctype_by_id = (
    rrlyrae
    .drop_duplicates(subset='LINEARobjectID')  # first entry per ID wins
    .set_index('LINEARobjectID')['LCtype']
)
lc_analysis['LCtype'] = lc_analysis['LINEAR id'].map(lctype_by_id)

# number of analysed stars for which a light-curve type was found
print(int(lc_analysis['LINEAR id'].isin(lctype_by_id.index).sum()))
lc_analysis.head()

## Creating the master table

In [5]:
# Preview the first rows of the light-curve analysis table.
lc_analysis.head()

Unnamed: 0,LINEAR id,Plinear,Pztf,Pmean,Pratio,NdataLINEAR,L_chi2dof,L_chi2dofR,Lmean_chi2dof,Lmean_chi2dofR,...,MainPeakZ,BlazhkoPeakZ,BlazhkoPeriodZ,BpowerRatioZ,BsignificanceZ,Ampl_diff,dP,IndicatorType,ChiType,BE_score
0,29848,0.55702,0.55704,0.55703,1.000036,301,3.0,1.4,3.3,1.6,...,1.7952,1.7982,333.3333,0.2573,10.0729,0.37,4e-05,,,
1,50402,0.643303,0.643294,0.643298,0.999986,284,0.6,0.7,0.6,0.7,...,1.5545,1.5918,26.842,0.0027,8.9208,0.21,1e-05,,,
2,62892,0.530776,0.530785,0.53078,1.000017,276,1.1,0.9,1.1,1.0,...,1.884,1.9433,16.8634,0.0048,13.1609,0.02,2e-05,,,
3,91437,0.674733,0.674737,0.674735,1.000006,177,2.8,1.3,2.8,1.3,...,1.4821,1.4849,355.8719,0.0233,16.1566,0.34,1e-05,,,
4,95250,0.31387,0.313876,0.313873,1.000019,222,0.8,0.8,0.9,0.8,...,3.186,3.1889,342.4658,0.0028,13.4528,0.02,2e-05,,,


In [None]:
# Build the master table from a subset of lc_analysis columns.
# Keys are the master-table column names, values the source columns.
master_columns = {
    'LINEAR ID': 'LINEAR id',
    'Plinear': 'Plinear',
    'Pztf': 'Pztf',
    'N_L': 'NdataLINEAR',
    'N_Z': 'NdataZTF',
    'L_chi2r': 'L_chi2dofR',
    'Z_chi2r': 'Zchi2dofR',
    'L_chi2': 'L_chi2dof',
    'Z_chi2': 'Zchi2dof',
    'Lampl': 'Lampl',
    'Zampl': 'Zampl',
    'Ampl_diff': 'Ampl_diff',
    'BpeakL': 'BlazhkoPeakL',
    'BpeakZ': 'BlazhkoPeakZ',
    'BperiodL': 'BlazhkoPeriodL',
    'BperiodZ': 'BlazhkoPeriodZ',
    'LCtype': 'LCtype',
}
lc_MASTER = pd.DataFrame(
    {target: lc_analysis[source] for target, source in master_columns.items()}
)

# Flag columns start with placeholder values: '-' for the periodogram flag,
# 0 for the score and the string '-1' for the Blazhko flag.
lc_MASTER['Periodogram_f'] = '-'
lc_MASTER['B_score'] = 0
lc_MASTER['Blazhko_f'] = '-1'

In [None]:
# LINEAR IDs of the master table, the candidate table and the final catalogue,
# as plain Python lists.
lcLids = lc_MASTER['LINEAR ID'].tolist()
bcLids = blazhko_candidates['LINEAR id'].tolist()
bLids = FINAL_BE_CATALOGUE['LINEAR id'].tolist()

In [None]:
# Fill in B_score, Periodogram_f and Blazhko_f for every star in lc_MASTER.
#
# Blazhko_f values written here:
#   '0' -> the star is in blazhko_candidates but not in FINAL_BE_CATALOGUE
#   '1' -> it is in FINAL_BE_CATALOGUE with an IndicatorType other than Z/L/LZ
#   '2' -> it is in FINAL_BE_CATALOGUE with IndicatorType 'Z', 'L' or 'LZ'
# Stars that are not candidates keep the defaults set when lc_MASTER was built.
#
# Dictionary lookups replace three nested scans over the ID lists. They also
# give '0' to a candidate when FINAL_BE_CATALOGUE is empty, which an inner
# loop over an empty list would never assign.

# setdefault keeps the first occurrence of an ID, like a scan that stops at
# the first match.
candidate_score = {}
for linear_id, be_score in zip(bcLids, blazhko_candidates['BE_score']):
    candidate_score.setdefault(linear_id, be_score)

confirmed_type = {}
for linear_id, indicator in zip(bLids, FINAL_BE_CATALOGUE['IndicatorType']):
    confirmed_type.setdefault(linear_id, indicator)

for n, linear_id in enumerate(lcLids):
    if linear_id not in candidate_score:
        continue  # not a candidate: leave the default flags untouched

    be_score = candidate_score[linear_id]
    if be_score > 0:  # False for NaN, so a missing score leaves B_score at 0
        lc_MASTER.loc[n, 'B_score'] = be_score

    if linear_id not in confirmed_type:
        lc_MASTER.loc[n, 'Blazhko_f'] = '0'
    elif confirmed_type[linear_id] in ('Z', 'L', 'LZ'):
        lc_MASTER.loc[n, 'Periodogram_f'] = confirmed_type[linear_id]
        lc_MASTER.loc[n, 'Blazhko_f'] = '2'
    else:
        lc_MASTER.loc[n, 'Blazhko_f'] = '1'

In [None]:
# Preview the first rows of the master table.
lc_MASTER.head()

In [None]:
# Write the master table to CSV, omitting the row index.
lc_MASTER.to_csv("../outputs/lc_MASTER.csv", index=False)

In [None]:
# Export the master table as a LaTeX tabular (row index omitted).
with open('../PAPER/lc_master.tex', "w") as tex_file:
    latex_table = lc_MASTER.to_latex(index=False)
    tex_file.write(latex_table)

---

### Selecting the Blazhko stars from the main catalogue

In [None]:
# Select the rows of lc_MASTER whose Blazhko flag is 0, 1 or 2.
# Blazhko_f is stored as strings ('-1', '0', '1', '2'), so the comparison must
# use strings too; testing against the integers 0/1/2 never matches and leaves
# the selection empty. A boolean mask also avoids growing the frame row by row.
# NOTE(review): flag '0' is included here as in the original list of accepted
# values -- confirm whether only '1' and '2' should count as Blazhko stars.
has_blazhko_flag = lc_MASTER['Blazhko_f'].astype(str).isin(['0', '1', '2'])
b_MAIN = lc_MASTER[has_blazhko_flag].reset_index(drop=True)

print(b_MAIN.shape)
b_MAIN.head()

---

# Plots

In [None]:
# Scatter plot of the LINEAR chi2 column against the ZTF chi2 column: every
# star in lc_MASTER as crosses, the entries of FINAL_BE_CATALOGUE as circles.
fig = plt.figure(figsize=(10,10))
ax = plt.axes(xscale='linear', yscale='linear')
for axis in (ax.xaxis, ax.yaxis):
    axis.set_tick_params(labelsize=18)
plt.rc('font', size=24)

sns.scatterplot(data=lc_MASTER, x='L_chi2r', y='Z_chi2r', marker='x', color=b1, ax=ax)
sns.scatterplot(data=FINAL_BE_CATALOGUE, x='L_chi2dofR', y='Zchi2dofR', marker='o', color=or3, ax=ax)

ax.set_xlim([0, 20])
ax.set_ylim([0, 20])
ax.set_xlabel(r'LINEAR $\chi^2_{dof}$', fontproperties=font, fontsize=22)
ax.set_ylabel(r'ZTF $\chi^2_{dof}$', fontproperties=font, fontsize=22)

# Dashed guide lines.
# NOTE(review): 1.8 / 3.0 and 2 / 4 look like the chi2 selection thresholds
# for LINEAR and ZTF -- confirm against the candidate-selection code.
for x_cut in (1.8, 3.0):
    ax.plot([x_cut, x_cut], [0, 20], ls='--', c='black')
for y_cut in (2, 4):
    ax.plot([0, 20.0], [y_cut, y_cut], ls='--', c='black')

#plt.savefig('../img_rsc/chi_scatter_zi.png', dpi=150)
plt.show()

- number of blazhko stars
- algorithm success
- incidence rate