In [1]:
import pandas as pd
from lcfunctions import lasair_clean, load_lasair_lc
from featureextractor import FeatureExtractor
from metadatafeatures import gaiadr3append
from raw_processing import label_schemes
import warnings
warnings.filterwarnings("ignore")

### Load examples and labels

In [2]:
# Read the labelled catalogue CSV from the processed-data folder.
cvs_df = pd.read_csv(
    '../processed_data/AAVSOCVsZTFxmatch_21032023_BTS_manual_labelled.csv'
)

# Pass the raw table through label_schemes; its return value is the working dataset
# used by the cells below.
dataset = label_schemes(cvs_df)

# Last expression of the cell: render the processed table.
dataset

Unnamed: 0,oid_ztf,oid_aavso,type_aavso,ra,dec,Eclipsing,CV_Types,CV_subtypes,CV_subsubtypes,eclipse_clear,manual_label,Clarity,labels_1,labels_2,labels_3,labels_4
0,ZTF18abryuah,ASASSN-19dp,AM,35.745917,43.653639,0,polar,,,0.0,AM_Her,1.0,polar,polar,polar,magnetic
1,ZTF18abtrvgp,BMAM-V789,AM,38.492292,41.623972,0,polar,,,0.0,AM_Her,1.0,polar,polar,polar,magnetic
2,ZTF17aaaehby,CSS 091026:002637+242916,AM,6.654417,24.487694,0,polar,,,0.0,AM_Her,1.0,polar,polar,polar,magnetic
3,ZTF18abgjgiq,MGAB-V3453,AM,8.972375,43.561528,0,polar,,,0.0,AM_Her,1.0,polar,polar,polar,magnetic
4,ZTF18abumlux,MGAB-V3769,AM,353.638167,40.430611,0,polar,,,0.0,AM_Her,1.0,polar,polar,polar,magnetic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5703,ZTF22aazmooy,,N,10.539250,41.258667,0,nova,,,0.0,nova_decline,1.0,nova_decline,nova,nova,nova
5704,ZTF22abfxmpc,,N,10.625167,41.936806,0,nova,,,0.0,nova,1.0,nova,nova,nova,nova
5705,ZTF22abnrgno,,N,10.941000,41.266278,0,nova,,,0.0,nova,1.0,nova,nova,nova,nova
5706,ZTF22abtltcw,,N,10.357167,40.739806,0,nova,,,0.0,nova_decline,1.0,nova_decline,nova,nova,nova


### Extract custom features

In [3]:
# Set to True to write the freshly computed feature table to disk before it is re-read below.
save = False
# Date suffix of the lasair light-curve directory to read from.
date = '2023_03_25'
# AAVSO identifiers of every object (not used further in this cell).
lst = dataset.oid_aavso.to_list()

# ZTF identifiers to process. The empty slice [0:0] selects nothing, so the loop is
# skipped and the cell relies on the CSV already on disk; widen the slice to recompute.
objlist = dataset['oid_ztf'].to_list()[0:0]

# Collect one feature frame per object and concatenate once after the loop.
# (DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies all accumulated data on every iteration.)
feature_rows = []
for obj in objlist:
    # Named obj_type rather than `type` so the builtin is not shadowed in the kernel.
    obj_type = dataset.loc[dataset['oid_ztf'] == obj, 'type_aavso'].iloc[0]
    print(obj, obj_type)

    # Load and process lasair light curve
    lc_test = load_lasair_lc(oid=obj, path=f'../lightcurves_dataset/lasair_{date}')
    lc_appmag_test = lasair_clean(lc_test, limit=25, magerrlim=1)

    # Work on a copy so the cleaned light curve itself is left untouched
    lc = lc_appmag_test.copy()

    # Extract features
    fe = FeatureExtractor(lc=lc)
    feets = fe.extract_feets()
    custom = fe.extract_custom()

    # Concatenate custom features to feets (side by side, as columns)
    features_single = pd.concat([feets, custom], axis=1)
    feature_rows.append(features_single)

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was processed.
if feature_rows:
    feature_df = pd.concat(feature_rows, ignore_index=True)
else:
    feature_df = pd.DataFrame()

# Identify columns with zero std. The std is restricted to numeric columns and the
# selection is done by label, so a non-numeric column cannot raise or misalign the mask.
feature_std = feature_df.std(numeric_only=True)
zero_std = feature_std[feature_std == 0].index
# Drop columns with zero std
feature_df = feature_df.drop(columns=zero_std)

# Add source data to the dataframe
lc_features_df = pd.concat([dataset, feature_df], axis=1)

if save:
    # Save the dataframe
    lc_features_df.to_csv('../processed_data/lc_features.csv', index=False)

# Load the dataframe. This always replaces the frame built above with the copy on disk.
lc_features_df = pd.read_csv('../processed_data/lc_features.csv')

# Replace NaN in the column at position 8 with the string 'None' (not Python None).
# NOTE(review): the column is addressed by position; confirm against the CSV header
# which label column this is.
# The column is replaced by label rather than written through .iloc so that a non-string
# column is converted cleanly instead of having strings set into it in place.
none_fill_col = lc_features_df.columns[8]
lc_features_df[none_fill_col] = lc_features_df[none_fill_col].fillna('None')

# Save this dataframe
lc_features_df.to_csv('../processed_data/lc_features.csv', index=False)

### Extract Gaia features

In [6]:
# Location of the Gaia feature table on disk.
gaia_csv_path = '../processed_data/gaia_features.csv'

# Build the Gaia feature table for the dataset. 'oid_ztf' is passed as the second
# argument (presumably the identifier column to use; confirm in gaiadr3append).
gaia_df = gaiadr3append(dataset, 'oid_ztf')

# Round-trip through CSV: write the table, then read it straight back into gaia_df.
gaia_df.to_csv(gaia_csv_path, index=False)
gaia_df = pd.read_csv(gaia_csv_path)

# Last expression of the cell: render the table.
gaia_df

INFO: Login to gaia TAP server [astroquery.gaia.core]
OK
INFO: Login to gaia data server [astroquery.gaia.core]
OK
500 Error 500:
Can not execute the following SQL: 
DROP TABLE user_dmistry.ast_table
Because: ERROR: table "ast_table" does not exist
500 Error 500:
Can not execute the following SQL: 
DROP TABLE user_dmistry.xmatch_table
Because: ERROR: table "xmatch_table" does not exist
Sending pytable.
Uploaded table 'ast_table'.
Retrieving table 'user_dmistry.ast_table'
Table 'user_dmistry.ast_table' updated.
INFO: Query finished. [astroquery.utils.tap.core]
Table 'ast_table' deleted.
Removed jobs: '['1684063095288O']'.
INFO: Gaia TAP server logout OK [astroquery.gaia.core]
INFO: Gaia data server logout OK [astroquery.gaia.core]
INFO: Login to gaia TAP server [astroquery.gaia.core]
OK
INFO: Login to gaia data server [astroquery.gaia.core]
OK
500 Error 500:
Can not execute the following SQL: 
DROP TABLE user_dmistry.ast_table
Because: ERROR: table "ast_table" does not exist
500 Error 5

Unnamed: 0,oid_ztf,oid_aavso,type_aavso,ra,dec,Eclipsing,CV_Types,CV_subtypes,CV_subsubtypes,eclipse_clear,...,ag_gspphot_upper,ebpminrp_gspphot,ebpminrp_gspphot_lower,ebpminrp_gspphot_upper,ra_prop,dec_prop,distance,absmag_g,absmag_bp,absmag_rp
0,ZTF17aabpjjg,MGAB-V1107,UG,349.877167,51.838167,0,dwarf_nova,,,,...,,,,,349.877166,51.838167,-12998.128381,,,
1,ZTF19ablnrfz,XM79HT,UG,338.221333,57.001833,0,dwarf_nova,,,,...,,,,,338.221334,57.001833,773.382946,11.259908,11.905914,10.693815
2,ZTF18aabvkix,V1227 Her,UGSU+E,253.496083,20.169556,1,dwarf_nova,dwarf_nova_SU_Uma,,0.0,...,,,,,253.496083,20.169555,768.660740,8.673339,8.824500,8.237304
3,ZTF18aabhjxp,ZTF18aabhjxp,UGZ,89.448833,54.331306,0,dwarf_nova,dwarf_nova_Z_Cam,,0.0,...,0.2866,0.1436,0.1296,0.1586,89.448833,54.331306,2027.469375,4.708795,5.062490,4.114098
4,ZTF19acgftux,ZTF19acgftux,UG,276.885250,86.783306,0,dwarf_nova,,,,...,,,,,276.885260,86.783306,7163.031442,6.186868,6.696504,5.288776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5703,ZTF22aazmooy,,N,10.539250,41.258667,0,nova,,,0.0,...,,,,,,,,,,
5704,ZTF22abfxmpc,,N,10.625167,41.936806,0,nova,,,0.0,...,,,,,,,,,,
5705,ZTF22abnrgno,,N,10.941000,41.266278,0,nova,,,0.0,...,,,,,,,,,,
5706,ZTF22abtltcw,,N,10.357167,40.739806,0,nova,,,0.0,...,,,,,,,,,,


In [19]:
# Read the Gaia feature table back from disk.
gaia_features_path = '../processed_data/gaia_features.csv'
gaia_df = pd.read_csv(gaia_features_path)

# Split the header: the first 16 columns are kept as they are, everything after
# them is treated as a candidate feature, together with 'ra' and 'dec'.
all_cols = gaia_df.columns.tolist()
leading_cols = all_cols[:16]
feature_cols = ['ra', 'dec'] + all_cols[16:]

gaia_features = gaia_df.loc[:, feature_cols]
# Keep only the float64-typed candidates.
num_cols = gaia_features.select_dtypes(include='float64').columns.tolist()
# NOTE(review): 'ra' and 'dec' are listed explicitly in feature_cols; if they also sit
# within the first 16 columns and are float64 (as the displayed table suggests), they
# appear twice in final_cols and therefore twice in the saved CSV - confirm this is intended.
final_cols = leading_cols + num_cols

gaia_df2 = gaia_df.loc[:, final_cols]

# Overwrite the same CSV with the filtered table.
# NOTE(review): the input file is replaced, so re-running this cell operates on its own output.
gaia_df2.to_csv(gaia_features_path, index=False)