# Build Dataset

We focus on objects classified as CV within AAVSO VSX. This resource is the most up-to-date catalogue of transients/variable stars available online. The resource also includes subtypes that are going to be important for this work.

## 1. Imports

In [1]:
import os
import lasair
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord

## 2. Generate list of CVs from those catalogued in AAVSO VSX

### 2.1 Load csv of CVs and suspected CVs

See https://www.aavso.org/vsx/. From this link, I downloaded a list of targets classified as CV by AAVSO. The variability types are detailed in https://www.aavso.org/vsx/index.php?view=about.vartypes.

In [2]:
# Date stamp (as used in the processed_data file names) identifying the catalogue download.
date = '08092023'
AAVSO_CVS = pd.read_csv(f"../processed_data/AAVSOCVsraw_{date}.csv")
print(f'Number of examples: {AAVSO_CVS.shape[0]}')

# Convert coordinates to ra and dec (degrees).
# The whole 'Coords' column is parsed in a single vectorised SkyCoord call
# instead of building two SkyCoord objects per row, which parsed every
# coordinate string twice.
coords = SkyCoord(AAVSO_CVS['Coords'].astype(str).to_list(),
                  unit=(u.hourangle, u.deg), equinox='J2000')
AAVSO_CVS['ra'] = coords.ra.deg
AAVSO_CVS['dec'] = coords.dec.deg

AAVSO_CVS.head()

Number of examples: 15895




Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,RX And,000-BBC-342,01 04 35.54 +41 17 57.8,And,UGZ,0.209893,10.3 - 14.8 V,16.148083,41.299389
1,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556
2,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278
3,DX And,000-BCR-612,23 29 46.68 +43 45 04.0,And,UGSS,0.440502,11.0 - 15.5 V,352.4445,43.751111
4,FN And,000-BBC-497,01 11 57.60 +35 17 24.3,And,UGSS,--,12.9 - 19.3 V,17.99,35.290083


### 2.2 Identify those observable by ZTF.

Perform some processing to acquire the coordinates in degree format; select only those observable by ZTF (greater than -31 degree declination); inspect the subtypes.

In [3]:
# Cap the number of rows pandas prints so the preview below stays short.
pd.set_option('display.max_rows', 10)

# Keep only the targets above the ZTF declination limit (dec > -31 degrees)
# and renumber the surviving rows from zero.
AAVSO_CVS_1 = AAVSO_CVS.loc[AAVSO_CVS['dec'] > -31].reset_index(drop=True)
AAVSO_CVS_1

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,RX And,000-BBC-342,01 04 35.54 +41 17 57.8,And,UGZ,0.209893,10.3 - 14.8 V,16.148083,41.299389
1,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556
2,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278
3,DX And,000-BCR-612,23 29 46.68 +43 45 04.0,And,UGSS,0.440502,11.0 - 15.5 V,352.444500,43.751111
4,FN And,000-BBC-497,01 11 57.60 +35 17 24.3,And,UGSS,--,12.9 - 19.3 V,17.990000,35.290083
...,...,...,...,...,...,...,...,...,...
11553,ZTF20abmbcza,--,20 22 34.43 +21 02 09.6,Vul,UG:,--,17.2 - 23: r,305.643458,21.036000
11554,ZTF20abopngr,--,20 52 57.13 +26 18 08.2,Vul,UG,--,17.2 - 23.7 r,313.238042,26.302278
11555,ZTF21acatkss,--,20 48 37.96 +27 03 18.8,Vul,UG,--,18.2 - 22.3 r,312.158167,27.055222
11556,ZTF22abmlesb,000-BPL-654,20 51 42.53 +26 30 48.0,Vul,UG,--,15.8 - <23 g,312.927208,26.513333


### 2.3 Inspect CV subtypes

In [4]:
# Lift the row cap so that every subtype is listed in the tally below.
pd.set_option('display.max_rows', None)

# Report how many CVs passed the declination cut, then count each VSX type.
print(f'Number of CVs observable with ZTF: {len(AAVSO_CVS_1)}')
AAVSO_CVS_1['Type'].value_counts()

Number of CVs observable with ZTF: 11558


Type
UG                    5774
UG:                   1520
UGSU                   754
CV                     608
UGSU:                  275
UGWZ                   164
AM                     151
NL/VY                  149
CV:                    139
UGSS                   127
UGZ                     93
UGZ/IW                  91
UG+E                    88
NL+E                    80
UG|SN                   79
NL                      78
SN|UG                   74
UGWZ:                   69
UGSU+E                  51
DQ                      48
N                       46
UGZ:                    43
NL/VY:                  40
AM:                     38
AM+E                    37
NL:                     37
NB                      32
N:                      29
UGER                    28
CV+E                    26
UGZ/IW:                 26
UGSU/IBWD               26
UG/IBWD                 23
UGSS:                   19
UG+E:                   18
UGSS+E                  18
IBWD                   

### 2.4 Refine subtypes

A colon ( : ) after the variability type -or any other field- means the value/classification is uncertain.

A pipe character ( | ) between two different types signifies a logical OR; the classification is uncertain and all possible types are indicated. An example of this is ELL|DSCT, where the star may be an ellipsoidal binary system or a DSCT-type pulsating variable with half the given period.


In [5]:
pd.options.display.max_rows = 50

# Remove objects whose classification is uncertain (':') or that list several
# possible types ('|').  A single raw-string character class matches either
# marker; inside [...] the pipe is a literal, so no backslash escape is needed
# (the former '\|' was an invalid escape sequence in a normal string).
# na=True counts a missing Type as uncertain, so such rows are dropped exactly
# as the previous `== False` comparison dropped them.
uncertain_type = AAVSO_CVS_1['Type'].str.contains(r'[:|]', regex=True, na=True)
AAVSO_CVS_2 = AAVSO_CVS_1[~uncertain_type].reset_index(drop=True)

AAVSO_CVS_2

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,RX And,000-BBC-342,01 04 35.54 +41 17 57.8,And,UGZ,0.209893,10.3 - 14.8 V,16.148083,41.299389
1,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556
2,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278
3,DX And,000-BCR-612,23 29 46.68 +43 45 04.0,And,UGSS,0.440502,11.0 - 15.5 V,352.444500,43.751111
4,FN And,000-BBC-497,01 11 57.60 +35 17 24.3,And,UGSS,--,12.9 - 19.3 V,17.990000,35.290083
...,...,...,...,...,...,...,...,...,...
8731,ZTF19abasmma,--,19 54 27.43 +24 51 26.6,Vul,UG,--,17.5 - 21.6 g,298.614292,24.857389
8732,ZTF19abxzumq,--,20 54 29.51 +26 30 42.3,Vul,UG,--,15.3 - <23 g,313.622958,26.511750
8733,ZTF20abopngr,--,20 52 57.13 +26 18 08.2,Vul,UG,--,17.2 - 23.7 r,313.238042,26.302278
8734,ZTF21acatkss,--,20 48 37.96 +27 03 18.8,Vul,UG,--,18.2 - 22.3 r,312.158167,27.055222


## 3. Coordinate cross match AAVSO VSX CVs with ZTF targets within Lasair

### 3.1 Define Lasair token and create cache

In [6]:
# Tokens required for Lasair database access via the API: https://lasair-iris.roe.ac.uk/api.
# Credentials are read from the environment so they are never stored in the
# notebook.  NOTE: any token that was previously hardcoded in this cell should
# be treated as compromised and regenerated.
# Starter token - Ten attempts per hour (optional; not used by the cross-match below).
starterToken = os.environ.get('LASAIR_STARTER_TOKEN')
# User token - 100 queries an hour (used by the Lasair client).
token = os.environ.get('LASAIR_TOKEN')
if not token:
    # Fail here with a clear message rather than letting every cone search
    # fail later with an authentication error.
    raise RuntimeError(
        "Lasair API token not found: set the LASAIR_TOKEN environment variable "
        "before running the cross-match."
    )

# Directory used by the Lasair client as its query cache; created if missing.
os.makedirs('../cacheLasairXmatch', exist_ok=True)
    

### 3.2 Crossmatch with Lasair to obtain ZTF counterparts

In [8]:

def get_ZTFcounterparts(df, racol, deccol, radius):
    """Find the nearest ZTF object in Lasair for every row of ``df``.

    A cone search (``requestType='nearest'``) is issued per row through the
    Lasair client, using the module-level ``token`` and the
    '../cacheLasairXmatch' cache directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of targets. It is copied; the caller's frame is not modified.
    racol, deccol : str
        Names of the columns whose values are passed to the cone search as
        ``ra`` and ``dec``.
    radius : float
        Cone-search radius handed to Lasair (arcseconds).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns: 'Xmatch_obj' (ZTF object ID, or
        None when there is no counterpart) and 'separation' (the separation
        reported by Lasair, or NaN). Both columns are always present, even
        when no row matched.

    Notes
    -----
    Results are collected by row position, so the function works for any index
    (the previous ``df.at[count, ...]`` writes required a 0..n-1 index).
    Lasair reports "No object found" as an error; that is treated as a normal
    non-match. Any other failure (e.g. a dropped connection) also leaves the
    row unmatched, but is counted and reported in a warning at the end so it
    is not mistaken for a genuine non-detection.
    """
    import warnings

    # Make a copy of the dataframe.
    df = df.copy()

    # Initialise Lasair client.
    L = lasair.lasair_client(token, cache='../cacheLasairXmatch')

    matched_objects = []      # one entry per row: ZTF ID or None
    matched_separations = []  # one entry per row: separation or NaN
    failed_positions = []     # row positions whose query failed unexpectedly

    # Loop through the target list to identify ZTF counterparts.
    for count, (ra, dec) in enumerate(zip(df[racol], df[deccol]), start=0):
        print(count, ra, dec)

        obj, sep = None, float('nan')
        try:
            # Cone search of ZTF objects within Lasair that lie within
            # `radius` arcseconds of the given coordinates.
            c = L.cone(ra=ra, dec=dec, radius=radius, requestType='nearest')
            print(c)
            obj, sep = c['object'], c['separation']

        # Keep going on errors (best effort), but remember the ones that are
        # not simply "nothing at this position".
        except Exception as e:
            print(e)
            if 'No object found' not in str(e):
                failed_positions.append(count)

        matched_objects.append(obj)
        matched_separations.append(sep)

    # Positional assignment: list order equals row order.
    df['Xmatch_obj'] = matched_objects
    df['separation'] = matched_separations

    if failed_positions:
        warnings.warn(
            f"{len(failed_positions)} cone search(es) failed for reasons other than "
            f"'No object found' (first row positions: {failed_positions[:20]}); "
            "these rows were left unmatched and should be queried again."
        )

    return df


In [8]:
# Cross-match the first 4500 objects (row positions 0-4499) with a 2 arcsecond cone.
pt1 = get_ZTFcounterparts(AAVSO_CVS_2.iloc[:4500].reset_index(drop=True), 'ra', 'dec', radius=2)

# Write this first part of the cross-match to disk (row index omitted).
pt1.to_csv(path_or_buf=f'../processed_data/AAVSOCVsZTFxmatch_{date}_0_4499.csv', index=False)

0 16.148083333333332 41.299388888888885
Bad Request:{"error":"No object found ra=16.14808 dec=41.29939 radius=2.00"}
1 26.263624999999998 37.94255555555555
{'object': 'ZTF19acgtoku', 'separation': 0.27532186700614003}
2 351.7587083333333 50.12027777777778
{'object': 'ZTF17aabuphg', 'separation': 0.04368385236028549}
3 352.4445 43.75111111111111
Bad Request:{"error":"No object found ra=352.44450 dec=43.75111 radius=2.00"}
4 17.99 35.29008333333333
Bad Request:{"error":"No object found ra=17.99000 dec=35.29008 radius=2.00"}
5 18.883999999999997 37.626555555555555
{'object': 'ZTF18aabfcyi', 'separation': 0.3907323425164502}
6 36.47929166666666 37.56813888888889
{'object': 'ZTF18aabfoia', 'separation': 0.05305830573909658}
7 7.8994583333333335 43.81827777777778
HTTPSConnectionPool(host='lasair-ztf.lsst.ac.uk', port=443): Max retries exceeded with url: /api/cone/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x1404c2350>: Failed to establish a new connection: 

In [9]:
# Cross-match everything from row position 4500 onwards with the same 2 arcsecond cone.
pt2 = get_ZTFcounterparts(AAVSO_CVS_2.iloc[4500:].reset_index(drop=True), 'ra', 'dec', radius=2)

# Write this second part of the cross-match to disk (row index omitted).
pt2.to_csv(path_or_buf=f'../processed_data/AAVSOCVsZTFxmatch_{date}_4500_last.csv', index=False)

0 334.20970833333325 46.778166666666664
Bad Request:{"error":"No object found ra=334.20971 dec=46.77817 radius=2.00"}
1 331.81224999999995 52.36108333333333
{'object': 'ZTF18abburnj', 'separation': 0.05179697723308924}
2 331.6160833333333 37.78172222222222
{'object': 'ZTF18abxywka', 'separation': 0.32027287959399287}
3 333.2184166666666 46.854305555555555
{'object': 'ZTF18ablsaka', 'separation': 0.37816340207815263}
4 334.85874999999993 39.91008333333333
Bad Request:{"error":"No object found ra=334.85875 dec=39.91008 radius=2.00"}
5 338.314625 39.59430555555556
Bad Request:{"error":"No object found ra=338.31462 dec=39.59431 radius=2.00"}
6 339.1384583333333 37.77272222222222
{'object': 'ZTF18abvwidj', 'separation': 0.3476549513947395}
7 341.96441666666664 36.72202777777778
{'object': 'ZTF18abmnmuw', 'separation': 0.0792007484297882}
8 342.46370833333333 40.63408333333333
{'object': 'ZTF22aasolik', 'separation': 1.2144979063535606}
9 342.5402916666666 50.236444444444444
Bad Request:{"er

In [10]:
# Read back the two parts of the cross-match saved by the previous cells.
pt1 = pd.read_csv(f'../processed_data/AAVSOCVsZTFxmatch_{date}_0_4499.csv')
pt2 = pd.read_csv(f'../processed_data/AAVSOCVsZTFxmatch_{date}_4500_last.csv')

# Stack both parts into one table, keep only the AAVSO VSX CVs that have a
# ZTF counterparts, and renumber the rows from zero.
AAVSO_CVS_3 = (
    pd.concat([pt1, pt2], ignore_index=True)
    .dropna(subset=['Xmatch_obj'])
    .reset_index(drop=True)
)
AAVSO_CVS_3

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556,ZTF19acgtoku,0.275322
1,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278,ZTF17aabuphg,0.043684
2,FO And,000-BBC-590,01 15 32.16 +37 37 35.6,And,UGSU,0.07161,13.1 - 18.6 V,18.884000,37.626556,ZTF18aabfcyi,0.390732
3,FS And,000-BBD-866,02 25 55.03 +37 34 05.3,And,UG,--,14.9 - 19.0 V,36.479292,37.568139,ZTF18aabfoia,0.053058
4,HV And,000-BCT-527,00 40 55.38 +43 24 59.5,And,NL/VY,0.1403,15.2 - 20.6 G g,10.230750,43.416528,ZTF18abgjgma,0.181263
...,...,...,...,...,...,...,...,...,...,...,...
5900,ZTF19abasmma,--,19 54 27.43 +24 51 26.6,Vul,UG,--,17.5 - 21.6 g,298.614292,24.857389,ZTF19abasmma,0.237880
5901,ZTF19abxzumq,--,20 54 29.51 +26 30 42.3,Vul,UG,--,15.3 - <23 g,313.622958,26.511750,ZTF19abxzumq,0.049081
5902,ZTF20abopngr,--,20 52 57.13 +26 18 08.2,Vul,UG,--,17.2 - 23.7 r,313.238042,26.302278,ZTF20abopngr,0.100962
5903,ZTF21acatkss,--,20 48 37.96 +27 03 18.8,Vul,UG,--,18.2 - 22.3 r,312.158167,27.055222,ZTF21acatkss,0.152321


### 3.3 Inspect resultant dataframe and remove duplicates

Remove duplicated cross matches; 
Save dataframe to csv

In [11]:
# Show every AAVSO VSX target whose ZTF counterpart is shared with another
# target (keep=False lists all members of each duplicated group).
AAVSO_CVS_3.loc[AAVSO_CVS_3['Xmatch_obj'].duplicated(keep=False)]

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
1704,ZTF18abacprn,--,21 36 22.64 +63 09 48.5,Cep,UG,--,16.2 - 21.5 g,324.094333,63.163472,ZTF18abacprn,0.246367
1709,ZTF18abbogxd,000-BNZ-280,21 36 22.64 +63 09 48.5,Cep,UG,--,16.5 - 20.6 g,324.094333,63.163472,ZTF18abacprn,0.246367
3744,KLK-51,000-BNZ-706,19 18 52.88 +26 17 35.7,Lyr,UG,--,16.8 - 21.2 r,289.720333,26.29325,ZTF18aasnnsv,0.071291
3807,ZTF18aasnnsv,--,19 18 52.88 +26 17 35.7,Lyr,UG,--,16.9 - 22: g,289.720333,26.29325,ZTF18aasnnsv,0.071291


In [12]:
# Drop the redundant member of each duplicated cross-match shown above
# (VSX entries 'ZTF18abbogxd' and 'KLK-51', previously index labels 1709 and
# 3744).  Rows are selected by VSX name, and only while their ZTF counterpart
# is still shared with another row, so the right entries are removed even if
# a re-run of the cross-match shifts the index.  Only these two duplicates
# are removed here.
AAVSO_CVS_4 = AAVSO_CVS_3[
    ~(AAVSO_CVS_3['Name'].isin(['ZTF18abbogxd', 'KLK-51'])
      & AAVSO_CVS_3['Xmatch_obj'].duplicated(keep=False))
].reset_index(drop=True)
AAVSO_CVS_4

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556,ZTF19acgtoku,0.275322
1,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278,ZTF17aabuphg,0.043684
2,FO And,000-BBC-590,01 15 32.16 +37 37 35.6,And,UGSU,0.07161,13.1 - 18.6 V,18.884000,37.626556,ZTF18aabfcyi,0.390732
3,FS And,000-BBD-866,02 25 55.03 +37 34 05.3,And,UG,--,14.9 - 19.0 V,36.479292,37.568139,ZTF18aabfoia,0.053058
4,HV And,000-BCT-527,00 40 55.38 +43 24 59.5,And,NL/VY,0.1403,15.2 - 20.6 G g,10.230750,43.416528,ZTF18abgjgma,0.181263
...,...,...,...,...,...,...,...,...,...,...,...
5898,ZTF19abasmma,--,19 54 27.43 +24 51 26.6,Vul,UG,--,17.5 - 21.6 g,298.614292,24.857389,ZTF19abasmma,0.237880
5899,ZTF19abxzumq,--,20 54 29.51 +26 30 42.3,Vul,UG,--,15.3 - <23 g,313.622958,26.511750,ZTF19abxzumq,0.049081
5900,ZTF20abopngr,--,20 52 57.13 +26 18 08.2,Vul,UG,--,17.2 - 23.7 r,313.238042,26.302278,ZTF20abopngr,0.100962
5901,ZTF21acatkss,--,20 48 37.96 +27 03 18.8,Vul,UG,--,18.2 - 22.3 r,312.158167,27.055222,ZTF21acatkss,0.152321


In [13]:
# Write the de-duplicated cross-match table to disk (row index omitted).
AAVSO_CVS_4.to_csv(path_or_buf=f'../processed_data/AAVSOCVsZTFxmatch_{date}.csv', index=False)


### Add sources from the Bright Transient Survey (BTS)

In [49]:
# Read the de-duplicated AAVSO/ZTF cross-match and the BTS nova list from disk.
df_ZTF_CVs = pd.read_csv(f'../processed_data/AAVSOCVsZTFxmatch_{date}.csv')
bts_cvs = pd.read_csv(f'../processed_data/BTS_novae_{date}.csv')

# Keep only the BTS sources whose ZTF ID is not already a cross-matched object.
new_novae = bts_cvs.loc[~bts_cvs['ZTFID'].isin(df_ZTF_CVs['Xmatch_obj'])].copy()

# Join RA and Dec into one string, mirroring the 'Coords' column of df_ZTF_CVs.
new_novae['RA_Dec'] = new_novae['RA'].astype(str) + ' ' + new_novae['Dec'].astype(str)

# Derive ra and dec in degrees from the combined coordinate string.
new_novae['ra'] = new_novae['RA_Dec'].map(lambda pos: SkyCoord(pos, unit=(u.hourangle, u.deg)).ra.deg)
new_novae['dec'] = new_novae['RA_Dec'].map(lambda pos: SkyCoord(pos, unit=(u.hourangle, u.deg)).dec.deg)

# Align the BTS column names with df_ZTF_CVs and tag every added source as a nova ('N').
new_novae = new_novae.rename(columns={'ZTFID':'Xmatch_obj', 'IAUID':'AUID', 'RA_Dec':'Coords'})
new_novae['Type'] = 'N'

# Append the shared columns of the new novae below the existing rows.
df_ZTF_CVs = pd.concat(
    [df_ZTF_CVs, new_novae[['Xmatch_obj','AUID','ra','dec', 'Coords','Type']]],
    ignore_index=True,
)

# Write the combined table to disk (row index omitted).
df_ZTF_CVs.to_csv(path_or_buf=f'../processed_data/AAVSOCVsZTFxmatch_{date}_BTS.csv', index=False)

pd.set_option('display.max_rows', 10)
df_ZTF_CVs


Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,AR And,000-BBD-275,01 45 03.27 +37 56 33.2,And,UGSS,0.163,11.0 - 17.6 V,26.263625,37.942556,ZTF19acgtoku,0.275322
1,BV And,000-BCT-512,23 27 02.09 +50 07 13.0,And,UG,(45),15.3 - 18.5 V,351.758708,50.120278,ZTF17aabuphg,0.043684
2,FO And,000-BBC-590,01 15 32.16 +37 37 35.6,And,UGSU,0.07161,13.1 - 18.6 V,18.884000,37.626556,ZTF18aabfcyi,0.390732
3,FS And,000-BBD-866,02 25 55.03 +37 34 05.3,And,UG,--,14.9 - 19.0 V,36.479292,37.568139,ZTF18aabfoia,0.053058
4,HV And,000-BCT-527,00 40 55.38 +43 24 59.5,And,NL/VY,0.1403,15.2 - 20.6 G g,10.230750,43.416528,ZTF18abgjgma,0.181263
...,...,...,...,...,...,...,...,...,...,...,...
5925,,AT2022oyb,00:42:29.41 +41:15:43.2,,N,,,10.622542,41.262000,ZTF22aatixbc,
5926,,AT2022qzf,00:42:09.42 +41:15:31.2,,N,,,10.539250,41.258667,ZTF22aazmooy,
5927,,AT2022ubf,00:42:30.04 +41:56:12.5,,N,,,10.625167,41.936806,ZTF22abfxmpc,
5928,,AT2022yax,00:43:45.84 +41:15:58.6,,N,,,10.941000,41.266278,ZTF22abnrgno,


## 4.0 Organise subtypes

### 4.1 Display CV types

See https://www.aavso.org/vsx/index.php?view=about.vartypes for the variable types.

In [50]:
# Re-read the combined AAVSO + BTS table saved above.
df_ZTF_CVs = pd.read_csv(filepath_or_buffer=f'../processed_data/AAVSOCVsZTFxmatch_{date}_BTS.csv')
pd.set_option('display.max_rows', None)

# Count how many objects carry each VSX type.
df_ZTF_CVs['Type'].value_counts()

Type
UG                    3855
UGSU                   553
CV                     286
AM                     129
NL/VY                  125
UGSS                   113
UGZ                     85
UGZ/IW                  83
NL+E                    78
UG+E                    74
UGWZ                    66
NL                      56
N                       41
UGSU+E                  39
DQ                      35
AM+E                    32
UGER                    27
CV+E                    21
UGSU/IBWD               20
UGSS+E                  17
UG/IBWD                 15
UG/DQ                   13
NB                      12
NL/VY+E                 10
UGZ/IW+E                 9
UG+VY                    9
UGER+UGZ                 9
UGZ+E                    7
NR                       6
UGZ+VY                   6
UGWZ+E                   5
UGWZ+ZZ/GWLIB            4
NA+E                     4
NA+UG                    4
UGSU/IBWD+E              4
DQ+E                     4
N+E                    

### 4.2 Label main CV types and subclasses

**Overarching class**

(CV) Cataclysmic Variable

**Subclasses**

(N) Novae
* (NA) Fast novae
* (NB) Slow novae
* (NC) Very slow novae
* (NR) Recurrent novae

(UG) Dwarf Nova
* (UGSS) SS-Cyg 
* (UGSU) SU-UMa
    * (UGWZ) WZ Sge
    * (UGER) ER UMa
* (UGZ) Z Cam
    * (UGZ/IW) IW And

(IBWD) AM CVn - also includes (UGSU/IBWD; UG/IBWD)

(DQ) DQ Herculis - Intermediate Polars

(AM) AM Herculis - Polars

(NL) Nova-likes
* (VY) VY Scl


### 4.2.1 Label main CV types

In [51]:
def _type_mask(frame, include, exclude=()):
    """Boolean mask of rows whose 'Type' contains `include` and none of `exclude`.

    All patterns are matched as literal substrings. Rows with a missing
    'Type' are never selected.
    """
    types = frame['Type']
    mask = types.str.contains(include, regex=False, na=False)
    for pattern in exclude:
        mask &= ~types.str.contains(pattern, regex=False, na=False)
    return mask


# Remove non-CV types
df_ZTF_CVs = df_ZTF_CVs[~df_ZTF_CVs.Type.isin(['V838MON', 'CBSS+E'])].reset_index(drop=True)

# Dataframe of eclipsing systems as defined by the AAVSO VSX.
# A type is eclipsing when it still contains an 'E' once the 'UGER' and 'ELL'
# codes (whose 'E' does not denote eclipses) are stripped out.  Stripping,
# rather than rejecting any type that mentions those codes, keeps compound
# types such as 'UGER+E' in the eclipsing set.
has_eclipse_flag = (
    df_ZTF_CVs['Type']
    .str.replace('UGER', '', regex=False)
    .str.replace('ELL', '', regex=False)
    .str.contains('E', regex=False, na=False)
)
Eclipsing = df_ZTF_CVs[has_eclipse_flag]

# List of eclipsing system types.
Ecl_Nms = Eclipsing['Type'].value_counts().index.to_list()

# Label eclipsing systems (1) and everything else (0).
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(Ecl_Nms)), 'Eclipsing'] = 1
df_ZTF_CVs.loc[(~df_ZTF_CVs['Type'].isin(Ecl_Nms)), 'Eclipsing'] = 0


# Label CV main types.
# The assignments below run in a fixed order and each one overwrites any label
# written by an earlier one for the same row, so the order must be kept.

# (N) Novae (Classical/Recurrent)
Nov = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'N', exclude=['NL'])]
Nov_Nms = Nov['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(Nov_Nms)), 'CV_Types'] = 'nova'

# (DN) Dwarf Novae
DN = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'UG', exclude=['IBWD', 'DQ', 'NA', 'NB'])]
DN_Nms = DN['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(DN_Nms)), 'CV_Types'] = 'dwarf_nova'

# (NL) Nova-likes
NL = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'NL', exclude=['IBWD', 'DQ', 'NA', 'NB'])]
NL_Nms = NL['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(NL_Nms)), 'CV_Types'] = 'nova_like'

# (AM) AM Her
AM = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'AM', exclude=['IBWD', 'NA', 'NB', 'NC'])]
AM_Nms = AM['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(AM_Nms)), 'CV_Types'] = 'polar'

# (DQ) DQ Her
DQ = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'DQ', exclude=['IBWD', 'NA', 'NB', 'NC'])]
DQ_Nms = DQ['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(DQ_Nms)), 'CV_Types'] = 'intermediate_polar'

# (IBWD) AM CVn
IBWD = df_ZTF_CVs[_type_mask(df_ZTF_CVs, 'IBWD', exclude=['NA', 'NB', 'NC'])]
IBWD_Nms = IBWD['Type'].value_counts().index.to_list()
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(IBWD_Nms)), 'CV_Types'] = 'AMCVn'

# Additional (NA) Nova: 'NA+NL/VY' is skipped by the nova rule (it contains
# 'NL') and by the nova-like rule (it contains 'NA'), so it is labelled here.
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(['NA+NL/VY'])), 'CV_Types'] = 'nova'

# Display value counts for CV types.
df_ZTF_CVs.CV_Types.value_counts()



CV_Types
dwarf_nova            4981
nova_like              279
polar                  161
nova                    94
intermediate_polar      54
AMCVn                   50
Name: count, dtype: int64

### 4.2.2 Label CV subclasses and subsubclasses

In [54]:
# Label subclasses and subsubclasses.
#
# Each rule reads: for rows whose main class (CV_Types) equals the first entry
# and whose VSX 'Type' contains the second entry, write the fourth entry into
# the column named by the third.  Rules are applied top to bottom, so a later
# rule overwrites an earlier one when both hit the same row and column.
_subclass_rules = [
    # (dwarf_nova) Dwarf Nova subclasses and subsubclasses
    ('dwarf_nova', 'UGSS', 'CV_subtypes', 'dwarf_nova_U_Gem'),
    ('dwarf_nova', 'UGSU', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGWZ', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGER', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGWZ', 'CV_subsubtypes', 'dwarf_nova_SU_Uma_WZ_Sge'),
    ('dwarf_nova', 'UGER', 'CV_subsubtypes', 'dwarf_nova_SU_Uma_ER_Uma'),
    ('dwarf_nova', 'UGZ', 'CV_subtypes', 'dwarf_nova_Z_Cam'),
    ('dwarf_nova', 'UGZ/IW', 'CV_subsubtypes', 'dwarf_nova_Z_Cam_IW_And'),
    # (N) Nova subclasses
    ('nova', 'NA', 'CV_subtypes', 'nova_fast'),
    ('nova', 'NB', 'CV_subtypes', 'nova_slow'),
    ('nova', 'NC', 'CV_subtypes', 'nova_very_slow'),
    ('nova', 'NR', 'CV_subtypes', 'nova_recurrent'),
    # (NL) Nova-like subclasses
    ('nova_like', 'VY', 'CV_subtypes', 'nova_like_VY_Scl'),
]

for _main_class, _type_code, _target_col, _label in _subclass_rules:
    _rows = df_ZTF_CVs['CV_Types'].eq(_main_class) & df_ZTF_CVs['Type'].str.contains(_type_code, na=False)
    df_ZTF_CVs.loc[_rows, _target_col] = _label

# Show how many objects ended up in each subclass and subsubclass.
pd.set_option('display.max_rows', 50)
print(df_ZTF_CVs['CV_subtypes'].value_counts())
print(df_ZTF_CVs['CV_subsubtypes'].value_counts())

CV_subtypes
dwarf_nova_SU_Uma    698
dwarf_nova_Z_Cam     207
nova_like_VY_Scl     138
dwarf_nova_U_Gem     134
nova_fast             19
nova_slow             15
nova_recurrent         9
nova_very_slow         7
Name: count, dtype: int64
CV_subsubtypes
dwarf_nova_Z_Cam_IW_And     96
dwarf_nova_SU_Uma_WZ_Sge    78
dwarf_nova_SU_Uma_ER_Uma    37
Name: count, dtype: int64


### 4.2.3 Save dataframe to csv.

In [55]:
# Write the labelled table to disk (row index omitted).
df_ZTF_CVs.to_csv(path_or_buf=f'../processed_data/AAVSOCVsZTFxmatch_{date}_BTS_labelled.csv', index=False)

In [56]:
# Inspect the columns of a manually labelled file.  Its name carries the
# stamp 21032023, not the `date` value used for the other files in this notebook.
a = pd.read_csv(filepath_or_buffer='../processed_data/AAVSOCVsZTFxmatch_21032023_BTS_manual_labelled.csv')
a.columns

Index(['Name', 'AUID', 'Coords', 'Const', 'Type', 'Period', 'Mag', 'ra', 'dec',
       'Xmatch_obj', 'separation', 'Eclipsing', 'CV_Types', 'CV_subtypes',
       'CV_subsubtypes', 'eclipse_clear', 'Clarity', 'manual_label',
       'Unnamed: 18'],
      dtype='object')