# Build Dataset

We focus on objects classified as CV within AAVSO VSX. This resource is the most up-to-date catalogue of transients/variable stars available online. The resource also includes subtypes that are going to be important for this work.

## 1. Imports

In [1]:
import os
import lasair
import pandas as pd
from astropy import units as u
from astropy.coordinates import SkyCoord

## 2. Generate list of CVs from those catalogued in AAVSO VSX

### 2.1 Load csv of CVs and suspected CVs

See https://www.aavso.org/vsx/. From this link, I downloaded a list of targets classified as CV by AAVSO. The variability types are detailed in https://www.aavso.org/vsx/index.php?view=about.vartypes.

In [2]:
AAVSO_CVS = pd.read_csv("../processed_data/AAVSOCVsraw_21032023.csv")
print(f'Number of examples: {AAVSO_CVS.shape[0]}')

# Convert the sexagesimal "Coords" strings (RA in hours, Dec in degrees) to
# decimal degrees. Every string is parsed in one vectorised SkyCoord call
# instead of constructing two SkyCoord objects per row, which is far slower for
# a catalogue of this size and yields the same values.
aavso_coords = SkyCoord(AAVSO_CVS['Coords'].astype(str).tolist(),
                        unit=(u.hourangle, u.deg), equinox='J2000')
AAVSO_CVS['ra'] = aavso_coords.ra.deg
AAVSO_CVS['dec'] = aavso_coords.dec.deg

AAVSO_CVS.head()

Number of examples: 15392




Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611


### 2.2 Identify those observable by ZTF.

Perform some processing to acquire the coordinates in degree format; select only those observable by ZTF (greater than -31 degree declination); inspect the subtypes.

In [3]:
# Keep displayed tables short: show at most ten rows.
pd.set_option('display.max_rows', 10)

# Keep only targets north of the -31 degree declination cut used here for ZTF
# visibility, working on a copy so the raw table is left untouched, then
# renumber the rows from zero.
AAVSO_CVS_1 = AAVSO_CVS.loc[AAVSO_CVS['dec'] > -31].copy()
AAVSO_CVS_1 = AAVSO_CVS_1.reset_index(drop=True)
AAVSO_CVS_1

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611
...,...,...,...,...,...,...,...,...,...
11158,V1309 Sco,000-BFT-812,17 57 32.94 -30 43 10.0,Sco,V838MON,--,7.9 - 20.4: V,269.387250,-30.719444
11159,CK Vul,000-BCH-958,19 47 38.12 +27 18 47.8,Vul,V838MON,--,2.6 V - <23 r',296.908833,27.313278
11160,ZTF19adakuot,000-BNK-782,00 40 37.91 +40 34 52.8,And,V838MON:,--,14.9 - <22 r CR,10.157958,40.581333
11161,DLT18x,000-BMR-583,00 14 01.72 -23 11 35.8,Cet,V838MON:,--,16.2 - <22 r,3.507167,-23.193278


### 2.3 Inspect CV subtypes

In [4]:
# Lift the row cap so the full list of type strings is shown.
pd.set_option('display.max_rows', None)

# Report the sample size after the declination cut, then tally each VSX type.
print(f'Number of CVs observable with ZTF: {AAVSO_CVS_1.shape[0]}')
AAVSO_CVS_1['Type'].value_counts()

Number of CVs observable with ZTF: 11163


UG                    5687
UG:                   1487
UGSU                   738
CV                     598
UGSU:                  256
UGWZ                   157
NL/VY                  147
CV:                    137
UGSS                   129
AM                     120
UGZ                     97
UG+E                    86
UGZ/IW                  86
NL+E                    75
UG|SN                   75
SN|UG                   73
NL                      72
UGSU+E                  50
DQ                      48
UGWZ:                   45
N                       44
UGZ:                    44
NL/VY:                  38
NL:                     36
AM:                     35
NB                      32
UGER                    30
AM+E                    30
N:                      29
UGSU/IBWD               26
CV+E                    26
UG/IBWD                 23
UGZ/IW:                 22
UG+E:                   19
UGSS:                   18
IBWD                    16
UGSS+E                  15
N

### 2.4 Refine subtypes

A colon ( : ) after the variability type -or any other field- means the value/classification is uncertain.

A pipe character ( | ) between two different types signifies a logical OR; the classification is uncertain and all possible types are indicated. An example of this is ELL|DSCT, where the star may be an ellipsoidal binary system or a DSCT-type pulsating variable with half the given period.


In [5]:
pd.options.display.max_rows=50

# Remove objects whose classification is uncertain (':') or that list several
# alternative types ('|'). A raw-string character class matches either marker
# in one pass and avoids the invalid '\|' escape of a non-raw string literal.
# na=True treats a missing type as uncertain, so such rows are dropped exactly
# as the previous `== False` comparison dropped them.
uncertain_type = AAVSO_CVS_1['Type'].str.contains(r'[:|]', regex=True, na=True)
AAVSO_CVS_2 = AAVSO_CVS_1[~uncertain_type].reset_index(drop=True)

# Inspect dataframe for different CV transient types.
# Eclipsing = AAVSO_CVS_2[(AAVSO_CVS_2.Type.str.contains('E', regex=True)==True) & (AAVSO_CVS_2.Type!='UGER')]
# Eclipsing.Type.value_counts()
AAVSO_CVS_2

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611
...,...,...,...,...,...,...,...,...,...
8490,V4332 Sgr,000-BCD-402,18 50 36.70 -21 23 28.9,Sgr,V838MON,--,8.0 - 19.8 V,282.652917,-21.391361
8491,OGLE-2002-BLG-360,--,17 57 38.97 -29 46 04.8,Sgr,V838MON,240:,11.3 - 20 Ic,269.412375,-29.768000
8492,V1309 Sco,000-BFT-812,17 57 32.94 -30 43 10.0,Sco,V838MON,--,7.9 - 20.4: V,269.387250,-30.719444
8493,CK Vul,000-BCH-958,19 47 38.12 +27 18 47.8,Vul,V838MON,--,2.6 V - <23 r',296.908833,27.313278


## 3. Coordinate cross match AAVSO VSX CVs with ZTF targets within Lasair

### 3.1 Define Lasair token and create cache

In [6]:
# Tokens required for Lasair database access via the API: https://lasair-iris.roe.ac.uk/api.
# Credentials are read from the environment so they are never stored in the
# notebook or in version control. Any token that was previously committed in
# this notebook should be treated as compromised and regenerated.
# Starter token - Ten attempts per hour
starterToken = os.environ.get('LASAIR_STARTER_TOKEN')
# User token - 100 queries an hour (falls back to the starter token if unset)
token = os.environ.get('LASAIR_TOKEN', starterToken)
if token is None:
    # Do not raise: the cached cross-match csv files can still be used without a token.
    print('No Lasair API token found: set LASAIR_TOKEN before running the cross-match cells.')

# Directory used by the Lasair client to cache query results.
os.makedirs('../cacheLasairXmatch', exist_ok=True)
    

### 3.2 Crossmatch with Lasair to obtain ZTF counterparts

In [7]:

def get_ZTFcounterparts(df, racol, deccol, radius):
    """Cone-search Lasair for the nearest ZTF object to every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of targets. It is not modified; a copy is annotated and returned.
    racol, deccol : str
        Names of the columns holding the coordinates passed to the cone search
        as ``ra`` and ``dec``.
    radius : float
        Search radius forwarded to ``lasair_client.cone``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the columns ``Xmatch_obj`` (matched object ID) and
        ``separation`` filled in for rows that received a match. Rows without a
        match, or whose query failed, keep missing values in both columns.

    Notes
    -----
    Uses the module-level ``token`` for authentication. Query errors are
    printed and skipped so that one failure does not abort a long run; the
    number of failures is reported at the end because failed rows otherwise
    look the same as rows with no counterpart.
    """
    # Make a copy of the dataframe.
    df = df.copy()

    # Create the result columns up front so they exist even when nothing
    # matches, and so results can be written by position below.
    if 'Xmatch_obj' not in df.columns:
        df['Xmatch_obj'] = pd.Series([None] * len(df), index=df.index, dtype=object)
    if 'separation' not in df.columns:
        df['separation'] = float('nan')
    obj_col = df.columns.get_loc('Xmatch_obj')
    sep_col = df.columns.get_loc('separation')

    # Initialise Lasair client.
    L = lasair.lasair_client(token, cache='../cacheLasairXmatch')

    n_failed = 0

    # Loop through the target list to identify ZTF counterparts.
    for count, (ra, dec) in enumerate(zip(df[racol], df[deccol]), start=0):
        print(count, ra, dec)

        try:
            # Cone search of ZTF objects within Lasair around the given
            # coordinates; only the nearest object is requested.
            c = L.cone(ra=ra, dec=dec, radius=radius, requestType='nearest')
        except Exception as e:
            # Best effort: report the error and move on to the next target.
            n_failed += 1
            print(e)
            continue

        print(c)

        # A response without an 'object' entry is treated as "no counterpart".
        if not isinstance(c, dict) or 'object' not in c:
            continue

        # Write by position rather than by label: the loop counter is a row
        # position, and only equals the row label for a 0..n-1 index.
        df.iat[count, obj_col] = c['object']
        df.iat[count, sep_col] = c.get('separation', float('nan'))

    if n_failed:
        print(f'{n_failed} of {len(df)} cone searches failed; those rows have no cross-match recorded.')

    return df


In [14]:
# Get ZTF counterparts for the first 4500 objects (rows 0-4499).
# The cross-match issues one rate-limited API query per object, so the saved
# csv is reused when it already exists; delete the file to force a fresh query.
pt1_csv = '../processed_data/AAVSOCVsZTFxmatch_21032023_0_4499.csv'
if os.path.exists(pt1_csv):
    pt1 = pd.read_csv(pt1_csv)
else:
    pt1 = get_ZTFcounterparts(AAVSO_CVS_2[0:4500].reset_index(drop=True), 'ra', 'dec', radius=2)

    # Save the dataframe to a csv file.
    pt1.to_csv(pt1_csv, index=False)

0 35.74591666666666 43.653638888888885
{'object': 'ZTF18abryuah', 'separation': 0.37855041059646055}
1 38.49229166666666 41.62397222222222
{'object': 'ZTF18abtrvgp', 'separation': 0.13425622485012198}
2 6.654416666666666 24.487694444444447
{'object': 'ZTF17aaaehby', 'separation': 0.30876912482879415}
3 8.972375 43.561527777777776
{'object': 'ZTF18abgjgiq', 'separation': 0.5224733104298058}
4 353.6381666666666 40.43061111111111
{'object': 'ZTF18abumlux', 'separation': 0.031041688163620965}
5 349.0150833333333 -5.452472222222222
{'object': 'ZTF18absgnqy', 'separation': 0.1854113319169726}
6 326.72229166666665 -2.3056666666666663
{'object': 'ZTF19abisjkc', 'separation': 0.46469110574878286}
7 312.11629166666665 0.8358055555555556
{'object': 'ZTF18abwvggo', 'separation': 0.3596882610423953}
8 295.35437499999995 15.382027777777779
{'object': 'ZTF18aaxdlbl', 'separation': 0.7601020139145069}
9 302.76704166666667 0.9219444444444445
{'object': 'ZTF18absgzlu', 'separation': 0.15063703104736983}

In [18]:
# Get ZTF counterparts for the remaining objects (row 4500 onwards).
# The cross-match issues one rate-limited API query per object, so the saved
# csv is reused when it already exists; delete the file to force a fresh query.
pt2_csv = '../processed_data/AAVSOCVsZTFxmatch_21032023_4500_last.csv'
if os.path.exists(pt2_csv):
    pt2 = pd.read_csv(pt2_csv)
else:
    pt2 = get_ZTFcounterparts(AAVSO_CVS_2[4500:].reset_index(drop=True), 'ra', 'dec', radius=2)

    # Save the dataframe to a csv file.
    pt2.to_csv(pt2_csv, index=False)

0 280.4559166666666 38.84433333333334
{'object': 'ZTF18ablwuyy', 'separation': 0.1296746985467178}
1 277.4248333333333 26.47833333333333
{'object': 'ZTF19aaviqnb', 'separation': 0.015367925632528692}
2 277.6334166666666 42.68772222222222
{'object': 'ZTF20abazeye', 'separation': 0.05133899320044929}
3 281.5805416666666 33.81194444444444
{'object': 'ZTF18adjchby', 'separation': 0.1740854304640344}
4 287.7330416666666 28.976916666666664
{'object': 'ZTF22aapnsco', 'separation': 0.14711528264918566}
5 274.575625 31.34263888888889
{'object': 'ZTF18abjsdwy', 'separation': 0.11630991943417657}
6 288.59475 36.45605555555556
{'object': 'ZTF18admgggc', 'separation': 0.0573369870537101}
7 275.0129166666666 45.58683333333334
{'object': 'ZTF18abguobu', 'separation': 0.36517502759605935}
8 273.92137499999995 47.45127777777778
{'object': 'ZTF18aanijwl', 'separation': 1.0509818617522533}
9 284.7604583333333 40.73013888888889
Request limit exceeded. Either wait an hour, or see API documentation to incre

In [8]:
# Read both halves of the cross-match back from disk.
pt1 = pd.read_csv('../processed_data/AAVSOCVsZTFxmatch_21032023_0_4499.csv')
pt2 = pd.read_csv('../processed_data/AAVSOCVsZTFxmatch_21032023_4500_last.csv')

# Stack the halves into one table, discard targets that have no ZTF
# counterpart recorded, and renumber the remaining rows from zero.
AAVSO_CVS_3 = (
    pd.concat([pt1, pt2], ignore_index=True)
    .dropna(subset=['Xmatch_obj'])
    .reset_index(drop=True)
)
AAVSO_CVS_3

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639,ZTF18abryuah,0.378550
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972,ZTF18abtrvgp,0.134256
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694,ZTF17aaaehby,0.308769
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528,ZTF18abgjgiq,0.522473
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611,ZTF18abumlux,0.031042
...,...,...,...,...,...,...,...,...,...,...,...
5680,FY Vul,000-BCH-627,19 41 39.93 +21 45 58.4,Vul,UGZ/IW,--,13.4 - 15.9: V,295.416375,21.766222,ZTF18aavyouk,0.870329
5681,MGAB-V796,--,19 27 12.91 +26 57 33.0,Vul,UGZ/IW,--,18.5 - 21.1 g,291.803792,26.959167,ZTF18abdfrfw,0.152010
5682,ZTF18abnudna,--,20 15 01.78 +26 39 36.8,Vul,UGZ/IW,--,18.6 - 20.4 r,303.757417,26.660222,ZTF18abnudna,0.181263
5683,V0838 Mon,000-BBM-355,07 04 04.82 -03 50 50.6,Mon,V838MON,--,6.7 - 16.05 V,106.020083,-3.847389,ZTF19acymjkg,0.275339


### 3.3 Inspect resultant dataframe and remove duplicates

Remove duplicated cross matches; 
Save dataframe to csv

In [9]:
# Show every AAVSO VSX entry whose ZTF counterpart is shared with at least one
# other entry (keep=False marks all members of each duplicated group).
AAVSO_CVS_3.loc[AAVSO_CVS_3['Xmatch_obj'].duplicated(keep=False)]

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
1916,ZTF18abacprn,--,21 36 22.64 +63 09 48.5,Cep,UG,--,16.2 - 21.5 g,324.094333,63.163472,ZTF18abacprn,0.294503
1921,ZTF18abbogxd,000-BNZ-280,21 36 22.64 +63 09 48.5,Cep,UG,--,16.5 - 20.6 g,324.094333,63.163472,ZTF18abacprn,0.294503
3142,KLK-51,000-BNZ-706,19 18 52.88 +26 17 35.7,Lyr,UG,--,16.8 - 21.2 r,289.720333,26.29325,ZTF18aasnnsv,0.059757
3184,ZTF18aasnnsv,--,19 18 52.88 +26 17 35.7,Lyr,UG,--,16.9 - 22: g,289.720333,26.29325,ZTF18aasnnsv,0.059757


In [10]:
# Two pairs of VSX entries share identical coordinates and the same ZTF
# counterpart. Drop one entry of each pair. Rows are selected by VSX name
# (restricted to rows whose counterpart is duplicated) rather than by
# hard-coded row labels, so the intended entries are still the ones removed if
# the ordering of the cross-match table ever changes.
DUPLICATE_NAMES_TO_DROP = ['ZTF18abacprn', 'KLK-51']
is_redundant = (AAVSO_CVS_3['Xmatch_obj'].duplicated(keep=False)
                & AAVSO_CVS_3['Name'].isin(DUPLICATE_NAMES_TO_DROP))
AAVSO_CVS_4 = AAVSO_CVS_3[~is_redundant].reset_index(drop=True)
AAVSO_CVS_4

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639,ZTF18abryuah,0.378550
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972,ZTF18abtrvgp,0.134256
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694,ZTF17aaaehby,0.308769
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528,ZTF18abgjgiq,0.522473
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611,ZTF18abumlux,0.031042
...,...,...,...,...,...,...,...,...,...,...,...
5678,FY Vul,000-BCH-627,19 41 39.93 +21 45 58.4,Vul,UGZ/IW,--,13.4 - 15.9: V,295.416375,21.766222,ZTF18aavyouk,0.870329
5679,MGAB-V796,--,19 27 12.91 +26 57 33.0,Vul,UGZ/IW,--,18.5 - 21.1 g,291.803792,26.959167,ZTF18abdfrfw,0.152010
5680,ZTF18abnudna,--,20 15 01.78 +26 39 36.8,Vul,UGZ/IW,--,18.6 - 20.4 r,303.757417,26.660222,ZTF18abnudna,0.181263
5681,V0838 Mon,000-BBM-355,07 04 04.82 -03 50 50.6,Mon,V838MON,--,6.7 - 16.05 V,106.020083,-3.847389,ZTF19acymjkg,0.275339


In [11]:
# Write the de-duplicated cross-match table to disk without the row index.
AAVSO_CVS_4.to_csv(path_or_buf='../processed_data/AAVSOCVsZTFxmatch_21032023.csv', index=False)


### Add sources from bright transient survey.

In [12]:
# Load the AAVSO/ZTF cross-match table.
df_ZTF_CVs = pd.read_csv('../processed_data/AAVSOCVsZTFxmatch_21032023.csv')

# Load the csv file for BTS.
bts_cvs = pd.read_csv('../processed_data/BTS_novae.csv')

# Sources in BTS not present in the df_ZTF_CVs dataframe.
new_novae = bts_cvs[~bts_cvs['ZTFID'].isin(df_ZTF_CVs['Xmatch_obj'])].copy()

# Combine the RA and Dec columns into a single column like in the df_ZTF_CVs dataframe.
new_novae['RA_Dec'] = new_novae['RA'].astype(str) + ' ' + new_novae['Dec'].astype(str)


def _sexagesimal_to_degrees(coord_string):
    """Parse one 'RA Dec' string (RA in hours, Dec in degrees) into (ra, dec) in degrees."""
    coord = SkyCoord(coord_string, unit=(u.hourangle, u.deg))
    return coord.ra.deg, coord.dec.deg


# Convert the RA_Dec column into ra and dec in degrees, parsing each string once.
bts_degrees = [_sexagesimal_to_degrees(coord_string) for coord_string in new_novae['RA_Dec']]
new_novae['ra'] = [ra_deg for ra_deg, _ in bts_degrees]
new_novae['dec'] = [dec_deg for _, dec_deg in bts_degrees]

# Rename columns to match df_ZTF_CVs and label every BTS source as a nova.
new_novae = new_novae.rename(columns={'ZTFID':'Xmatch_obj', 'IAUID':'AUID', 'RA_Dec':'Coords'})
new_novae['Type'] = 'N'

# Add new_novae dataframe to df_ZTF_CVs; columns absent from BTS are left empty.
df_ZTF_CVs = pd.concat([df_ZTF_CVs, new_novae[['Xmatch_obj','AUID','ra','dec', 'Coords','Type']]], ignore_index=True)

# Save dataframe to csv.
df_ZTF_CVs.to_csv('../processed_data/AAVSOCVsZTFxmatch_21032023_BTS.csv', index=False)

pd.options.display.max_rows = 10
df_ZTF_CVs

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639,ZTF18abryuah,0.378550
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972,ZTF18abtrvgp,0.134256
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694,ZTF17aaaehby,0.308769
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528,ZTF18abgjgiq,0.522473
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611,ZTF18abumlux,0.031042
...,...,...,...,...,...,...,...,...,...,...,...
5706,,AT2022qzf,00:42:09.42 +41:15:31.2,,N,,,10.539250,41.258667,ZTF22aazmooy,
5707,,AT2022ubf,00:42:30.04 +41:56:12.5,,N,,,10.625167,41.936806,ZTF22abfxmpc,
5708,,AT2022yax,00:43:45.84 +41:15:58.6,,N,,,10.941000,41.266278,ZTF22abnrgno,
5709,,AT2022zzj,00:41:25.72 +40:44:23.3,,N,,,10.357167,40.739806,ZTF22abtltcw,


## 4.0 Organise subtypes

### 4.1 Display CV types

See https://www.aavso.org/vsx/index.php?view=about.vartypes for the variable types.

In [13]:
# Re-read the combined AAVSO + BTS table written in the previous section.
df_ZTF_CVs = pd.read_csv('../processed_data/AAVSOCVsZTFxmatch_21032023_BTS.csv')
pd.set_option('display.max_rows', None)

# Count how many objects carry each type string.
df_ZTF_CVs['Type'].value_counts()

UG                    3765
UGSU                   537
CV                     288
NL/VY                  125
UGSS                   117
AM                     100
UGZ                     91
UGZ/IW                  81
NL+E                    72
UG+E                    71
UGWZ                    60
NL                      51
N                       39
UGSU+E                  37
DQ                      35
UGER                    29
AM+E                    25
CV+E                    21
UGSU/IBWD               19
UG/IBWD                 16
UGSS+E                  14
UG/DQ                   13
NB                      11
NL/VY+E                 10
UGZ+E                    7
NR                       6
UGWZ+ZZ/GWLIB            4
UGWZ+E                   4
UGSU/IBWD+E              4
NA+UG                    4
DQ+E                     4
IBWD+E                   3
NR+E                     3
NC                       3
N+E                      3
NA+E                     3
UG/IBWD+E                2
V

### 4.2 Label main CV types and subclasses

**Overarching class**

(CV) Cataclysmic Variable

**Subclasses**

(N) Novae
* (NA) Fast novae
* (NB) Slow novae
* (NC) Very slow novae
* (NR) Recurrent novae

(UG) Dwarf Nova
* (UGSS) SS-Cyg 
* (UGSU) SU-UMa
    * (UGWZ) WZ Sge
    * (UGER) ER UMa
* (UGZ) Z Cam
    * (UGZ/IW) IW And

(IBWD) AM CVn - also includes (UGSU/IBWD; UG/IBWD)

(DQ) DQ Herculis - Intermediate Polars

(AM) AM Herculis - Polars

(NL) Nova-likes
* (VY) VY Scl


### 4.2.1 Label main CV types

In [14]:
# Remove non-CV types
df_ZTF_CVs = df_ZTF_CVs[~df_ZTF_CVs.Type.isin(['V838MON', 'CBSS+E'])].reset_index(drop=True)


def _type_mask(types, include, exclude=()):
    """Return a boolean mask of type strings containing ``include`` but none of ``exclude``.

    Matching is plain substring matching; missing type strings never match.
    """
    mask = types.str.contains(include, regex=False, na=False)
    for token in exclude:
        mask &= ~types.str.contains(token, regex=False, na=False)
    return mask


# Label eclipsing systems as defined by the AAVSO VSX (1 = eclipsing, 0 = not).
# The letter 'E' also occurs inside the tokens 'UGER' and 'ELL', so those
# tokens are stripped before searching. Stripping, rather than excluding every
# type that contains them, means a combined type such as 'UGER+E' is still
# recognised as eclipsing.
type_without_er_ell = (df_ZTF_CVs['Type']
                       .str.replace('UGER', '', regex=False)
                       .str.replace('ELL', '', regex=False))
df_ZTF_CVs['Eclipsing'] = type_without_er_ell.str.contains('E', regex=False, na=False).astype(float)


# Label CV main types.
# Each rule is (label, substring that must be present, substrings that must be
# absent). Rules are applied in order and a later rule overwrites an earlier
# one for any type string matched by both, so the order is significant.
MAIN_TYPE_RULES = [
    # (N) Novae (Classical/Recurrent)
    ('nova', 'N', ('NL',)),
    # (DN) Dwarf Novae
    ('dwarf_nova', 'UG', ('IBWD', 'DQ', 'NA', 'NB')),
    # (NL) Nova-likes
    ('nova_like', 'NL', ('IBWD', 'DQ', 'NA', 'NB')),
    # (AM) AM Her
    ('polar', 'AM', ('IBWD', 'NA', 'NB', 'NC')),
    # (DQ) DQ Her
    ('intermediate_polar', 'DQ', ('IBWD', 'NA', 'NB', 'NC')),
    # (IBWD) AM CVn
    ('AMCVn', 'IBWD', ('NA', 'NB', 'NC')),
]

# Start from an empty object column so string labels can be assigned safely;
# types matched by no rule (e.g. plain 'CV') stay unlabelled.
df_ZTF_CVs['CV_Types'] = pd.Series(float('nan'), index=df_ZTF_CVs.index, dtype=object)
for main_label, include_token, exclude_tokens in MAIN_TYPE_RULES:
    rule_mask = _type_mask(df_ZTF_CVs['Type'], include_token, exclude_tokens)
    df_ZTF_CVs.loc[rule_mask, 'CV_Types'] = main_label

# Additional (NA) Nova: 'NA+NL/VY' contains 'NL', so the nova rule skips it.
df_ZTF_CVs.loc[(df_ZTF_CVs['Type'].isin(['NA+NL/VY'])), 'CV_Types'] = 'nova'

# Display value counts for CV types.
df_ZTF_CVs.CV_Types.value_counts()



dwarf_nova            4822
nova_like              263
polar                  126
nova                    88
intermediate_polar      54
AMCVn                   46
Name: CV_Types, dtype: int64

### 4.2.2 Label CV subclasses and subsubclasses

In [18]:
# Label subclasses and subsubclasses.
# Each rule reads: for rows whose main class is `main_type` and whose VSX type
# string contains `token`, write `label` into `column`. Rules are applied in
# the order listed, so a later rule overwrites an earlier one on shared rows.
SUBTYPE_RULES = [
    # (dwarf_nova) Dwarf Nova subclasses and subsubclasses
    ('dwarf_nova', 'UGSS', 'CV_subtypes', 'dwarf_nova_U_Gem'),
    ('dwarf_nova', 'UGSU', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGWZ', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGER', 'CV_subtypes', 'dwarf_nova_SU_Uma'),
    ('dwarf_nova', 'UGWZ', 'CV_subsubtypes', 'dwarf_nova_SU_Uma_WZ_Sge'),
    ('dwarf_nova', 'UGER', 'CV_subsubtypes', 'dwarf_nova_SU_Uma_ER_Uma'),
    ('dwarf_nova', 'UGZ', 'CV_subtypes', 'dwarf_nova_Z_Cam'),
    ('dwarf_nova', 'UGZ/IW', 'CV_subsubtypes', 'dwarf_nova_Z_Cam_IW_And'),
    # (N) Nova subclasses
    ('nova', 'NA', 'CV_subtypes', 'nova_fast'),
    ('nova', 'NB', 'CV_subtypes', 'nova_slow'),
    ('nova', 'NC', 'CV_subtypes', 'nova_very_slow'),
    ('nova', 'NR', 'CV_subtypes', 'nova_recurrent'),
    # (NL) Nova-like subclasses
    ('nova_like', 'VY', 'CV_subtypes', 'nova_like_VY_Scl'),
]

for main_type, token, column, label in SUBTYPE_RULES:
    in_main_class = df_ZTF_CVs['CV_Types'] == main_type
    has_token = df_ZTF_CVs['Type'].str.contains(token, na=False)
    df_ZTF_CVs.loc[in_main_class & has_token, column] = label

# Display dataframe
pd.set_option('display.max_rows', 50)
df_ZTF_CVs

Unnamed: 0,Name,AUID,Coords,Const,Type,Period,Mag,ra,dec,Xmatch_obj,separation,Eclipsing,CV_Types,CV_subtypes,CV_subsubtypes
0,ASASSN-19dp,000-BMZ-678,02 22 59.02 +43 39 13.1,And,AM,--,16.5 - 20.9 g,35.745917,43.653639,ZTF18abryuah,0.378550,0.0,polar,,
1,BMAM-V789,000-BNS-414,02 33 58.15 +41 37 26.3,And,AM,0.0714789,18.5 - 21.7 r,38.492292,41.623972,ZTF18abtrvgp,0.134256,0.0,polar,,
2,CSS 091026:002637+242916,--,00 26 37.06 +24 29 15.7,And,AM,--,17.1 - 21 CV,6.654417,24.487694,ZTF17aaaehby,0.308769,0.0,polar,,
3,MGAB-V3453,--,00 35 53.37 +43 33 41.5,And,AM,--,18.2 - 20.9 r,8.972375,43.561528,ZTF18abgjgiq,0.522473,0.0,polar,,
4,MGAB-V3769,--,23 34 33.16 +40 25 50.2,And,AM,--,18.8 - 20.8 r,353.638167,40.430611,ZTF18abumlux,0.031042,0.0,polar,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5703,,AT2022qzf,00:42:09.42 +41:15:31.2,,N,,,10.539250,41.258667,ZTF22aazmooy,,0.0,nova,,
5704,,AT2022ubf,00:42:30.04 +41:56:12.5,,N,,,10.625167,41.936806,ZTF22abfxmpc,,0.0,nova,,
5705,,AT2022yax,00:43:45.84 +41:15:58.6,,N,,,10.941000,41.266278,ZTF22abnrgno,,0.0,nova,,
5706,,AT2022zzj,00:41:25.72 +40:44:23.3,,N,,,10.357167,40.739806,ZTF22abtltcw,,0.0,nova,,


### 4.2.3 Save dataframe to csv.

In [19]:
# Write the fully labelled table to disk without the row index.
df_ZTF_CVs.to_csv(path_or_buf='../processed_data/AAVSOCVsZTFxmatch_21032023_BTS_labelled.csv', index=False)

In [None]:
# 