In [1]:
import pandas as pd
import os
from astropy.time import Time

In [2]:
# Locations of the BAA support data files, all kept in the 'variable_star'
# folder beneath the local code checkout.
code_path = '/home/john/code/Astro/'
support_data_path = os.path.join(code_path, 'variable_star')
in_file, sequence_file, sequence_database_file = (
    os.path.join(support_data_path, file_name)
    for file_name in (
        'all_star_summary.csv',       # observation summary, read by load_baa_vsdb
        'baa_sequences.txt',
        'baa_sequence_byclass.csv',   # sequence database, read by load_baa_sequence_db
    )
)

In [3]:
def load_baa_sequence_db(path=None):
    '''
    Load the BAA star summary data, containing sequence info, variable type, RA, dec, period etc.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read.  When omitted, the module-level
        ``sequence_database_file`` is used.

    Returns
    -------
    pandas.DataFrame
        The rows of the CSV that have a value in every one of 'Star', 'Con',
        'Max', 'Min' and 'Chart', with the original row index retained and
        with these changes applied:

        * 'Star' and 'Con' have non-ascii characters removed and are lower-cased.
        * 'Min' is held as text with any 'V' characters removed.
        * 'Full Name' is '<Star> <Con>'.
        * 'RA_hr' / 'RA_min' are the first two whitespace-separated fields of
          'RA' as integers.
        * 'Dec_deg' / 'Dec_min' are the first two whitespace-separated fields
          of 'Dec' as floats.  Only 'Dec_deg' carries the sign; 'Dec_min' is
          the bare minutes field.
    '''
    if path is None:
        path = sequence_database_file
    db = pd.read_csv(path)

    # deal with non-ascii chars in the name columns
    for name_col in ('Star', 'Con'):
        db[name_col] = db[name_col].str.encode('ascii', 'ignore').str.decode('ascii')

    # drop empty cells, then rows with a NaN in any required field.  The
    # explicit copy makes the column assignments below operate on an
    # independent frame instead of a slice of the one that was read in.
    db = db[db['Star'] != '']
    db = db.dropna(subset=['Star', 'Con', 'Max', 'Min', 'Chart']).copy()

    # strip the 'V' flag from the minimum magnitude.  Casting to str first
    # keeps this working when the column was parsed as purely numeric.
    db['Min'] = db['Min'].astype(str).str.replace('V', '', regex=False)

    db['Star'] = db['Star'].str.lower()
    db['Con'] = db['Con'].str.lower()
    db['Full Name'] = db['Star'] + ' ' + db['Con']

    ra = db['RA'].str.split(expand=True)
    db['RA_hr'] = ra.iloc[:, 0].astype('int')
    db['RA_min'] = ra.iloc[:, 1].astype('int')

    dec = db['Dec'].str.split(expand=True)
    db['Dec_deg'] = dec.iloc[:, 0].astype('float')
    db['Dec_min'] = dec.iloc[:, 1].astype('float')
    return db

In [4]:
def load_baa_vsdb(path=None):
    '''
    Read the BAA variable star summary data into a pandas DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read.  When omitted, the module-level ``in_file`` is used.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the 'Variable Star' column renamed to
        'Full Name' and lower-cased (so it matches the sequence database
        frame), plus 'First JD' and 'Latest JD' columns built with astropy
        ``Time(..., format='jd')`` from the first whitespace-separated field
        of the 'First Observation JD / UT' and 'Latest Observation JD / UT'
        columns.
    '''
    if path is None:
        path = in_file
    vsdb = pd.read_csv(path)

    # rename to match the seq_db DataFrame
    vsdb = vsdb.rename(columns={'Variable Star': 'Full Name'})
    vsdb['Full Name'] = vsdb['Full Name'].str.lower()

    # The observation columns hold several whitespace-separated fields; only
    # the leading one (the JD portion) is passed on to astropy.
    first_jd = vsdb['First Observation JD / UT'].str.split(expand=True).iloc[:, 0]
    latest_jd = vsdb['Latest Observation JD / UT'].str.split(expand=True).iloc[:, 0]
    vsdb['First JD'] = Time(first_jd.tolist(), format='jd')
    vsdb['Latest JD'] = Time(latest_jd.tolist(), format='jd')
    return vsdb

In [5]:
# Build the cleaned BAA sequence table and preview its first rows.
seq_db = load_baa_sequence_db()
seq_db.head()

Unnamed: 0,Star,Con,RA,Dec,Type,Max,Min,Period,Chart,Class,Eclipse Duration,Full Name,RA_hr,RA_min,Dec_deg,Dec_min
0,r,and,00 24,+38 35,M,5.8,15.2,409.2,53.02,pulsating,,r and,0,24,38.0,35.0
1,w,and,02 18,+44 18,M,6.7,14.6,397.3,35.02,pulsating,,w and,2,18,44.0,18.0
2,aq,and,00 28,+35 35,SRb,7.7,9.5,169.0,303.01,pulsating,,aq and,0,28,35.0,35.0
3,bz,and,00 38,+45 36,Lb,7.7,8.6,,304.01,pulsating,,bz and,0,38,45.0,36.0
4,rs,and,23 55,+48 38,SRa,7.0,9.4,136.0,334.01,pulsating,,rs and,23,55,48.0,38.0


In [6]:
# Load the BAA observation summary database.
vsdb_full = load_baa_vsdb()
# Database filtering
# VALID CONSTELLATION: a usable 'Full Name' has two fields (e.g. 'TX Dra'), the
#     second of which is taken to be the constellation.  No check against a
#     list of valid constellations is performed in this cell.
# NOTE: this is an alias, not a copy -- both names refer to the same DataFrame.
vsdb_allconst = vsdb_full
# Disabled: count the spaces in each name ...
#vsdb_allconst['Valid_nFields'] = vsdb_allconst['Full Name'].str.count(' ')

# Disabled: ... and remove rows whose name does not contain exactly one space.
#vsdb_allconst.drop(vsdb_allconst[vsdb_allconst.Valid_nFields != 1].index, inplace=True)

# Store the second whitespace-separated field of the name, lower-cased, as the
# 'VS Constellation' column.  This adds the column to vsdb_full in place.
vs_const = vsdb_full['Full Name'].str.split(expand=True)
vsdb_full['VS Constellation'] = vs_const[1].str.lower()
vsdb_full.describe()

Unnamed: 0,Number of Observations,Number of Observers,Maximum Magnitude,Minimum Magnitude,Range
count,2645.0,2645.0,2645.0,2645.0,2645.0
mean,904.272968,9.827977,9.882658,11.994442,2.111784
std,3113.952786,25.811231,2.991841,3.413102,2.315482
min,1.0,1.0,-0.6,1.8,0.0
25%,2.0,1.0,7.6,9.4,0.1
50%,22.0,1.0,10.1,11.9,1.36
75%,298.0,4.0,12.0,14.8,3.3
max,59231.0,302.0,19.0,21.0,17.2


In [8]:
# Left-join the sequence table onto the observation summary using the star
# name, so every summary row is kept whether or not a sequence entry exists.
# The sequence-side name/magnitude columns and the raw first-observation text
# are then discarded as duplicate or unwanted.
vs_all = (
    vsdb_full
    .merge(seq_db, on='Full Name', how='left')
    .drop(columns=['Star', 'Con', 'Max', 'Min', 'First Observation JD / UT'])
)
vs_all.columns

Index(['Full Name', 'Number of Observations', 'Number of Observers',
       'Latest Observation JD / UT', 'Maximum Magnitude', 'Minimum Magnitude',
       'Range', 'First JD', 'Latest JD', 'VS Constellation', 'RA', 'Dec',
       'Type', 'Period', 'Chart', 'Class', 'Eclipse Duration', 'RA_hr',
       'RA_min', 'Dec_deg', 'Dec_min'],
      dtype='object')

In [9]:
######## FILTERS ##########
# SEQUENCE FILE EXISTS
# Keep only the variable stars for which the BAA provides a sequence file:
# the 'Chart' entry must be present and must not be 'AAVSO'.
vs_target = vs_all.loc[vs_all['Chart'].notna() & vs_all['Chart'].ne('AAVSO')]
vs_target.describe()

Unnamed: 0,Number of Observations,Number of Observers,Maximum Magnitude,Minimum Magnitude,Range,RA_hr,RA_min,Dec_deg,Dec_min
count,323.0,323.0,323.0,323.0,323.0,323.0,323.0,323.0,323.0
mean,5273.582043,51.176471,7.535294,11.260464,3.72517,11.749226,30.541796,34.873065,31.077399
std,7156.366335,52.178234,2.432532,3.575865,2.400548,7.420947,17.710595,22.876376,16.678646
min,6.0,1.0,1.0,3.0,0.3,0.0,0.0,-23.0,0.0
25%,818.0,8.0,6.065,8.6,1.955,5.0,15.5,19.0,18.0
50%,2734.0,36.0,7.0,9.9,2.8,12.0,30.0,35.0,32.0
75%,7085.0,76.5,9.15,14.6,5.5,19.0,46.0,53.5,45.5
max,59231.0,302.0,14.0,20.8,11.78,23.0,59.0,85.0,59.0


In [11]:
# Condition-based filtering starts here.
# Eclipsing binaries are excluded from the target list.
vs_target = vs_target.loc[vs_target['Class'].ne('eclipsing')]
vs_target['Class'].value_counts()

pulsating      165
cataclysmic     87
Name: Class, dtype: int64

In [14]:
# Suitable binocular targets: keep stars whose minimum magnitude is 9.0 or brighter.
vs_target = vs_target.loc[vs_target['Minimum Magnitude'].le(9.0)]
vs_target.describe()

Unnamed: 0,Number of Observations,Number of Observers,Maximum Magnitude,Minimum Magnitude,Range,RA_hr,RA_min,Dec_deg,Dec_min
count,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0
mean,6239.4,78.653333,5.632667,7.926933,2.294267,11.6,32.08,33.933333,30.92
std,5349.079125,47.509172,1.190032,1.0237,0.857055,7.082525,18.725442,25.519644,16.991063
min,128.0,2.0,1.0,3.0,1.25,0.0,1.0,-13.0,0.0
25%,2316.0,45.5,5.2,7.5,1.9,6.0,13.0,17.0,19.0
50%,4940.0,71.0,5.9,8.3,2.2,12.0,35.0,38.0,29.0
75%,8399.0,104.5,6.5,8.6,2.625,18.0,49.5,55.0,46.0
max,29928.0,237.0,7.2,9.0,7.9,23.0,59.0,85.0,59.0


In [20]:
# Order the targets by right ascension (hours, then minutes).
vs_target = vs_target.sort_values(['RA_hr', 'RA_min'])
# Reduce to the columns wanted in the published target list and write it out.
vs_target_short = vs_target.loc[:, [
    'Full Name',
    'Chart',
    'RA',
    'Dec',
    'Class',
    'Type',
    'Period',
    'Maximum Magnitude',
    'Minimum Magnitude',
    'Number of Observations',
]]
vs_target_short.to_html('vs_binocular_targets.html')
# Raise the display limit so the whole list is shown below.
pd.options.display.max_rows = 500
vs_target_short.head(500)

Unnamed: 0,Full Name,Chart,RA,Dec,Class,Type,Period,Maximum Magnitude,Minimum Magnitude,Number of Observations
2518,wz cas,323.01,00 01,+60 21,pulsating,SRb,186,6.3,8.8,7671
1680,tv psc,1972Sep09,00 28,+17 54,pulsating,SR,49.1,4.54,5.9,1791
430,eg and,72.02,00 45,+40 41,cataclysmic,ZAND+E,482.57d,6.4,8.0,7481
552,gamma cas,64.01,00 57,+60 43,cataclysmic,GCAS,,1.5,3.0,16678
2630,z psc,278.01,01 16,+25 46,pulsating,SRb,155.8,6.3,8.3,2259
2181,v465 cas,233.02,01 18,+57 48,pulsating,SRb,60,4.6,8.6,14904
2097,v393 cas,337.01,02 03,+71 18,pulsating,SRa,393,6.95,8.4,4034
727,kk per,344.01,02 10,+56 34,pulsating,Lc,,7.0,9.0,2024
251,bu tau,1983Oct03,03 49,+24 08,cataclysmic,GCAS+LERI+SPB,12630d,4.2,6.74,6179
1465,ss cep,315.01,03 50,+80 19,pulsating,SRb,90,6.2,8.6,5420


In [22]:
# Subset of the target list with at most 3000 recorded observations, exported
# to its own HTML file.
vs_lowobs = vs_target_short.loc[vs_target_short['Number of Observations'].le(3000)]
vs_lowobs.to_html('vs_binocular_targets_lowobs.html')
vs_lowobs.head(90)

Unnamed: 0,Full Name,Chart,RA,Dec,Class,Type,Period,Maximum Magnitude,Minimum Magnitude,Number of Observations
1680,tv psc,1972Sep09,00 28,+17 54,pulsating,SR,49.1,4.54,5.9,1791
2630,z psc,278.01,01 16,+25 46,pulsating,SRb,155.8,6.3,8.3,2259
727,kk per,344.01,02 10,+56 34,pulsating,Lc,,7.0,9.0,2024
1770,uv cam,343.01,04 06,+61 48,pulsating,SRb,294.0,6.9,8.8,2265
2642,zz cam,343.01,04 18,+62 21,pulsating,Lb,,6.7,8.1,2234
1663,tu gem,294.01,06 11,+26 01,pulsating,SRb,217.0,6.8,9.0,2711
1253,rv mon,292.01,06 58,+06 10,pulsating,SRb,132.0,6.5,8.9,1851
2468,w cma,213.02,07 08,-11 55,pulsating,SR,160.0,6.2,8.4,1174
1207,rt cnc,311.01,08 58,+10 51,pulsating,SRb,90.04,5.9,8.7,1863
1742,u hya,109.02,10 38,-13 23,pulsating,SRb,183.1,4.1,6.8,1459
