In [1]:
from glob import glob
import numpy as np
import pandas as pd

In [2]:
read_dir = '../../catalogues/ogle/'
save_dir = '../../catalogues/'

In [3]:
def get_format(path):
    content = []
    copying = False
    countdown = 3
    with open(path, mode = 'r') as f:
        for line in f:
            searchw = ['format', 'file', 'ident.dat']
            matchw = all([w in line.lower() for w in searchw])
            if matchw:
                copying = True
            if copying:
                if '-------------' in line:
                    countdown -= 1
                    if countdown == 0:
                        break
                else:
                    content.append(line)
    columns = ['b_start', 'b_end', 'Format', 'Units', 'Description']
    data = [[l[:4], l[4:7], l[9:15], l[15:23], l[23:]] for l in content[2:]]
    format_ = pd.DataFrame(data, columns=columns)
    format_ = format_.applymap(str.strip)
    f = lambda s: s.strip('-')
    format_.b_start = format_.b_start.apply(f)
    format_.b_end = format_.b_end.apply(f)
    
    # Fill limits
    idx = np.where(format_.b_start=='')[0]
    format_.loc[idx, 'b_start'] = format_.loc[idx, 'b_end']
    idx = np.where(format_.b_end=='')[0]
    format_.loc[idx, 'b_end'] = format_.loc[idx, 'b_start']
    format_.b_start = format_.b_start.astype(int)
    format_.b_end = format_.b_end.astype(int)
    
    return format_

In [4]:
def format_ra(ogle_df):
    strip = lambda s: s.split('.0')[0]
    ogle_df['Ra (h)'] = ogle_df['Ra (h)'].astype(str).apply(strip)
    ogle_df['Ra (m)'] = ogle_df['Ra (m)'].astype(str).apply(strip)
    ogle_df['Ra (s)'] = ogle_df['Ra (s)'].astype(str)
    join = lambda words: ':'.join(words)
    jcols = ['Ra (h)', 'Ra (m)', 'Ra (s)']
    ogle_df['ra'] = ogle_df[jcols].apply(join, axis=1)
    
    return ogle_df

In [5]:
def format_dec(ogle_df):
    strip = lambda s: s.split('.0')[0]
    ogle_df['Dec (sign)'] = ogle_df['Dec (sign)'].astype(str)
    ogle_df['Dec (deg)'] = ogle_df['Dec (deg)'].astype(str).apply(strip)
    ogle_df['Dec (arc m)'] = ogle_df['Dec (arc m)'].astype(str).apply(strip)
    ogle_df['Dec (arc s)'] = ogle_df['Dec (arc s)'].astype(str)
    jcols = ['Dec (deg)', 'Dec (arc m)', 'Dec (arc s)']
    ogle_df['dec'] = ogle_df[jcols].apply(':'.join, axis=1)
    jcols = ['Dec (sign)', 'dec']
    ogle_df['dec'] = ogle_df[jcols].apply(''.join, axis=1)
    
    return ogle_df

In [6]:
def format_ctlg(ctlg_path, format_path):
    rename = {'Right ascension, equinox J2000.0 (hours)':'Ra (h)',
              'Right ascension, equinox 2000.0 (hours)': 'Ra (h)',
              'Right ascension, equinox J2000.0 (minutes)': 'Ra (m)',
              'Right ascension, equinox 2000.0 (minutes)': 'Ra (m)',
              'Right ascension, equinox J2000.0 (seconds)': 'Ra (s)',
              'Right ascension, equinox 2000.0 (seconds)': 'Ra (s)',
              'Declination, equinox J2000.0 (sign)': 'Dec (sign)',
              'Declination, equinox 2000.0 (sign)': 'Dec (sign)',
              'Declination, equinox J2000.0 (degrees)': 'Dec (deg)',
              'Declination, equinox 2000.0 (degrees)': 'Dec (deg)',
              'Declination, equinox J2000.0 (arc minutes)': 'Dec (arc m)',
              'Declination, equinox 2000.0 (arc minutes)': 'Dec (arc m)',
              'Declination, equinox J2000.0 (arc seconds)': 'Dec (arc s)',
              'Declination, equinox 2000.0 (arc seconds)': 'Dec (arc s)',
              'Star\'s ID': 'ID'
             }
    format_ = get_format(format_path)
    slices = list(zip(format_.b_start-1, format_.b_end))
    slices = [slice(*lim) for lim in slices]
    
    with open(ctlg_path, 'r') as f:
        data_raw = f.read()
    data_raw = data_raw.split('\n')
    data_raw = data_raw[:-1]
    data = [[row[s] for s in slices] for row in data_raw]
    header = format_.Description
    
    ctlg = pd.DataFrame(data, columns=header)
    ctlg = ctlg.rename(columns=rename)
    ctlg = format_dec(ctlg)
    ctlg = format_ra(ctlg)
    include = ~ctlg['ra'].str.contains('::')
    ctlg = ctlg[include]
    ctlg['path'] = ctlg_path
    ctlg['class'] = ctlg_path.split('-')[-2]
    return ctlg

In [7]:
cpaths = glob(read_dir + '*ident.dat')
fpaths = [path.rstrip('ident.dat') + 'README.txt' for path in cpaths]
dfs = []

for cpath, fpath in zip(cpaths, fpaths):
    df = format_ctlg(cpath, fpath)
    dfs.append(df)

result = pd.concat(dfs)
result = result.reset_index(drop=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  if __name__ == '__main__':


In [8]:
result.to_csv(save_dir + 'ogle-variable-sources.csv', index=False)

In [9]:
result = pd.read_csv(save_dir + 'ogle-variable-sources.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
result['class'].unique()

array(['rrlyr', 't2cep', 'cep', 'ecl', 'lpv', 'dsct', 'acep'],
      dtype=object)