In [1]:
import pandas as pd
import numpy as np
%pylab
%matplotlib inline
import seaborn as sbn
from repoze.lru import CacheMaker

Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib


In [2]:
def get_data(filename = '../data/r16_table4.out'):
    '''
    Grabs the cepheids and sne from the R16 sample.

    The table is read by 0-based line number: lines 40-58 hold the sne rows
    and lines 70-2345 hold the cepheid rows.  Every row is lower-cased and
    split on whitespace; columns whose values all parse as numbers are
    converted to a numeric dtype, the rest are left as text.

    parameters:
        filename: path of the table to read
    returns:
        cepheids: Cepheid dataframe, with the extra columns 'parallax',
            'p_err' and 'LK' (looked up by 'ID'; 1e-03, 0 and 0 for IDs
            without an entry) and 'err' (sigma_tot combined in quadrature
            with p_err / parallax * 5 / ln(10))
        sne: sne dataframe
    raises:
        IOError: if filename cannot be opened
        ValueError: if a table row does not hold one value per column
    '''
    sne_start = 40
    sne_end = 59
    sne_columns = ['Host', 'sne', 'm^B_0', 'err']
    ceph_start = 70
    ceph_end = 2346
    ceph_columns = ['Field','RA','DEC','ID','Period','VminusI','m_H','sigma_tot','Z']

    # Rows start out as NaN so that lines missing from a short file stay
    # empty instead of shifting the remaining rows.
    sne_rows = [[np.nan] * len(sne_columns) for _ in range(sne_end - sne_start)]
    ceph_rows = [[np.nan] * len(ceph_columns) for _ in range(ceph_end - ceph_start)]

    # The context manager closes the file even when a row fails to parse.
    with open(filename) as f:
        for i, line in enumerate(f):
            if sne_start <= i < sne_end:
                sne_rows[i - sne_start] = _split_table_row(line, len(sne_columns), i)
            elif ceph_start <= i < ceph_end:
                ceph_rows[i - ceph_start] = _split_table_row(line, len(ceph_columns), i)
            elif i >= ceph_end:
                break

    # Build each frame once from the collected rows instead of growing it
    # row by row through .loc.
    sne = _numeric_where_possible(pd.DataFrame(sne_rows, columns = sne_columns))
    cepheids = _numeric_where_possible(pd.DataFrame(ceph_rows, columns = ceph_columns))

    # ID -> (parallax, p_err, LK)
    parallaxes = {'bgcru': (2.23, 0.30,-0.15),
                  'dtcyg':(2.19,0.33, -0.18),
                  'ffaql':(2.64,0.16, -0.03),
                  'rtaur':(2.31, 0.19,-0.06),
                  'sscma':(0.348, 0.038, -0.04),
                  'sucas':(2.57,  0.33, -0.13 ),
                  'syaur':(0.428, 0.054, -0.04),
                  'tvul':(2.06,0.22,-0.09 ),
                  'wsgr':(2.30, 0.19, -0.06),
                  'xsgr':(3.17, 0.14, -0.02),
                  'ysgr':(2.13, 0.29, -0.15),
                  'betador':(3.26, 0.14, -0.02),
                  'delceph':(3.71,0.12,-0.01),
                  'etagem':(2.74,0.12,-0.02),
                  'lcar':(2.03,0.16,-0.05)
                 }
    parallaxes = pd.DataFrame([(name,) + values for name, values in parallaxes.items()],
                              columns = ['ID', 'parallax', 'p_err', 'LK'])
    cepheids = cepheids.merge(parallaxes, on = 'ID', how = 'left')

    # Cepheids without a parallax entry get placeholders; p_err = 0 makes
    # the parallax term of 'err' vanish for them.
    cepheids = cepheids.fillna({'parallax':1e-03, 'p_err':0, 'LK':0})
    cepheids['err'] = np.sqrt(cepheids.sigma_tot**2
                              + (cepheids.p_err / cepheids.parallax * 5/np.log(10))**2)
    return cepheids, sne


def _split_table_row(line, n_columns, line_number):
    '''Lower-cases and splits one table line, checking the number of values.'''
    values = line.lower().split()
    if len(values) != n_columns:
        raise ValueError('line %d: expected %d values, found %d'
                         % (line_number, n_columns, len(values)))
    return values


def _numeric_where_possible(frame):
    '''Converts every column that parses completely as numbers; others are kept as they are.'''
    def convert(column):
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column
    return frame.apply(convert)

In [3]:
# Read both tables, then add the base-10 log of the period as 'logP'.
cepheids, sne = get_data()
cepheids['logP'] = np.log10(cepheids['Period'])

In [134]:
# Notebook-wide repoze.lru CacheMaker instance.
cachemaker = CacheMaker()
class ClosestNeighborMagnitude(object):
    '''
    Assigns a magnitude ``M`` to every Cepheid of a table.

    Cepheids whose ``Field`` is 'lmc', 'galaxy' or 'n4258' ("known") get
    ``M`` from their own ``m_H`` and either the ``mu_<field>`` parameter or,
    for 'galaxy', their parallax columns.  Every other Cepheid ("unknown")
    takes over ``M`` from the closest known Cepheid in the
    (logP, VminusI) plane, or NaN when no known Cepheid lies closer than
    ``max_dist``.

    Calling the instance with a parameter dict writes the columns ``M`` and
    ``eM`` into ``self.cepheids`` in place and returns nothing.
    '''
    _KNOWN_FIELDS = ['lmc', 'galaxy', 'n4258']
    _FEATURES = ['logP', 'VminusI']

    def __init__(self, cepheids, max_dist = .5):
        '''
        parameters:
            cepheids: dataframe with the columns Field, Period, VminusI,
                m_H, parallax, LK and err.  It is modified in place: the
                columns 'logP' and 'eM' are (re)set here and 'M' / 'eM'
                are filled in when the instance is called.
            max_dist: largest distance (see _get_distance) at which a known
                Cepheid is still accepted as the neighbour of an unknown one
        '''
        cepheids['logP'] = cepheids.Period.apply(np.log10)
        cepheids['eM'] = 0.0
        self.cepheids = cepheids
        # Inverse covariance of the two features: the distance metric.
        self.metric = np.linalg.inv(self.cepheids[self._FEATURES].cov().values)
        is_known = self.cepheids.Field.isin(self._KNOWN_FIELDS)
        self.known = self.cepheids.loc[is_known]
        self.unknown = self.cepheids.loc[~is_known]
        self.max_dist = max_dist
        self.params = None
        # index -> magnitude, valid for the current parameter set only
        self._memo = {}
        print('building hash')
        # unknown index -> index of its closest known Cepheid (or None)
        self.hash = {ceph: self._get_closest_known_index(ceph)
                     for ceph in self.unknown.index}

    def __call__(self, params):
        '''
        Computes 'M' and 'eM' for every row of self.cepheids.

        parameters:
            params: mapping with the keys 'mu_lmc', 'mu_n4258' and 'dz'
        '''
        self.params = params
        self._memo = {}
        try:
            for ceph in self.cepheids.index:
                self._get_ceph_magnitude(ceph)
        finally:
            # Never let magnitudes of this parameter set leak into a later
            # evaluation, even when the loop above raised.
            self._memo = {}

    def _get_closest_known_index(self, index):
        '''
        Returns the index label of the known Cepheid closest to row `index`
        of self.cepheids, or None when there is no known Cepheid or the
        closest one is not nearer than self.max_dist.
        '''
        if len(self.known) == 0:
            return None
        target = self.cepheids.loc[index, self._FEATURES].values.astype(float)
        delta = self.known[self._FEATURES].values.astype(float) - target
        # Same quantity as _get_distance, evaluated for all known rows at once.
        distances = np.sqrt(np.sum(np.dot(delta, self.metric) * delta, axis = 1))
        nearest = int(np.argmin(distances))
        if distances[nearest] < self.max_dist:
            return self.known.index[nearest]
        return None

    def _get_ceph_magnitude(self, index):
        '''
        Returns the magnitude of row `index` for self.params and stores it,
        together with the row's own 'err', in the columns 'M' and 'eM'.
        `index` None (no neighbour found) gives NaN.
        '''
        if index is None:
            return np.nan
        if index in self._memo:
            return self._memo[index]
        cepheid = self.cepheids.loc[index]
        field = cepheid['Field']
        if field == 'lmc':
            mag = cepheid['m_H'] - self.params['mu_' + field] + self.params['dz']
        elif field == 'galaxy':
            mag = (cepheid['m_H']
                   - (5 * np.log10(1.0e-3 / cepheid['parallax']) + 25 - cepheid['LK'])
                   + self.params['dz'])
        elif field == 'n4258':
            # NOTE(review): unlike 'lmc' and 'galaxy' no 'dz' is added here;
            # kept as found - confirm this is intended.
            mag = cepheid['m_H'] - self.params['mu_' + field]
        else:
            mag = self._get_ceph_magnitude(self.hash[index])
        self.cepheids.loc[index, 'M'] = mag
        # Assigned rather than accumulated, so that evaluating several
        # parameter sets does not keep inflating the error column.
        self.cepheids.loc[index, 'eM'] = cepheid['err']
        self._memo[index] = mag
        return mag

    def _get_distance(self, x, y):
        '''
        Distance sqrt((x - y)^T metric (x - y)) between two rows, measured
        in their 'logP' and 'VminusI' values.
        '''
        x = x[self._FEATURES].values.astype(float)
        y = y[self._FEATURES].values.astype(float)
        return np.sqrt(np.dot(x - y, np.dot(self.metric, (x - y).T)))



In [135]:
# Starting value of the 'mu_<host>' parameter for every host field.
starts = {
    'm101': 29.144437436768779,
    'n3370': 32.085326573203133,
    'n3447': 31.925511937144158,
    'n1448': 31.331612808727602,
    'u9391': 32.876957912796556,
    'n3982': 31.691652487648998,
    'n1309': 32.538213812056632,
    'n1365': 31.29349520994554,
    'lmc': 18.491674020476449,
    'm31': 24.432759814673339,
    'n3021': 32.434801375871849,
    'n4424': 30.913725676693797,
    'n4258': 29.352312664474258,
    'n5917': 32.193993084145056,
    'n5584': 31.791532724280536,
    'n2442': 31.535445988717694,
    'n7250': 31.486608822858241,
    'n4038': 31.419939642742925,
    'n1015': 32.567894400797492,
    'n4536': 30.920497941433734,
    'n3972': 31.65839514301733,
    'n4639': 31.528312982189139,
}

# Parameter dict: one 'mu_<host>' entry per host plus the offset 'dz'.
params = {'mu_' + host: mu for host, mu in starts.items()}
params['dz'] = 0

In [None]:
# Build the neighbour lookup for the Cepheid table (prints 'building hash').
# Note: the constructor adds the columns 'logP' and 'eM' to `cepheids` in place.
get_magnitude = ClosestNeighborMagnitude(cepheids)


building hash


In [None]:
# Evaluate the magnitudes for the starting parameters; the results are
# written to the 'M' and 'eM' columns of get_magnitude.cepheids.
get_magnitude(params)

In [None]:
# Period-colour plane: known Cepheids first, then the unknown ones.
scatter(get_magnitude.known['logP'], get_magnitude.known['VminusI'], s = 1)
scatter(get_magnitude.unknown['logP'], get_magnitude.unknown['VminusI'], s = 1)
# Overplot every Cepheid that ended up with a non-null magnitude 'M'.
loc = get_magnitude.cepheids.M.notnull()
scatter(get_magnitude.cepheids.loc[loc, 'logP'], get_magnitude.cepheids.loc[loc, 'VminusI'], s = 1)
