In [155]:
#Cleaning to-dos:

#write docstrings


import numpy as np
import os
import sys
import pandas as pd

class OpenCluster():

    '''
    One open cluster together with the catalogs that get cross-matched for it.

    Attributes
    ----------
    name : display name used in printed messages.
    sampedro_name : cluster name as it appears in the catalog file names.
    radius : cluster radius in arcmin.
    age : cluster age in Gyr.
    distance : cluster distance in pc.
    sampedro : Sampedro membership catalog (DataFrame once loaded).
    sampedro_n1, sampedro_n2, sampedro_n3 : rows of ``sampedro`` whose
        ClassM1 + ClassM2 + ClassM3 equals 1, 2 or 3 respectively.
    PS : Pan-STARRS catalog (DataFrame once loaded).
    K2 : K2 EVEREST search result for the cluster (DataFrame once loaded).
    K2MASS : rows of ``K2`` that carry a 2MASS ID.

    All catalog attributes start out as empty lists and are filled by
    ``loadcatalogs`` / ``refinesampedro``.
    '''

    # Directory that contains the ``cats/`` folder with the input catalogs.
    # Used by ``loadcatalogs`` whenever no explicit ``path`` is passed.
    # (Alternative location on the cluster:
    #  "/work1/eilin/data/CLUSTERS_01/k2-panstarrs-sampedro")
    DEFAULT_PATH = '/home/ekaterina/Documents/Matching_Catalogs'

    def __init__(self, name, sampedro_name, radius=0., age=0., distance=100.):
        '''Store the cluster parameters and create empty catalog slots.'''

        #prompt and file specific cluster name
        self.name = name
        self.sampedro_name = sampedro_name
        #radius in arcmin
        self.radius = radius
        #age in Gyr
        self.age = age
        #distance in pc
        self.distance = distance
        #subcats with different numbers of membership allocations
        self.sampedro = []
        self.sampedro_n1 = []
        self.sampedro_n2 = []
        self.sampedro_n3 = []
        #Pan-STARRS catalog
        self.PS = []
        #K2 EVEREST search for the cluster
        self.K2 = []
        #subcat of self.K2 which has 2MASS IDs
        self.K2MASS = []

    #----------------------------------------------------------------------------------------------

    def loadcatalogs(self, debug=False, path=None):
        '''
        Read the Sampedro, Pan-STARRS and K2 catalogs of this cluster from disk.

        Parameters
        ----------
        debug : if true, print the head of every catalog and the K2 counts.
        path : directory containing ``cats/``; defaults to ``DEFAULT_PATH``.

        Side effects
        ------------
        Changes the process working directory to ``path``, so files written
        later with a relative name end up there. Fills ``self.sampedro``,
        ``self.PS``, ``self.K2`` and ``self.K2MASS``.

        Relies on a module-level ``to_xms`` helper being defined before the call.
        '''

        if path is None:
            path = self.DEFAULT_PATH
        os.chdir(path)
        prefix = os.path.join(path, 'cats', self.sampedro_name)

        #SAMPEDRO
        # Columns are selected by position; presumably these are UCAC4,
        # RAJ2000, DEJ2000 and ClassM1..ClassM3, which the rest of the code
        # reads by name -- TODO confirm against the query file.
        self.sampedro = pd.read_csv(prefix + '_Sampedro_cluster_members_query.csv',
                                    usecols=[2, 3, 5, 57, 58, 59],
                                    delimiter='\t')
        self.sampedro['RAJ2000'] = to_xms(self.sampedro, 'RAJ2000', 'udeg')
        self.sampedro['DEJ2000'] = to_xms(self.sampedro, 'DEJ2000', 'udeg')

        #Pan-STARRS
        self.PS = pd.read_csv(prefix + '_ps_vs_ucac4.csv',
                              usecols=['objID', 'UCAC4', 'gmag', 'rmag', 'imag',
                                       'zmag', 'ymag', 'RAJ2000', 'DEJ2000'])
        # Suffix the coordinates so they stay distinguishable after merging.
        self.PS = self.PS.rename(columns={'RAJ2000': 'RAJ2000_PS',
                                          'DEJ2000': 'DEJ2000_PS'})

        #K2
        self.K2 = pd.read_csv(prefix + '_k2_search.txt',
                              usecols=['K2 ID', 'RA (J2000)', 'Dec (J2000)', '2MASS ID',
                                       'UCAC ID', 'J Mag', 'H Mag', 'K Mag'],
                              skiprows=[1])
        self.K2 = self.K2.rename(columns={'K2 ID': 'EPIC',
                                          'RA (J2000)': 'RAJ2000',
                                          'Dec (J2000)': 'DEJ2000',
                                          '2MASS ID': '2MASS',
                                          'UCAC ID': 'UCAC4',
                                          'J Mag': 'J', 'H Mag': 'H', 'K Mag': 'K'})
        self.K2['RAJ2000'] = to_xms(self.K2, 'RAJ2000', 'h')
        self.K2['DEJ2000'] = to_xms(self.K2, 'DEJ2000', 'd')

        #K2MASS
        self.K2MASS = self.K2[self.K2['2MASS'].notnull()]

        if debug:
            print('Currently working in \"' + os.getcwd()+ '\"\n')
            print('These are our K2 data:\n\n{}\n'.format(self.K2.head()))
            print('These are our Sampedro data:\n\n{}\n'.format(self.sampedro.head()))
            print('These are our Pan-STARRS data:\n\n{}\n'.format(self.PS.head()))
            print('\nNumber of Everest LCs: \n{}\n'.format(len(self.K2)))
            print('\nNumber of Everest LCs with 2MASS ID: \n{}\n'.format(len(self.K2MASS)))

    def refinesampedro(self, debug=False):
        '''
        Split ``self.sampedro`` by the value of ClassM1 + ClassM2 + ClassM3.

        Rows whose sum is 1, 2 or 3 go to ``sampedro_n1``, ``sampedro_n2`` and
        ``sampedro_n3``; rows with any other sum are in none of them.
        With ``debug`` true the head of each subset is printed.
        '''

        # Evaluate the sum once instead of once per subset.
        votes = self.sampedro.ClassM1 + self.sampedro.ClassM2 + self.sampedro.ClassM3
        self.sampedro_n1 = self.sampedro[votes == 1]
        self.sampedro_n2 = self.sampedro[votes == 2]
        self.sampedro_n3 = self.sampedro[votes == 3]

        if debug:
            print('These are single validation members:\n\n{}\n'.format(self.sampedro_n1.head()))
            print('These are double validation members:\n\n{}\n'.format(self.sampedro_n2.head()))
            print('These are triple validation members:\n\n{}\n'.format(self.sampedro_n3.head()))


In [156]:
def to_xms(df,name, x):
    '''
    Convert one coordinate column of a data frame to angle values in degrees.

    Parameters
    ----------
    df : pandas DataFrame holding the column.
    name : name of the column to convert.
    x : format code of the column content:
        'h'    - strings whose characters 0-1, 3-4 and 6+ are hours, minutes
                 and seconds (one separator character in between);
        'd'    - strings whose characters 0-2, 4-5 and 7+ are degrees,
                 minutes and seconds (presumably a signed, zero-padded
                 declination -- TODO confirm against the K2 file);
        'udeg' - values that are passed to ``Angle`` unchanged.

    Returns
    -------
    The array obtained from ``pd.Series(Angle(..., unit=u.deg)).values``.

    Raises
    ------
    ValueError : if ``x`` is none of the codes above.

    NOTE(review): ``Angle`` and ``u`` must already be in the notebook
    namespace (astropy); the visible import cell does not provide them.
    '''

    list_ = df[name].tolist()
    if x == 'h':
        s = [item[:2]+'h'+item[3:5]+'m'+item[6:]+'s' for item in list_]
    elif x == 'd':
        s = [item[:3]+'d'+item[4:6]+'m'+item[7:]+'s' for item in list_]
    elif x == 'udeg':
        s = list_
    else:
        # Fail loudly: falling through would only hit an undefined variable.
        raise ValueError('Could not convert {} in data frame. Check your data.'.format(name))

    return pd.Series(Angle(s,unit=u.deg)).values
#---------------------------------------------------------------------------------------------------------------------

def sampedro_match(sampedro, K2, debug=False):
    '''
    Match a K2 table with Sampedro members on their shared UCAC4 ID.

    Parameters
    ----------
    sampedro : DataFrame with at least 'UCAC4', 'RAJ2000', 'DEJ2000'.
    K2 : DataFrame with at least 'UCAC4', 'RAJ2000', 'DEJ2000'.
    debug : if true, print a summary of the matched table.

    Returns
    -------
    Inner join of ``K2`` with the three Sampedro columns. The coordinate
    columns get the suffixes '_K2' and '_Sampedro', and a column 'del' holds
    sqrt(dRA^2 + dDec^2) between the two positions.

    NOTE(review): 'del' is a plain Euclidean difference of the coordinate
    values, without a cos(Dec) factor on the RA term.
    '''

    FOM = pd.merge(K2, sampedro[['UCAC4','RAJ2000','DEJ2000']],
                   on='UCAC4', suffixes=('_K2','_Sampedro'))
    d_ra = FOM['RAJ2000_K2'] - FOM['RAJ2000_Sampedro']
    d_dec = FOM['DEJ2000_K2'] - FOM['DEJ2000_Sampedro']
    FOM['del'] = np.sqrt(np.square(d_ra) + np.square(d_dec))
    if debug:
        # DataFrame.info() prints on its own and returns None, so it must not
        # be passed through str.format (that rendered the text "None").
        print('This is the UCAC4 matched catalog: \n')
        FOM.info()
        print()
    return FOM

#--------------------------------------------------------------------------------------------

def second_order_match(lK2,lPS,sampedro,debug=False):

    '''
    Matching K2 and Pan-STARRS (conditional on Sampedro cluster membership with (n) shared assessments).

    Parameters
    ----------
    lK2 : K2 DataFrame, handed to ``sampedro_match`` together with ``sampedro``.
    lPS : Pan-STARRS DataFrame with 'UCAC4', 'RAJ2000_PS' and 'DEJ2000_PS'.
    sampedro : Sampedro member DataFrame (e.g. one of the n-vote subsets).
    debug : if true, print summaries of the intermediate and final tables.

    Returns
    -------
    Inner join on 'UCAC4' of the K2-Sampedro match with ``lPS``, plus a column
    'del2' = sqrt(dRA^2 + dDec^2) between the Sampedro and Pan-STARRS
    positions. One UCAC4 ID may appear in several rows when ``lPS`` lists it
    more than once.
    '''

    intermediate = sampedro_match(sampedro, lK2,debug=debug)

    SOM = intermediate.merge(lPS, on='UCAC4', how='inner')
    d_ra = SOM['RAJ2000_Sampedro'] - SOM['RAJ2000_PS']
    d_dec = SOM['DEJ2000_Sampedro'] - SOM['DEJ2000_PS']
    SOM['del2'] = np.sqrt(np.square(d_ra) + np.square(d_dec))

    if debug:
        # DataFrame.info() prints on its own and returns None; call it
        # directly instead of formatting its return value into the message.
        print('This is K2 matched with Sampedro by UCAC4 ID: \n')
        intermediate.info()
        print()
        print('This is the UCAC4 double-matched catalog with K2 AND Pan-STARRS:'
              '\n(Redunandancies may occur!)'
              '\n')
        SOM.info()
        print()

    return SOM

#------------------------------------------------------------------------------------------------

def find_closest(SOM,debug=False):
    '''
    Keep, for every UCAC4 ID, only the row(s) with the smallest 'del2'.

    Parameters
    ----------
    SOM : DataFrame with the columns 'UCAC4' and 'del2'. It is left untouched.
    debug : if true, print a summary of the result.

    Returns
    -------
    A new DataFrame with the same columns as ``SOM``. Rows keep their original
    index labels and their original order; ties on the minimum are all kept.
    Rows whose 'UCAC4' or 'del2' is missing never equal a group minimum and are
    therefore not part of the result.
    '''

    if SOM.empty:
        new_SOM = SOM.copy()
    else:
        # Per-row minimum of del2 within the row's UCAC4 group, aligned to SOM.
        smallest = SOM.groupby('UCAC4')['del2'].transform('min')
        new_SOM = SOM[SOM['del2'] == smallest].copy()

    if debug:
        # DataFrame.info() prints on its own and returns None.
        print('This is the end result after removing duplicates in Pan-STARRS: \n')
        new_SOM.info()
        print()

    return new_SOM

In [157]:
def wrap_cross(inputs,debug=True):
    '''
    Run the full cross-match for every cluster description in ``inputs``.

    Parameters
    ----------
    inputs : iterable of sequences ``[name, sampedro_name, radius, age]`` with
        an optional fifth entry ``distance``; each sequence is unpacked into
        the ``OpenCluster`` constructor.
    debug : forwarded to the loading and matching steps.

    Returns
    -------
    The table returned by ``wrap_further`` for the LAST cluster processed, or
    ``None`` when ``inputs`` is empty.
    '''

    # Defined up front so that an empty ``inputs`` returns None instead of
    # failing on an unbound name.
    sample = None
    for item in inputs:
        # Unpacking also forwards the optional distance entry, which was
        # silently dropped when only item[0..3] were passed.
        obj = OpenCluster(*item)
        obj.loadcatalogs(debug=debug)
        print('\nMatching catalogs for ' + obj.name + ':\n')
        obj.refinesampedro(debug=debug)
        SOM = second_order_match(obj.K2,obj.PS,obj.sampedro_n1,debug=debug)
        new_SOM = find_closest(SOM,debug=debug)
        FOM = sampedro_match(obj.sampedro_n1,obj.K2MASS,debug=debug)
        #new_SOM.to_csv(obj.sampedro_name + '_inter1.csv')
        #FOM.to_csv(obj.sampedro_name + '_inter2.csv')
        # Hand over the de-duplicated table, not the raw match that
        # find_closest has just been fed.
        sample = wrap_further(FOM,new_SOM,obj.sampedro_name, debug=False)
        print('Done!')
    return sample


In [158]:
def wrap_further(FOM, SOM,name, debug=False):
    '''
    Add JHK magnitudes from ``FOM`` to ``SOM`` and write the result to disk.

    Parameters
    ----------
    FOM : DataFrame with an 'EPIC' column and any of the columns 'J', 'H', 'K'.
    SOM : DataFrame with an 'EPIC' column (the table that already carries the
        grizy magnitudes). It is not modified.
    name : prefix of the output file; the table is written to
        ``name + '_parameter.csv'`` relative to the current working directory.
    debug : if true, print the head of the resulting table.

    Returns
    -------
    A copy of ``SOM`` in which, for every band found in ``FOM``, rows whose
    EPIC occurs in ``FOM`` take the ``FOM`` value (first occurrence per EPIC).
    Other rows keep their existing value, or get NaN if ``SOM`` had no such
    column.
    '''

    #Add JHK to results, where grizy colours already exist.
    # Assigning into the rows yielded by iterrows() only touches copies, so
    # the lookup is done column-wise on the frame itself.
    result = SOM.copy()
    lookup = FOM.drop_duplicates(subset='EPIC').set_index('EPIC')
    for band in ('J', 'H', 'K'):
        if band not in lookup.columns:
            continue
        from_fom = result['EPIC'].map(lookup[band])
        if band in result.columns:
            # Do not wipe values SOM already has for stars missing in FOM.
            from_fom = from_fom.where(from_fom.notna(), result[band])
        result[band] = from_fom

    result.to_csv(name + '_parameter.csv')
    if debug:
        print('This is the joined info on the matched sample:\n\n{}\n'.format(result.head()))
    return result


In [159]:
# Clusters to run the cross-match for. wrap_cross reads the first four items of
# each entry as name, sampedro_name, radius and age.
# Other targets, currently switched off:
#   ['M67','M67', 15, 4.0]
#   ['Ruprecht 147','Ruprecht_147', 30, 2.5, 300.]
inputs = [
    ['M44','M44', 47, 0.73, 181.5],
]
sample = wrap_cross(inputs, debug=False)




Matching catalogs for M44:

Done!
