In [1]:
#Cleaning to-dos:

#write docstrings

from astropy.coordinates import Angle
import astropy.units as u
import numpy as np
import os
import sys
import pandas as pd

class OpenCluster():

    '''
    Open cluster class.
    '''

    def __init__(self, name, sampedro_name, radius=0., age=0., distance=100.):

        #prompt and file specific cluster name
        self.name=name
        self.sampedro_name=sampedro_name
        #radius in acrmin
        self.radius=radius
        #age in Gyr
        self.age=age
        #distance in pc
        self.distance=distance
        #subcats with different numbers of membership allocations
        self.sampedro=[]
        self.sampedro_n1=[]
        self.sampedro_n2=[]
        self.sampedro_n3=[]
        #Yadav proper motion membership
        self.yadav=[]
        #Pan-STARRS catalog
        self.PS=[]
        #K2 EVEREST search for the cluster 
        self.K2=[]
        #subcat of self.K2 which has 2MASS IDs
        self.K2MASS=[]

    #----------------------------------------------------------------------------------------------

    def loadcatalogs(self,debug=False):

        #path="/work1/eilin/data/CLUSTERS_01/k2-panstarrs-sampedro"
        path='/home/ekaterina/Documents/Matching_Catalogs'
        os.chdir(path)
    #SAMPEDRO
        self.sampedro = pd.read_csv(path+'/cats/'+self.sampedro_name+'_Sampedro_cluster_members_query.csv',
                                       usecols=[2,3,5,57,58,59],
                                       #delimiter='\t',
                                      )

        self.sampedro['RAJ2000'] = to_xms(self.sampedro,'RAJ2000','udeg')
        self.sampedro['DEJ2000'] = to_xms(self.sampedro,'DEJ2000','udeg')
    
    #Pan-STARRS
        self.PS = pd.read_csv(path+'/cats/'+self.sampedro_name+'_ps_vs_ucac4.csv',
                                       usecols=['objID','UCAC4','gmag','rmag','imag','zmag','ymag','RAJ2000','DEJ2000'],
                                      )
        self.PS.rename(index=int,
                      columns={'RAJ2000':'RAJ2000_PS','DEJ2000':'DEJ2000_PS'},
                      inplace=True)

    #K2
        self.K2 = pd.read_csv(path+'/cats/'+self.sampedro_name+'_k2_search.txt',
                                       usecols=['KEP Mag','K2 ID','RA (J2000)','Dec (J2000)','2MASS ID','UCAC ID','J Mag','H Mag','K Mag'],
                                       skiprows=[1],
                                      )
        self.K2.rename(index=int,
                       columns={'KEP Mag':'Kp','K2 ID':'EPIC','RA (J2000)':'RAJ2000','Dec (J2000)':'DEJ2000',
                                '2MASS ID':'2MASS','UCAC ID':'UCAC4','J Mag':'J','H Mag':'H','K Mag':'K'},
                       inplace=True)
        self.K2['RAJ2000'] = to_xms(self.K2,'RAJ2000','h')
        self.K2['DEJ2000'] = to_xms(self.K2,'DEJ2000','d')
        
    #K2MASS
        self.K2MASS = self.K2[self.K2['2MASS'].notnull()]
        
    #Yadav
        self.yadav = pd.read_csv(path+'/cats/'+self.sampedro_name+'_Yadav_cluster_membership.csv',
                                skiprows=54,
                                names=['_RAJ2000','_DEJ2000','Seq','RAJ2000','DEJ2000','Bmag','Vmag','Icmag','pmRA','pmDE','Pmb','HRV']
                                )
        self.yadav = self.yadav[self.yadav.Pmb > 50]

        
        if debug == True:
            print('Currently working in \"' + os.getcwd()+ '\"\n')
            print('These are our K2 data:\n\n{}\n'.format(self.K2.head()))
            print('These are our Sampedro data:\n\n{}\n'.format(self.sampedro.head()))
            print('These are our Pan-STARRS data:\n\n{}\n'.format(self.PS.head()))
            print('These are our Yadav data:\n\n{}\n'.format(self.yadav.head()))
            print('\nNumber of Everest LCs: \n{}\n'.format(len(self.K2)))
            print('\nNumber of Everest LCs with 2MASS ID: \n{}\n'.format(len(self.K2MASS)))		

        return

    def refinesampedro(self,debug=False):

        self.sampedro_n1=self.sampedro[(self.sampedro.ClassM1+self.sampedro.ClassM2+self.sampedro.ClassM3)==1]
        self.sampedro_n2=self.sampedro[(self.sampedro.ClassM1+self.sampedro.ClassM2+self.sampedro.ClassM3)==2]
        self.sampedro_n3=self.sampedro[(self.sampedro.ClassM1+self.sampedro.ClassM2+self.sampedro.ClassM3)==3]

        if debug==True:
            print('These are single validation members:\n\n{}\n'.format(self.sampedro_n1.head()))
            print('These are double validation members:\n\n{}\n'.format(self.sampedro_n2.head()))
            print('These are triple validation members:\n\n{}\n'.format(self.sampedro_n3.head()))

        return


In [2]:
def to_xms(df,name, x):

    list_ = df[name].tolist()
    if x == 'h':
        s = [item[:2]+'h'+item[3:5]+'m'+item[6:]+'s' for item in list_]
        s = Angle(s,unit=u.deg)
    elif x == 'd':
        s = [item[:3]+'d'+item[4:6]+'m'+item[7:]+'s' for item in list_]
        s = Angle(s,unit=u.deg)
    elif x == 'udeg':
        s = Angle(list_,unit=u.deg)
    else:
        print('Could not convert {} in data frame. Check your data.'.format(name))

    return pd.Series(s).values
#---------------------------------------------------------------------------------------------------------------------


def sampedro_match(sampedro, K2, debug=False):
    
    FOM = pd.merge(K2,sampedro[['UCAC4','RAJ2000','DEJ2000']],left_on='UCAC4',right_on='UCAC4',suffixes=('_K2','_Sampedro'))
    FOM['del'] = np.sqrt(np.square(FOM.RAJ2000_K2-FOM.RAJ2000_Sampedro) + np.square(FOM.DEJ2000_K2-FOM.DEJ2000_Sampedro))
    if debug == True:
        print('This is the UCAC4 matched catalog: \n{}\n'.format(FOM.info()))
    return FOM

#--------------------------------------------------------------------------------------------

def second_order_match(lK2,lPS,sampedro,debug=False):

    '''

    Matching K2 and Pan-STARRS (conditional on Sampedro cluster membership with (n) shared assessments).

    '''

    intermediate = sampedro_match(sampedro, lK2,debug=debug)

    SOM = intermediate.merge(lPS, on='UCAC4', how='inner')
    SOM['del2'] = distance(SOM,s1='_Sampedro',s2='_PS')
    #np.sqrt(np.square(SOM.RAJ2000_Sampedro-SOM.RAJ2000_PS) + np.square(SOM.DEJ2000_Sampedro-SOM.DEJ2000_PS))
    if debug == True:
        print('This is K2 matched with Sampedro by UCAC4 ID: \n{}\n'.format(intermediate.info()))
        print('This is the UCAC4 double-matched catalog with K2 AND Pan-STARRS:'
              '\n(Redunandancies may occur!)'
              '\n{}\n'.format(SOM.info()))
    

    return SOM


#------------------------------------------------------------------------------------------------

def find_closest(SOM,debug=False):

    new_SOM=pd.DataFrame(columns=SOM.columns)
    for index, row in SOM.iterrows():

        grouped = SOM[SOM.UCAC4 == row.UCAC4]
        new_SOM = pd.concat([new_SOM,grouped[grouped.del2 == np.min(grouped.del2)]])
        SOM.drop(SOM.index[SOM.UCAC4 == row.UCAC4],inplace=True)
        
    if debug == True:
        print('This is the end result after removing duplicates in Pan-STARRS: \n{}\n'.format(new_SOM.info()))
        
    return new_SOM

In [3]:
def yadav_match(yadav,K2,debug=False):
    
    member = yadav[['RAJ2000','DEJ2000','Pmb']]
   # distance = np.sqrt(np.square(FOM.RAJ2000_K2-FOM.RAJ2000_Sampedro) + np.square(FOM.DEJ2000_K2-FOM.DEJ2000_Sampedro))
    K2['distance'] = pd.Series('NaN')
    
    for index, row in K2.iterrows():
        member['d'] = distance(row, df2=member)
        K2['distance'].iloc[index] = member.d[member.d == member.d.min()].iloc[0]
    
    selected_K2 = K2[K2.distance < 1./3600.]
    
    if debug == True:
        print('These are Yadav matched K2 samples\n\n{}\n'.format(selected_K2.info()))
    return K2


def distance(df,s1='_x',s2='_y',df2=pd.DataFrame().dropna()):
    
    if df2.empty == False:
 
        return np.sqrt(np.square(df['RAJ2000']-df2.RAJ2000) + np.square(df['DEJ2000']-df2.DEJ2000))
    else:
        return np.sqrt(np.square(df['RAJ2000'+s1]-df['RAJ2000'+s2]) + np.square(df['DEJ2000'+s1]-df['DEJ2000'+s2]))

#---------------------------------------------------------------------------------------------------------------------


In [4]:
def wrap(inputs,debug=True):
    for item in inputs:
        obj=OpenCluster(item[0],item[1], radius=item[2], age=item[3])
        obj.loadcatalogs(debug=debug)
        print('\nMatching catalogs for ' + obj.name + ':\n')
        obj.refinesampedro(debug=debug)
        SOM = second_order_match(obj.K2,obj.PS,obj.sampedro_n1,debug=debug)
        new_SOM = find_closest(SOM,debug=debug)

        FOM = sampedro_match(obj.sampedro_n1,obj.K2MASS,debug=debug)

        sample = wrap_further(FOM,new_SOM,obj.sampedro_name, debug=False)
        print('Done!')
    return sample


In [5]:
def wrap_further(FOM, SOM, name, debug=False):
    
    '''
    
    Add JHK to results, where grizy colours already exist.
    
    '''

    for index, row in SOM.iterrows():
        id_ = FOM.index[FOM.EPIC == row.EPIC]
        row['J'] = FOM.J.loc[id_]
        row['H'] = FOM.H.loc[id_]
        row['K'] = FOM.K.loc[id_]

    SOM.to_csv('share/' + name + '/' + name + '_parameter.csv')
    if debug == True:
        print('This is the joined info on the matched sample:\n\n{}\n'.format(SOM.head()))
    return SOM


In [13]:
def wrap_M67(inputs,debug=True):
    for item in inputs:
        obj=OpenCluster(item[0],item[1], radius=item[2], age=item[3])
        obj.loadcatalogs(debug=debug)
        print('\nMatching catalogs for ' + obj.name + ':\n')
        K2 = yadav_match(obj.yadav,obj.K2,debug=False)
        selected_K2 = K2[K2.distance < 1./3600.]
        selected_K2.to_csv('select.csv',columns=['EPIC','RAJ2000','DEJ2000'])
        #selected_K2 = pd.read_csv('select.csv')
        ps = pd.read_csv('cats/M67_ps_vs_yadav_select.csv')
        print(selected_K2.head())
        #print(ps.head)
        yadav = selected_K2.merge(ps, left_index=True, right_on='col1',suffixes=('_K2','_PS'))
        yadav.to_csv('share/' + obj.sampedro_name + '/' + obj.sampedro_name + '_parameter.csv')
        
        if debug == True:
            print('This is the sample with membership assessed with Yadav proper motions\n\n{}\n'.format(yadav.head()))
        
        print(yadav['EPIC'])
        print('Done!')
    return yadav

inputs=[]
inputs.append(['M67','M67', 15, 4.0])
wrap_M67(inputs,debug=False)


Matching catalogs for M67:



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


         EPIC     RAJ2000    DEJ2000       J       H       K      Kp  \
1   228682441  132.877458  11.815250     NaN     NaN     NaN  13.310   
3   211410963  132.621046  11.806506  16.039  15.386  15.462  18.602   
7   211411112  132.612450  11.808697  12.177  11.903  11.829  13.405   
12  211416648  132.619529  11.891700  14.653  14.113  13.941  16.425   
17  211396422  132.874708  11.586428  11.620  11.366  11.317  12.695   

         UCAC4             2MASS     distance  
1          NaN               NaN  9.31582e-05  
3          NaN  08502905+1148234   7.1803e-05  
7   510-048372  08502698+1148313  8.28281e-06  
12  510-048375  08502868+1153300  7.30487e-06  
17  508-049252  08512993+1135112  7.66989e-06  
0     228682441
1     211410963
2     211411112
3     211416648
4     211396422
5     211412571
6     211418561
7     211418176
8     211403974
9     211417817
10    211413212
11    211427268
12    211395699
13    211411894
14    211406540
15    211407971
16    211411722
17    2

Unnamed: 0,EPIC,RAJ2000_K2,DEJ2000_K2,J,H,K,Kp,UCAC4,2MASS,distance,...,e_zKmag,zFlags,ymag,e_ymag,yKmag,e_yKmag,yFlags,col1,RAJ2000.1,DEJ2000.1
0,228682441,132.877458,11.815250,,,,13.310,,,9.31582e-05,...,,32804,12.9885,0.0088,12.9308,0.0119,16696,1,132.877458,11.815250
1,211410963,132.621046,11.806506,16.039,15.386,15.462,18.602,,08502905+1148234,7.1803e-05,...,0.0068,115000,17.2755,0.0024,17.3568,0.0095,115000,3,132.621046,11.806506
2,211411112,132.612450,11.808697,12.177,11.903,11.829,13.405,510-048372,08502698+1148313,8.28281e-06,...,0.0004,115000,13.0373,0.0049,13.0517,0.0063,115000,7,132.612450,11.808697
3,211416648,132.619529,11.891700,14.653,14.113,13.941,16.425,510-048375,08502868+1153300,7.30487e-06,...,0.0045,115000,15.6877,0.0031,15.7365,0.0130,115000,12,132.619529,11.891700
4,211396422,132.874708,11.586428,11.620,11.366,11.317,12.695,508-049252,08512993+1135112,7.66989e-06,...,,98340,12.4603,0.0021,12.4695,0.0009,115000,17,132.874708,11.586428
5,211412571,132.595142,11.830033,14.742,14.156,14.028,16.520,510-048367,08502285+1149480,0.000119641,...,0.0036,115000,15.8426,0.0072,15.9221,0.0086,115000,25,132.595142,11.830033
6,211418561,132.620579,11.920000,15.607,15.004,14.727,17.839,,08502894+1155120,3.0288e-05,...,0.0063,115000,16.7386,0.0022,16.8085,0.0122,115000,27,132.620579,11.920000
7,211418176,132.616892,11.914044,12.968,12.665,12.563,14.223,510-048374,08502805+1154505,1.30715e-05,...,0.0013,115000,13.8437,0.0052,13.8521,0.0053,115000,28,132.616892,11.914044
8,211403974,132.618046,11.702722,11.899,11.654,11.587,12.983,509-047159,08502833+1142097,1.12778e-05,...,,114724,12.7077,0.0028,12.7166,0.0037,115000,33,132.618046,11.702722
9,211417817,132.610679,11.908992,15.636,14.832,14.714,17.951,,08502656+1154323,3.33487e-05,...,0.0144,115000,16.7507,0.0057,16.8453,0.0045,115000,36,132.610679,11.908992


In [7]:
inputs=[]
inputs.append(['M67','M67', 15, 4.0])
#inputs.append(['NGC 2158','NGC_2158', 2.5, 2.0,3600.])
#inputs.append(['NGC 1817','NGC_1817', 8., 1.0,1500.])
#inputs.append(['M45','M45',55.,0.12,120.])
#inputs.append(['Ruprecht 147','Ruprecht_147', 30, 2.5, 300.])
#inputs.append(['M44','M44', 47, 0.73, 181.5])
sample = wrap(inputs,debug=True)


Currently working in "/home/ekaterina/Documents/Matching_Catalogs"

These are our K2 data:

        EPIC     RAJ2000    DEJ2000       J       H       K      Kp UCAC4  \
0  228682410  132.832921  11.811300     NaN     NaN     NaN  18.530   NaN   
1  228682441  132.877458  11.815250     NaN     NaN     NaN  13.310   NaN   
2  228682412  132.895329  11.704950     NaN     NaN     NaN  19.510   NaN   
3  211410963  132.621046  11.806506  16.039  15.386  15.462  18.602   NaN   
4  211413327  132.620504  11.841414  15.300  14.714  14.462  17.494   NaN   

              2MASS  
0               NaN  
1               NaN  
2               NaN  
3  08502905+1148234  
4  08502891+1150290  

These are our Sampedro data:

        UCAC4  RAJ2000  DEJ2000  ClassM1  ClassM2  ClassM3
0  508-049159  132.594  11.5724        0        0        0
1  508-049161  132.605  11.5835        0        0        0
2  508-049164  132.612  11.5619        0        0        0
3  508-049170  132.634  11.5302        1      