In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
# Watermark is not required for this code, but is included for information. 
import watermark
%load_ext watermark
%watermark -a "ELEANOR LUTZ" -d -v -iv -m

watermark 1.8.1
numpy     1.15.4
pandas    0.23.4
ELEANOR LUTZ 2019-07-26 

CPython 3.7.1
IPython 7.2.0

compiler   : MSC v.1900 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel
CPU cores  : 12
interpreter: 64bit


In [3]:
# Read in processed star data (created in 1_process_starbase_data.ipynb)
hip_df = pd.read_csv("./data/processed/hygdata_processed.csv", low_memory=False)

# Exclude "culture" folders that are replicated in other folders. 
# "chinese_medieval" and "western" are used instead of the alternates in the list below ("exclude") 
exclude = ['western_SnT', 'western_hlad', 'western_rey', 'chinese_contemporary', 'chinese']
fabfiles = glob.glob('./data/skycultures/*/')
fabfiles = [x.split("\\")[-2] for x in fabfiles]
fabfiles = [x for x in fabfiles if x not in exclude]

# Print all cultures that will be analyzed in this Jupyter notebook
print(fabfiles)

['arabic', 'arabic_moon_stations', 'armintxe', 'aztec', 'belarusian', 'boorong', 'chinese_medieval', 'dakota', 'egyptian', 'hawaiian_starlines', 'indian', 'inuit', 'japanese_moon_stations', 'kamilaroi', 'korean', 'lokono', 'macedonian', 'maori', 'maya', 'mongolian', 'mulapin', 'navajo', 'norse', 'northern_andes', 'ojibwe', 'romanian', 'sami', 'sardinian', 'seleucid', 'siberian', 'tongan', 'tukano', 'tupi', 'western']


In [4]:
'''
Make a table (df) of all named asterisms, as well as the RA/DEC location information
(this code just uses one of the stars as the plotting location).
The num_pairs column describes how many lines (star pairs) are included in the asterism. 
The result is saved as a separate file for each culture. 
'''

for name in fabfiles: 
    fname = './data/skycultures/'+name+'/constellationship.fab'

    df = pd.read_csv(fname, header=None, encoding='utf-8')
    df['constellation'] = df[0].str.split().str.get(0).astype(str)
    df['num_pairs'] = df[0].str.split().str.get(1)
    df['star'] = df[0].str.split().str[2]
    df.drop(0, axis=1, inplace=True)

    # Merge by names of constellations
    lines = []
    if name == 'chinese_medieval': 
        # names available in original language characters
        readname = './data/skycultures/'+name+'/constellation_names.zh_CN.fab'
    else: 
        readname = './data/skycultures/'+name+'/constellation_names.eng.fab'
    with open(readname, encoding='utf-8') as f: 
        for line in f: 
            if line[0] != '#':
                lines.append(str(line))
    newlines = []
    for line in lines:
        line = line.replace("\n","").replace("_","").split('"')
        line[0] = line[0].strip()
        if len(line[0]) != 0:
            if len(line[1]) == 0:
                line[1] = line[3]
            newlines.append(line)
        
    cs = [x[0] for x in newlines]
    ns = [x[1] for x in newlines]
    df_names = pd.DataFrame.from_dict({"constellation":cs, "name":ns})
    df = pd.merge(df, df_names, on="constellation", how='left')

    # Merge star locations back into original database
    df['ra'] = ''
    df['dec'] = ''
    for index, row in df.iterrows(): 
        star = row['star']
        # Manually fix problematic star IDs
        # SOURCE: Hipparcos catalog. Mapped to HD ID identifier. 
        # http://tdc-www.harvard.edu/catalogs/hipparcos.html
        # https://www.cosmos.esa.int/web/hipparcos/search-facility
        if star == '78727':
            temp = hip_df[hip_df['hd']== 144069.]
        else:
            temp = hip_df[hip_df['hip']==float(star)]
        if len(temp) != 1:
            print(star, 'has issues in merging with HIP catalogue')
            display(temp.head())
        df.at[index, 'ra'] = temp['ra'].tolist()[0]
        df.at[index, 'dec'] = temp['dec'].tolist()[0]

    df['culture'] = name
    df.drop('constellation', axis=1, inplace=True)
    df.sort_values(by='num_pairs', ascending=False, inplace=True)
    df.to_csv('./data/processed/skycultures/asterism_names/'+name+'_names.csv', index=False)
    
display(df.head())

Unnamed: 0,num_pairs,star,name,ra,dec,culture
40,9,114131,Grus,23.1147,-43.5204,western
9,9,71795,Bootes,14.6858,13.7283,western
28,9,94779,Cygnus,19.285,53.3685,western
24,9,91875,Corona Australis,18.7297,-38.3234,western
23,9,53740,Crater,10.9962,-18.2988,western


In [5]:
'''
Map each named asterism to the correct color using the colormap.csv file. 
Combine data from all cultures into one dataframe. 
'''

names = sorted(glob.glob('./data/processed/skycultures/asterism_names/*_names.csv'))
colors = pd.read_csv("./data/processed/colormap.csv")

namedf = pd.DataFrame()
for name in names:
    temp = pd.read_csv(name, encoding="utf-8")
    culture = temp['culture'].unique()
    assert len(culture) == 1
    culture = culture[0]
    color = colors[colors['culture'] == culture]['color'].tolist()
    assert len(color) == 1
    color = color[0]
    temp['color'] = color
    namedf = pd.concat([namedf, temp])
    
namedf.sort_values(by='num_pairs', ascending=False, inplace=True)    
print('Total of', len(namedf), 'named asterisms from', len(namedf.culture.unique()), 'cultures')
display(namedf.head())
namedf.to_csv('./data/processed/named_asterisms_to_plot.csv', index=False)

Total of 1270 named asterisms from 34 cultures


Unnamed: 0,num_pairs,star,name,ra,dec,culture,color
3,58,17772,Yai,3.805021,50.736767,tukano,#ae7b3a
5,43,78820,Aña,16.09062,-19.805453,tukano,#ae7b3a
2,43,78820,Ema (Guira-nhandu),16.09062,-19.805453,tupi,#d23958
3,42,60718,Veado,12.443311,-63.099092,tupi,#d23958
92,39,1170,URimGuun,0.244005,-18.932866,korean,#2f8cae
