In [1]:
import numpy as np
import pandas as pd
import glob, os

In [2]:
# Watermark is not required for this code, but is included for information. 
# It records the author, the run date, the Python/IPython versions, the versions of the
# imported packages, and details of the machine that produced the saved outputs.
import watermark
%load_ext watermark
%watermark -a "ELEANOR LUTZ" -d -v -iv -m

numpy     1.15.4
watermark 1.8.1
pandas    0.23.4
ELEANOR LUTZ 2019-07-26 

CPython 3.7.1
IPython 7.2.0

compiler   : MSC v.1900 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel
CPU cores  : 12
interpreter: 64bit


In [3]:
# Read in processed star data (created in 1_process_starbase_data.ipynb)
# Later cells match stars against the 'hip' and 'hd' columns of this table and
# copy the 'ra' and 'dec' values from the matching row.
hip_df = pd.read_csv("./data/processed/hygdata_processed.csv", low_memory=False)

# Exclude "culture" folders that are replicated in other folders. 
# "chinese_medieval" and "western" are used instead of the alternates in the list below ("exclude") 
exclude = ['western_SnT', 'western_hlad', 'western_rey', 'chinese_contemporary', 'chinese']


def culture_folder_name(path):
    '''
    Return the name of the last folder in a directory path.

    glob returns each culture folder with a trailing separator
    (e.g. "./data/skycultures/arabic/"). normpath drops that trailing separator
    and normalises separators for the current operating system, so basename then
    yields the folder name ("arabic") on Windows as well as on Linux/macOS.
    '''
    return os.path.basename(os.path.normpath(path))


# Sorted so that the processing order does not depend on the file system's listing order
fabfiles = sorted(culture_folder_name(x) for x in glob.glob('./data/skycultures/*/'))
fabfiles = [x for x in fabfiles if x not in exclude]

# Print all cultures that will be analyzed in this Jupyter notebook
print(fabfiles)

['arabic', 'arabic_moon_stations', 'armintxe', 'aztec', 'belarusian', 'boorong', 'chinese_medieval', 'dakota', 'egyptian', 'hawaiian_starlines', 'indian', 'inuit', 'japanese_moon_stations', 'kamilaroi', 'korean', 'lokono', 'macedonian', 'maori', 'maya', 'mongolian', 'mulapin', 'navajo', 'norse', 'northern_andes', 'ojibwe', 'romanian', 'sami', 'sardinian', 'seleucid', 'siberian', 'tongan', 'tukano', 'tupi', 'western']


In [4]:
'''
Make a table (df) of all named stars, as well as the RA/DEC location information for the star.
The result is saved as a separate file for each culture. 
The file also includes the total number of named stars available for that culture. 

Reads the variables `fabfiles` (culture folder names) and `hip_df` (processed star data).
Writes ./data/processed/skycultures/star_names/<culture>_names.csv for every culture with
at least one named star, and prints a note for every culture without any.
'''

# Manually fix problematic star IDs: these IDs are looked up through the 'hd' column
# (value on the right) instead of through the 'hip' column.
# SOURCE: Hipparcos catalog. Mapped to HD ID identifier. 
# http://tdc-www.harvard.edu/catalogs/hipparcos.html
# https://www.cosmos.esa.int/web/hipparcos/search-facility
manual_hd_ids = {'78727': 144069.}

for name in fabfiles:  
    if name == 'chinese_medieval': 
        # names available in original language characters
        fname = './data/skycultures/'+name+'/star_names.zh_CN.fab'
    else:
        fname = './data/skycultures/'+name+'/star_names.fab'
    if not os.path.isfile(fname):
        print("No star name data for", name) # For reference so I can go through and manually verify 
        continue

    # Collect [star ID, star name] pairs from every non-comment line of the file
    newlines = []
    with open(fname, encoding='utf-8') as f: 
        for line in f: 
            if line.startswith('#'):
                continue
            # Drop the newline and the "_", "(" and ")" characters (presumably the _( ) wrapper
            # around the quoted name), then split the "<id>|<name>" fields.
            # NOTE: this also removes those characters when they occur inside a name.
            fields = line.replace("\n","").replace("_","").replace("(","").replace(")","").split('|')
            star_id = ''.join(i for i in fields[0] if i.isdigit())
            if len(star_id) == 0:
                # Blank line, or a line without a numeric star ID
                continue
            quoted = fields[1].split('"') if len(fields) > 1 else []
            if len(quoted) < 2:
                # Report the malformed line instead of aborting the whole cell
                print("Could not parse star name in", fname, ":", line.strip())
                continue
            newlines.append([star_id, quoted[1]])

    cs = [x[0] for x in newlines]
    ns = [x[1] for x in newlines]
    df = pd.DataFrame.from_dict({"star":cs, "name":ns})

    # Stars without a catalogue match keep an empty string in both columns
    df['ra'] = ''
    df['dec'] = ''
    for index, row in df.iterrows(): 
        star = row['star']
        if star in manual_hd_ids:
            temp = hip_df[hip_df['hd'] == manual_hd_ids[star]]
        else:
            # star contains only digits at this point, so float() cannot fail
            temp = hip_df[hip_df['hip'] == float(star)]

        if len(temp) != 1:
            print(star, 'has issues in merging with HIP catalogue')
            display(temp.head())
        if len(temp) == 0:
            print(star, "not processed for culture", name)
            continue

        # Applies to manually mapped stars as well; with several matches the first one is used
        df.at[index, 'ra'] = temp['ra'].tolist()[0]
        df.at[index, 'dec'] = temp['dec'].tolist()[0]

    df['culture'] = name
    df['total_culture_names'] = len(df)
    if len(df) == 0:
        print("No star name data for", name)
    else:
        # Make sure the output folder exists before writing
        os.makedirs('./data/processed/skycultures/star_names/', exist_ok=True)
        df.to_csv('./data/processed/skycultures/star_names/'+name+'_names.csv', index=False, encoding='utf-8')

No star name data for armintxe
No star name data for aztec
No star name data for japanese_moon_stations
No star name data for kamilaroi
No star name data for navajo
No star name data for northern_andes
No star name data for sami
No star name data for tukano


In [5]:
'''
Map each named star to the correct culture color using the colormap.csv file. 
Combine data from all cultures into one dataframe. 

Reads every *_names.csv file in ./data/processed/skycultures/star_names/, adds a 'color'
column taken from colormap.csv, and writes the combined table (sorted so that cultures
with the most named stars come first) to ./data/processed/named_stars_to_plot.csv.
'''

names = sorted(glob.glob('./data/processed/skycultures/star_names/*_names.csv'))
colors = pd.read_csv("./data/processed/colormap.csv")
assert len(names) > 0, "No *_names.csv files found in ./data/processed/skycultures/star_names/"

# Collect the per-culture tables and concatenate once at the end,
# instead of growing the dataframe inside the loop
frames = []
for name in names:
    temp = pd.read_csv(name, encoding="utf-8")
    culture = temp['culture'].unique()
    assert len(culture) == 1, "Expected exactly one culture in " + name
    culture = culture[0]
    color = colors.loc[colors['culture'] == culture, 'color'].tolist()
    assert len(color) == 1, "Expected exactly one colormap.csv entry for culture " + str(culture)
    color = color[0]
    temp['color'] = color
    frames.append(temp)

namedf = pd.concat(frames)
namedf = namedf.sort_values(by='total_culture_names', ascending=False)
print('Total of', len(namedf), 'named stars from', len(namedf.culture.unique()), 'cultures')
display(namedf.head())
namedf.to_csv('./data/processed/named_stars_to_plot.csv', index=False, encoding='utf-8')

Total of 2310 named stars from 26 cultures


Unnamed: 0,star,name,ra,dec,culture,total_culture_names,color
875,95771,Anser,19.478427,24.664905,western,876,#5bc4c7
334,83608,Alrakis,17.088929,54.470042,western,876,#5bc4c7
347,86201,Adfar Aldib II,17.615859,68.75797,western,876,#5bc4c7
346,86201,Al Dhih,17.615859,68.75797,western,876,#5bc4c7
345,86620,Dsiban,17.699461,72.156911,western,876,#5bc4c7


In [6]:
'''
Reference only: show the six stars that carry the most names across all cultures.
(Nothing in this cell is written to file or used directly when plotting the map.)
'''

df = pd.read_csv('./data/processed/named_stars_to_plot.csv', encoding='utf-8')
display(df.head())

# Number of rows per star ID, with the most frequently named stars first
df_grouped = (
    df.groupby('star')
      .count()
      .sort_values(by='name', ascending=False)
      .reset_index()
)
display(df_grouped.head(6))

# One example row (the first in file order) for each of the six most named stars
is_top_star = df['star'].isin(df_grouped.head(6)['star'].tolist())
stars = df[is_top_star].drop_duplicates('star', keep='first')
display(stars)

Unnamed: 0,star,name,ra,dec,culture,total_culture_names,color
0,95771,Anser,19.478427,24.664905,western,876,#5bc4c7
1,83608,Alrakis,17.088929,54.470042,western,876,#5bc4c7
2,86201,Adfar Aldib II,17.615859,68.75797,western,876,#5bc4c7
3,86201,Al Dhih,17.615859,68.75797,western,876,#5bc4c7
4,86620,Dsiban,17.699461,72.156911,western,876,#5bc4c7


Unnamed: 0,star,name,ra,dec,culture,total_culture_names,color
0,11767,24,24,24,24,24,24
1,80763,19,19,19,19,19,19
2,69673,18,18,18,18,18,18
3,21421,17,17,17,17,17,17
4,32349,17,17,17,17,17,17
5,91262,17,17,17,17,17,17


Unnamed: 0,star,name,ra,dec,culture,total_culture_names,color
259,69673,Arcturus,14.26103,19.18241,western,876,#5bc4c7
471,32349,Aschere,6.752481,-16.716116,western,876,#5bc4c7
634,91262,Vega,18.61564,38.783692,western,876,#5bc4c7
740,11767,Polaris,2.52975,89.264109,western,876,#5bc4c7
814,80763,Antares,16.490128,-26.432002,western,876,#5bc4c7
861,21421,Aldebaran,4.598677,16.509301,western,876,#5bc4c7
