In [1]:
import pandas as pd
import numpy as np

In [2]:
# Watermark is not required for this code, but is included for information. 
import watermark
%load_ext watermark
%watermark -a "ELEANOR LUTZ" -d -v -iv -m

watermark 1.8.1
numpy     1.15.4
pandas    0.23.4
ELEANOR LUTZ 2019-07-11 

CPython 3.7.1
IPython 7.2.0

compiler   : MSC v.1900 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 63 Stepping 2, GenuineIntel
CPU cores  : 12
interpreter: 64bit


## Data Source
The asterism data used in this Jupyter Notebook is from the open-source [Stellarium Astronomy Software](https://stellarium.org/). 

In [3]:
# --- Asterism line definitions -------------------------------------------
# Every line of constellationship.fab is split on whitespace into:
#   token 0  -> 'constellation' (abbreviation used as the merge key below)
#   token 1  -> 'num_pairs'     (kept as a string)
#   tokens 2+ -> 'stars'        (list of id strings)
raw_lines = pd.read_csv('./data/stellarium_western_asterisms/constellationship.fab', header=None)
tokens = raw_lines[0].str.split()  # tokenize once and reuse for all three columns
df = pd.DataFrame({'constellation': tokens.str.get(0),
                   'num_pairs': tokens.str.get(1),
                   'stars': tokens.str[2:]})
display(df.head())

# --- Constellation names -------------------------------------------------
# Tabs are removed, then each line is split on double quotes: piece 0 is the
# abbreviation and piece 1 is the first quoted name; later pieces are unused.
names_raw = pd.read_csv('./data/stellarium_western_asterisms/constellation_names.eng.fab', header=None)
name_parts = names_raw[0].str.replace('\t', '', regex=False).str.split('"', expand=True)
df_names = name_parts[[0, 1]].rename(columns={0: 'constellation', 1: 'name'})
display(df_names.head())

# --- Merge ---------------------------------------------------------------
assert len(df) == len(df_names), (
    f'{len(df)} asterism rows but {len(df_names)} name rows')
n_asterisms = len(df)
df = pd.merge(df, df_names, on="constellation")
# Equal lengths before the merge do not prove that every abbreviation matched:
# the default inner join drops unmatched keys (and multiplies duplicated ones),
# so verify that the row count is unchanged.
assert len(df) == n_asterisms, (
    f'merge changed the row count from {n_asterisms} to {len(df)}; '
    'constellation abbreviations differ between the two files')
display(df.head())

Unnamed: 0,constellation,num_pairs,stars
0,Aql,8,"[98036, 97649, 97649, 97278, 97649, 95501, 955..."
1,And,5,"[677, 3092, 3092, 5447, 9640, 5447, 5447, 4436..."
2,Scl,3,"[116231, 4577, 4577, 115102, 115102, 116231]"
3,Ara,7,"[88714, 85792, 85792, 83081, 83081, 82363, 823..."
4,Lib,5,"[77853, 76333, 76333, 74785, 74785, 72622, 726..."


Unnamed: 0,constellation,name
0,Aql,Aquila
1,And,Andromeda
2,Scl,Sculptor
3,Ara,Ara
4,Lib,Libra


Unnamed: 0,constellation,num_pairs,stars,name
0,Aql,8,"[98036, 97649, 97649, 97278, 97649, 95501, 955...",Aquila
1,And,5,"[677, 3092, 3092, 5447, 9640, 5447, 5447, 4436...",Andromeda
2,Scl,3,"[116231, 4577, 4577, 115102, 115102, 116231]",Sculptor
3,Ara,7,"[88714, 85792, 85792, 83081, 83081, 82363, 823...",Ara
4,Lib,5,"[77853, 76333, 76333, 74785, 74785, 72622, 726...",Libra


In [4]:
# Unique star ids referenced by any asterism, converted to float (the same
# conversion used when comparing against hip_df['hip']) and sorted ascending.
stars = sorted({float(star_id) for star_list in df['stars'] for star_id in star_list})

hip_df = pd.read_csv('./data/processed/hygdata_processed.csv', low_memory=False)

# Select all referenced catalogue rows in one pass instead of scanning the
# whole catalogue once per star.
matched = hip_df.loc[hip_df['hip'].isin(stars), ['hip', 'ra', 'dec']]

# Each referenced id must match exactly one catalogue row.
duplicated_ids = matched.loc[matched['hip'].duplicated(), 'hip'].tolist()
assert not duplicated_ids, f'ids with more than one catalogue row: {duplicated_ids}'
missing_ids = sorted(set(stars) - set(matched['hip'].tolist()))
assert not missing_ids, f'ids with no catalogue row: {missing_ids}'

# With ids unique and complete, sorting by id lines the rows up with `stars`.
matched = matched.sort_values('hip')
ras = matched['ra'].tolist()
decs = matched['dec'].tolist()

star_df = pd.DataFrame(data={'star_ID': stars, 'ra': ras, 'dec': decs})
display(star_df.head())

Unnamed: 0,star_ID,ra,dec
0,677.0,0.139791,29.090432
1,746.0,0.152887,59.14978
2,765.0,0.156836,-45.747426
3,1067.0,0.220598,15.183596
4,1562.0,0.323799,-8.823921


In [5]:
# Attach per-asterism lists of right ascension and declination, in the same
# order as each row's 'stars' list.

# Look up every referenced id once rather than rescanning hip_df for every
# star of every asterism (many ids repeat within and across rows).
needed_ids = sorted({float(star) for star_list in df['stars'] for star in star_list})
coords = hip_df.loc[hip_df['hip'].isin(needed_ids), ['hip', 'ra', 'dec']]

# Each referenced id must match exactly one catalogue row.
duplicated_ids = coords.loc[coords['hip'].duplicated(), 'hip'].tolist()
assert not duplicated_ids, f'ids with more than one catalogue row: {duplicated_ids}'
missing_ids = sorted(set(needed_ids) - set(coords['hip'].tolist()))
assert not missing_ids, f'ids with no catalogue row: {missing_ids}'

# .tolist() produces plain Python scalars, so the stored lists contain the same
# kind of values as the previous temp[...].tolist()[0] lookups.
ra_by_hip = dict(zip(coords['hip'].tolist(), coords['ra'].tolist()))
dec_by_hip = dict(zip(coords['hip'].tolist(), coords['dec'].tolist()))

df['ra'] = df['stars'].apply(lambda star_list: [ra_by_hip[float(star)] for star in star_list])
df['dec'] = df['stars'].apply(lambda star_list: [dec_by_hip[float(star)] for star in star_list])

display(df.head())

Unnamed: 0,constellation,num_pairs,stars,name,ra,dec
0,Aql,8,"[98036, 97649, 97649, 97278, 97649, 95501, 955...",Aquila,"[19.921887, 19.846388, 19.846388, 19.770994, 1...","[6.406763, 8.868322000000003, 8.86832200000000..."
1,And,5,"[677, 3092, 3092, 5447, 9640, 5447, 5447, 4436...",Andromeda,"[0.139791, 0.655462, 0.655462, 1.162194, 2.064...","[29.090432, 30.861024, 30.861024, 35.620558, 4..."
2,Scl,3,"[116231, 4577, 4577, 115102, 115102, 116231]",Sculptor,"[23.549512, 0.976766, 0.976766, 23.313733, 23....","[-37.818268, -29.357449, -29.357449, -32.53202..."
3,Ara,7,"[88714, 85792, 85792, 83081, 83081, 82363, 823...",Ara,"[18.11052, 17.530695, 17.530695, 16.977006, 16...","[-50.09147700000001, -49.876145, -49.876145, -..."
4,Lib,5,"[77853, 76333, 76333, 74785, 74785, 72622, 726...",Libra,"[15.897093, 15.592104999999998, 15.59210499999...","[-16.729293, -14.789537, -14.789537, -9.382917..."


In [6]:
# Constellation names (as they appear in df['name']) to flag as zodiac.
zodiacs = [
    'Aquarius', 'Aries', 'Cancer', 'Capricornus',
    'Gemini', 'Leo', 'Libra', 'Pisces',
    'Sagittarius', 'Scorpius', 'Taurus', 'Virgo',
]

# Boolean flag per row: True when the row's name is one of the zodiac names.
is_zodiac = df['name'].isin(zodiacs)
df['zodiac'] = is_zodiac

# Sanity check: exactly twelve rows must be flagged.
assert is_zodiac.sum() == 12

display(df.head())
df.to_csv('./data/processed/asterisms.csv', index=False)

Unnamed: 0,constellation,num_pairs,stars,name,ra,dec,zodiac
0,Aql,8,"[98036, 97649, 97649, 97278, 97649, 95501, 955...",Aquila,"[19.921887, 19.846388, 19.846388, 19.770994, 1...","[6.406763, 8.868322000000003, 8.86832200000000...",False
1,And,5,"[677, 3092, 3092, 5447, 9640, 5447, 5447, 4436...",Andromeda,"[0.139791, 0.655462, 0.655462, 1.162194, 2.064...","[29.090432, 30.861024, 30.861024, 35.620558, 4...",False
2,Scl,3,"[116231, 4577, 4577, 115102, 115102, 116231]",Sculptor,"[23.549512, 0.976766, 0.976766, 23.313733, 23....","[-37.818268, -29.357449, -29.357449, -32.53202...",False
3,Ara,7,"[88714, 85792, 85792, 83081, 83081, 82363, 823...",Ara,"[18.11052, 17.530695, 17.530695, 16.977006, 16...","[-50.09147700000001, -49.876145, -49.876145, -...",False
4,Lib,5,"[77853, 76333, 76333, 74785, 74785, 72622, 726...",Libra,"[15.897093, 15.592104999999998, 15.59210499999...","[-16.729293, -14.789537, -14.789537, -9.382917...",True
