## Import modules

In [1]:
import pandas as pd

## Read in and Inspect Data

In [2]:
# Load the full-moon calendar CSV into a DataFrame and preview the first five rows
moon_df = pd.read_csv(filepath_or_buffer='full-moon-calendar-1900-2050/full_moon.csv')
moon_df.head(n=5)

Unnamed: 0,Day,Date,Time
0,Monday,15 January 1900,08:07:30 pm
1,Wednesday,14 February 1900,02:50:12 pm
2,Friday,16 March 1900,09:11:48 am
3,Sunday,15 April 1900,02:02:06 am
4,Monday,14 May 1900,04:36:36 pm


In [3]:
# Load the first births file (named as 1994-2003, CDC/NCHS) and preview the first five rows
births1_df = pd.read_csv(
    filepath_or_buffer='fivethirtyeight-births-dataset/US_births_1994-2003_CDC_NCHS.csv'
)
births1_df.head(n=5)

Unnamed: 0,year,month,date_of_month,day_of_week,births
0,1994,1,1,6,8096
1,1994,1,2,7,7772
2,1994,1,3,1,10142
3,1994,1,4,2,11248
4,1994,1,5,3,11053


In [4]:
# Load the second births file (named as 2000-2014, SSA) and preview the first five rows
births2_df = pd.read_csv(
    filepath_or_buffer='fivethirtyeight-births-dataset/US_births_2000-2014_SSA.csv'
)
births2_df.head(n=5)

Unnamed: 0,year,month,date_of_month,day_of_week,births
0,2000,1,1,6,9083
1,2000,1,2,7,8006
2,2000,1,3,1,11363
3,2000,1,4,2,13032
4,2000,1,5,3,12558


## Combining birth datasets

In [5]:
# Stack the two birth tables row-wise (births1_df first, then births2_df)
# and renumber the rows with a fresh 0..n-1 index
births_df = pd.concat(objs=[births1_df, births2_df], axis=0, ignore_index=True)

In [6]:
# (rows, columns) of the combined frame, before duplicate dates are removed
births_df.shape

(9131, 5)

In [7]:
# Both source tables cover 2000-2003, so each of those calendar dates appears
# twice in the combined frame. keep='last' retains the later occurrence, i.e.
# the row that came from births2_df (it was concatenated second).
births_df = births_df.drop_duplicates(
    subset=['year', 'month', 'date_of_month'],
    keep='last',
)

In [8]:
# Verify the new number of rows after duplicate dates were dropped
# (the column count should be unchanged)

births_df.shape

(7670, 5)

## Preparing full moon dataset to merge with births

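- The full moon dataset should have separate columns for month, day, and year, so that it is easier to merge with the births data, which stores each date as separate `year`, `month`, and `date_of_month` columns.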

In [None]:
# Show the column labels exactly as they were read from the CSV,
# so that any stray characters in the header are visible
moon_df.columns

In [None]:
# The 'Date' and 'Time' headers each carry a leading space in the CSV;
# map them to their trimmed forms so the columns can be addressed normally.
moon_df = moon_df.rename(columns={' Date': 'Date', ' Time': 'Time'})

In [None]:
# Take the 'Date' column of moon_df as a standalone Series and preview it
moon_date = moon_df['Date']
moon_date.head(n=5)

In [None]:
# Break each date string (e.g. "15 January 1900") apart on whitespace;
# expand=True returns the pieces as separate columns labelled 0, 1, 2, ...
moon_date_df = moon_date.str.split(pat=None, expand=True)
moon_date_df.head(n=5)

In [None]:
# Replace the positional labels 0/1/2 produced by the split with descriptive names
moon_date_df = moon_date_df.rename(
    {0: 'date_of_month', 1: 'month', 2: 'year'},
    axis='columns',
)

In [None]:
# Attach the split-out date parts to the moon table, pairing rows by index label
dates_moon_df = pd.merge(moon_df, moon_date_df, left_index=True, right_index=True)
dates_moon_df.head(n=5)

In [None]:
# The first column's label (displayed as "Day") could not be renamed by name,
# so the column is copied under a clean label and the original is dropped in
# the cells that follow.
# NOTE(review): the label probably contains hidden characters -- inspect
# repr(moon_df.columns[0]) to confirm.

# Pull the column out by position as a Series. It is read from moon_df rather
# than dates_moon_df because dates_moon_df loses its first column further down;
# moon_df keeps its column order, so this cell still returns the day names when
# it is re-run after that drop. Both frames share the same row index.
day_of_week = moon_df.iloc[:, 0]
day_of_week.head()

In [None]:
# Store the extracted series in dates_moon_df under the clean label
# 'day_of_week' (values are matched to rows by index label)
dates_moon_df = dates_moon_df.assign(day_of_week=day_of_week)
dates_moon_df.head(n=5)

In [None]:
# Drop the original "Day" column now that 'day_of_week' holds a copy of it.
# The label to drop is looked up from moon_df, whose first column is that same
# column, instead of from dates_moon_df.columns[0]: a positional drop removes
# whatever column is currently first, so re-running the cell would go on to
# delete 'Date', then 'Time', and so on. errors='ignore' makes a repeat run a
# no-op once the label is gone.
dates_moon_df = dates_moon_df.drop(columns=[moon_df.columns[0]], errors='ignore')
dates_moon_df.head()

## Preparing birth data for merge
 
- For future use in Tableau, which recognizes date data, the month column should have the name of the month rather than a number.