# BDA3 Section 21.2 birthdays and birthdates example

In [None]:
import numpy as np
import scipy as sc
import pandas as pd
import matplotlib as plc
import seaborn as sb
from urllib.request import urlopen
import datetime as dt
%matplotlib inline

## Read birthdates data from the web

In [None]:
# Data from National Vital Statistics System mortality data
# Provided by Michael Kern using Google BigQuery
# Referenced in BDA3 p.516
# NOTE(review): daily birth counts would normally come from the NVSS
# *natality* files, not the mortality files -- confirm the wording above.

url = "http://www.mechanicalkern.com/static/birthdates-1968-1988.csv"   # Birth dates data URL

# Download and parse inside a context manager so the HTTP response is
# closed as soon as the CSV has been read, even if parsing raises.
with urlopen(url) as f:
    bdates = pd.read_csv(f)                                             # create pandas DataFrame with read_csv()

bdates.head()                                                           # take a look at a few records

## Convert y-m-d values to pandas datetime values

In [None]:
# Assemble the datetime column directly from the integer year/month/day
# columns. pandas builds the timestamps from the components, so there is no
# need to concatenate strings and re-parse them, nor to re-wrap the result
# in pd.Series (to_datetime already returns a Series aligned with bdates).
bdates['dt'] = pd.to_datetime(bdates[['year', 'month', 'day']])

## Generate a sequential time index

In [None]:
# t = number of days elapsed since 1968-12-31, so 1969-01-01 gets t = 1.0.
# Dividing the timedelta column by a one-day timedelta turns it into floats.
bdates['t'] = (
    (bdates['dt'] - pd.to_datetime('1968-12-31'))
    / dt.timedelta(days=1)
)

## Generate index s for the smooth yearly seasonal pattern

In [None]:
# Seasonal index: the sequential day number t wrapped modulo 365.25,
# i.e. the position of each day within a nominal 365.25-day year.
bdates['s'] = bdates['t'] % 365.25
bdates.head()  # preview the first rows, now including s

## Generate special-day indicators

In [None]:
# (month, day) pairs that each get their own boolean indicator column.
# Columns are named spec_MM_DD and are created in the order listed here.
_special_days = [
    (1, 1),
    (2, 14),
    (2, 29),
    (4, 1),
    (7, 4),
    (10, 31),
    (12, 25),
    (12, 26),
    (12, 27),
    (12, 28),
    (12, 29),
    (12, 30),
    (12, 31),
]

for _mm, _dd in _special_days:
    # True exactly on the rows whose calendar date is _mm/_dd (in any year).
    _column = 'spec_{:02d}_{:02d}'.format(_mm, _dd)
    bdates[_column] = (bdates['month'] == _mm) & (bdates['day'] == _dd)

bdates.head()

## Generate weekend indicator

In [None]:
# Flag rows whose day_of_week code is 6 or 7.
# NOTE(review): presumably 6 = Saturday and 7 = Sunday (1 = Monday) --
# confirm against the coding used in the downloaded CSV.
bdates['weekend'] = bdates['day_of_week'].isin([6, 7])
bdates

In [None]:
# Persist the augmented table in the current working directory.
# index=True is the pandas default, spelled out here: the row index is
# written as the first (unnamed) column of the CSV.
bdates.to_csv("BDA3_birthdays_and_birthdates.csv", index=True)