In [None]:
from datetime import datetime

import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl

US Birthrate Data

In [None]:
# shell command to download the data:
!cd data && curl -O https://raw.githubusercontent.com/jakevdp/data-CDCbirths/master/births.csv

In [None]:
# Read the downloaded CSV into a DataFrame.
births = pd.read_csv(filepath_or_buffer='data/births.csv')
# Preview the first five rows.
births.head(n=5)

In [None]:
# Round each year down to the start of its decade (e.g. 1987 -> 1980).
births['decade'] = births['year'].floordiv(10).mul(10)
births.head(n=5)

In [None]:
# Total births per decade, split into one column per gender.
births.pivot_table(values='births', index='decade', columns='gender', aggfunc='sum')

In [None]:
plt.style.use('seaborn-v0_8-whitegrid')
# Total births per year, one line per gender.
births_per_year = births.pivot_table(values='births', index='year',
                                     columns='gender', aggfunc='sum')
ax = births_per_year.plot()
ax.set_ylabel('total births per year')
plt.show()

In [None]:
# Sigma clipping: robust location and scale estimates for the `births` column.
# The median stands in for the mean, and 0.74 * IQR stands in for the standard
# deviation (for a Gaussian distribution the interquartile range is about
# 1.349 sigma, so sigma is about 0.74 * IQR).
quartiles = np.percentile(births['births'], [25, 50, 75])
lower_quartile, mu, upper_quartile = quartiles
sig = 0.74 * (upper_quartile - lower_quartile)

In [None]:
# Keep only rows whose `births` value lies within 5 robust sigmas of the
# median (`mu` and `sig` are computed in the sigma-clipping cell).
# .copy() detaches the filtered result from the original frame, so adding or
# overwriting columns on it later does not raise SettingWithCopyWarning.
births = births.query('(births > @mu - 5 * @sig) & (births < @mu + 5 * @sig)').copy()

In [None]:
# Cast the `day` column to integers. `assign` builds a new frame rather than
# writing into the filtered result of `query`, which avoids pandas'
# SettingWithCopyWarning.
births = births.assign(day=births['day'].astype(int))

In [None]:
# Encode each row's date as the integer YYYYMMDD, then parse it into a
# datetime index so calendar attributes become available.
date_codes = births['year'] * 10000 + births['month'] * 100 + births['day']
births.index = pd.to_datetime(date_codes, format='%Y%m%d')
# Day of week as an integer (pandas convention: Monday=0 ... Sunday=6).
births['dayofweek'] = births.index.dayofweek

In [None]:
# Mean births for each day of the week, one line per decade.
weekday_means = births.pivot_table(values='births', index='dayofweek',
                                   columns='decade', aggfunc='mean')
ax = weekday_means.plot()
# Replace the numeric weekday ticks (0-6) with readable names.
ax.set(xticks=range(7), xticklabels=['Mon', 'Tues', 'Wed', 'Thurs','Fri', 'Sat', 'Sun'])
ax.set_ylabel('mean births by day')
plt.show()

In [None]:
# Average births for every (month, day) pair across all years in the index.
# The mean is pivot_table's default aggregation; it is spelled out here.
births_by_date = births.pivot_table(
    values='births',
    index=[births.index.month, births.index.day],
    aggfunc='mean',
)
births_by_date.head(n=5)

In [None]:
# Re-label the (month, day) pairs as real dates so they can be drawn on a
# time axis. 2012 is a leap year, so a February 29 entry (if present) is
# still a valid date.
# The isinstance guard makes the cell safe to re-run: after the first run the
# index already holds dates and can no longer be unpacked into (month, day).
if isinstance(births_by_date.index, pd.MultiIndex):
    births_by_date.index = [datetime(2012, month, day)
                            for (month, day) in births_by_date.index]
births_by_date.head()

In [None]:
# Wide single-panel figure: average births for each calendar day.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 4))
births_by_date.plot.line(ax=ax)
plt.show()