In [1]:
import requests 
from my_api_info import get_noaa_token
from bs4 import BeautifulSoup
from time import sleep
import numpy as np
import pandas as pd

In [2]:
def get_month_rfall(YYYY_MM: str) -> requests.models.Response:
    """
    Given an input string representing a month (in the format 
    YYYY-MM, e.g. '2024-01' for January of 2024), returns Response
    from NOAA API with desired monthly precipitation for each MI 
    station.
    """
    # Desired date range
    start_date = YYYY_MM + '-01'
    end_date = YYYY_MM + '-02'

    # API specifications
    base_url = 'https://www.ncei.noaa.gov/cdo-web/api/v2'
    extension = '/data'
    url = base_url + extension

    headers = {'token':get_noaa_token()}
    parameters = {'datasetid':'GSOM',
                'startdate':start_date,
                'enddate':start_date,
                'units':'metric',
                'datatypeid':'PRCP',
                'station':'GHCND:US',
                'locationid':'FIPS:' + '26',
                'limit':1000,
                'includemetadata':'false'}
    r = requests.get(url,
                    headers=headers,
                    params=parameters)

    return r

In [3]:
# Create the DataFrame which will store all the rainfall data
master_df = pd.DataFrame(
    [],
    index=pd.Index([], name='station'),
    columns=pd.Index([], name='date')
)
master_df.index.name = 'station'

In [4]:
# This is the main scraping loop. Just modify the starting 
# segment of the YYYY and the range.

for j in range(0, 20):
    if j < 10:
        YYYY = '190' + str(j)
    else:
        YYYY = '19' + str(j)
    for i in range(1,13):
        # NOAA API requests limited to 5 per second; I'm 
        # just being cautious here
        sleep(1)

        if i < 10: 
            MM = '0' + str(i)
        else:
            MM = str(i)
        YYYY_MM = YYYY + '-' + MM
        r = get_month_rfall(YYYY_MM)
        status_code = r.status_code
        print(f'Year {j} Month {i}: {status_code}')

        if status_code == 200: # Make sure we've succeeded in our request
            # Create DataFrame
            df = pd.DataFrame.from_dict(r.json()['results'])
            df = df.drop(['datatype', 'attributes'], axis=1)
            df['date'] = df['date'].apply(lambda s : s.split('T')[0][:-3])
            # Check for duplicates in the station IDs
            dups_list = [x for x in df.station.duplicated() if x == True]
            if dups_list != []: 
                print(f'When i = {i}, found duplicate stations!!')
            # Rearrange DataFrame
            df = df.pivot(index='station', columns='date', values='value') 

            # Add DataFrame to master_df, looping through newly-found df
            for station in df.index: 
                master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]  

        else: # If there was an error in accessing the API 
            # First wait a bit and then try again; then just add NaN 
            print(f'Retrying {YYYY_MM}...')
            sleep(5)
            r = get_month_rfall(YYYY_MM)
            status_code = r.status_code
            print(f'Year {j} Month {i}: {status_code}')

            if status_code == 200: # Make sure we've succeeded in our request
                # Create DataFrame
                df = pd.DataFrame.from_dict(r.json()['results'])
                df = df.drop(['datatype', 'attributes'], axis=1)
                df['date'] = df['date'].apply(lambda s : s.split('T')[0][:-3])
                # Check for duplicates in the station IDs
                dups_list = [x for x in df.station.duplicated() if x == True]
                if dups_list != []: 
                    print(f'When i = {i}, found duplicate stations!!')
                # Rearrange DataFrame
                df = df.pivot(index='station', columns='date', values='value') 

                # Add DataFrame to master_df, looping through newly-found df
                for station in df.index: 
                    master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]  

            else: # Second time it fails, add NaNs
                for station in master_df.index:
                    master_df.loc[station, YYYY_MM] = np.nan
                print(f'Inserting NaN column to master_df for {YYYY_MM}')

    

Year 0 Month 1: 200
Year 0 Month 2: 200
Year 0 Month 3: 200
Year 0 Month 4: 200
Year 0 Month 5: 200
Year 0 Month 6: 200
Year 0 Month 7: 200
Year 0 Month 8: 200
Year 0 Month 9: 200
Year 0 Month 10: 200
Year 0 Month 11: 200
Year 0 Month 12: 200
Year 1 Month 1: 200
Year 1 Month 2: 200
Year 1 Month 3: 200
Year 1 Month 4: 200
Year 1 Month 5: 200
Year 1 Month 6: 200
Year 1 Month 7: 200
Year 1 Month 8: 200
Year 1 Month 9: 200
Year 1 Month 10: 200
Year 1 Month 11: 200
Year 1 Month 12: 200
Year 2 Month 1: 200
Year 2 Month 2: 200
Year 2 Month 3: 200
Year 2 Month 4: 200
Year 2 Month 5: 503
Retrying 1902-05...
Year 2 Month 5: 200
Year 2 Month 6: 200
Year 2 Month 7: 200
Year 2 Month 8: 200
Year 2 Month 9: 200
Year 2 Month 10: 200
Year 2 Month 11: 200
Year 2 Month 12: 200
Year 3 Month 1: 200
Year 3 Month 2: 503
Retrying 1903-02...
Year 3 Month 2: 200
Year 3 Month 3: 200
Year 3 Month 4: 200
Year 3 Month 5: 200
Year 3 Month 6: 200
Year 3 Month 7: 200
Year 3 Month 8: 200
Year 3 Month 9: 200
Year 3 Mont

  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 8 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 9 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 10 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 11 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 12 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 13 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 9: 503
Retrying 1914-09...
Year 14 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 14 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 7: 503
Retrying 1915-07...
Year 15 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 11: 503
Retrying 1915-11...
Year 15 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 15 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 16 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 1: 503
Retrying 1917-01...
Year 17 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 17 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 18 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 1: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 2: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 3: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 4: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 5: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 6: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 7: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 8: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 9: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 10: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 11: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


Year 19 Month 12: 200


  master_df.loc[station, YYYY_MM] = df.loc[station, YYYY_MM]


In [5]:
# Display final DataFrame
master_df.head(10)

date,1900-01,1900-02,1900-03,1900-04,1900-05,1900-06,1900-07,1900-08,1900-09,1900-10,...,1919-03,1919-04,1919-05,1919-06,1919-07,1919-08,1919-09,1919-10,1919-11,1919-12
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GHCND:USC00200032,18.4,82.9,73.8,47.1,84.6,80.0,178.8,63.4,53.9,71.8,...,72.0,,137.5,46.6,,,85.5,105.7,45.2,
GHCND:USC00200108,33.8,82.6,,9.5,97.8,56.5,100.8,56.4,52.3,44.7,...,156.3,71.1,,99.1,40.6,68.6,160.0,122.2,41.7,34.4
GHCND:USC00200146,40.4,91.3,50.7,49.8,66.0,31.8,90.9,131.1,53.6,71.4,...,127.5,60.2,114.3,87.4,56.6,53.0,95.0,104.5,63.0,26.9
GHCND:USC00200230,27.0,94.4,68.5,36.5,124.7,58.5,74.0,51.8,28.0,56.6,...,88.8,137.0,106.0,110.3,39.0,77.9,67.8,74.6,74.9,13.0
GHCND:USC00200446,25.4,36.7,20.7,,31.7,102.3,76.2,,94.6,97.3,...,,,,,,,,,,
GHCND:USC00200457,25.1,97.6,54.1,19.3,90.6,85.7,96.4,137.7,50.7,107.9,...,,,,,,,,,,
GHCND:USC00200568,37.9,74.5,35.4,52.6,64.9,36.2,118.0,123.9,32.3,90.7,...,79.1,52.3,85.9,22.3,75.0,37.1,95.8,95.1,45.5,11.3
GHCND:USC00200723,40.9,99.9,63.3,25.1,94.7,101.8,61.9,149.7,34.2,43.1,...,,,,,,,,,,
GHCND:USC00200735,101.3,203.1,80.0,50.8,113.2,36.1,148.7,198.5,17.7,50.3,...,,,,,,,,,,
GHCND:USC00200779,75.7,60.5,40.8,66.7,50.1,58.3,113.1,96.8,76.8,96.0,...,117.0,58.6,89.4,56.6,92.4,33.6,53.6,155.4,58.0,14.9


In [6]:
# uncomment to save as CSV file
master_df.to_csv('data/1900-1919.csv')