# Reproduction of "ALBERTA’S FISCAL RESPONSES TO FLUCTUATIONS IN NON-RENEWABLE-RESOURCE REVENUE" in Python

This notebook attempts to replicate the empirical results, tables, and figures from the paper by Ergete Ferede, published by the University of Calgary School of Public Policy in Volume 11:24, September 2018.
The original paper is here: [https://www.policyschool.ca/wp-content/uploads/2018/09/NRR-Ferede.pdf](https://www.policyschool.ca/wp-content/uploads/2018/09/NRR-Ferede.pdf)

## Setup and data acquisition

This section loads the required modules, downloads the required data sets, and reads them into DataFrames.

In [84]:
from pathlib import Path
import requests
import pandas as pd
import stats_can

In [91]:
def download_data():
    """Download the excel file for the analysis from the policy school page

    Note the readme sheet on the first file. Credit to Kneebone and Wilkins for
    assembling it, and policy school for hosting it

    The ``data`` directory is created when it does not exist. The payload is
    streamed into a temporary ``.part`` file that is renamed to the final name
    only once the whole body has been written, so an interrupted or failed
    download never leaves a truncated ``budgets.xlsx`` in place.

    Returns
    -------
    fname: pathlib.Path
        A path object with the location and name of the data

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code
    """
    print('Downloading data set')
    url = 'https://www.policyschool.ca/wp-content/uploads/2019/01/Provincial-Government-Budget-Data-January-2019FINAL-USE.xlsx'
    fname = Path('.').joinpath('data').joinpath('budgets.xlsx')
    # open() does not create missing parent directories
    fname.parent.mkdir(parents=True, exist_ok=True)
    partial = fname.with_name(fname.name + '.part')
    # The context manager closes the streamed connection when we are done
    with requests.get(
        url,
        stream=True,
        # A None value makes requests leave out its default User-Agent header
        # NOTE(review): presumably the server rejects the default one - confirm
        headers={'user-agent': None},
        # Applies to connecting and to each read, not to the whole transfer
        timeout=60,
    ) as response:
        # Fail loudly rather than saving an HTML error page as the workbook
        response.raise_for_status()
        with open(partial, 'wb') as outfile:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:  # filter out keep-alive new chunks
                    outfile.write(chunk)
    # Only a complete download is published under the final name
    partial.replace(fname)
    return fname

def get_file():
    """Locate the budget workbook on disk, fetching it first when absent

    Returns
    -------
    pathlib.Path
        Location of ``data/budgets.xlsx`` relative to the working directory
    """
    workbook = Path('.') / 'data' / 'budgets.xlsx'
    if workbook.exists():
        return workbook
    # Nothing cached yet, so pull a fresh copy
    download_data()
    return workbook


def get_date_index(df):
    """Helper function to turn budget year strings into datetimes

    Parameters
    ----------
    df: pandas.DataFrame
        Frame with a ``budget_yr`` string column whose first four
        characters are the starting year (e.g. ``'1965-66'``)

    Returns
    -------
    pandas.Series
        January 1st of each starting year, aligned with the index of ``df``
    """
    start_year = df['budget_yr'].str[0:4].astype(int)
    # pd.to_datetime assembles dates from year/month/day columns
    parts = pd.DataFrame(
        {'year': start_year, 'month': 1, 'day': 1},
        index=df.index,
    )
    return pd.to_datetime(parts)


def read_ab():
    """Load the Alberta sheet of the budget workbook

    The workbook is located through ``get_file`` (which downloads it when it
    is not on disk), the columns are renamed to snake_case identifiers, and
    the rows are indexed by the dates produced by ``get_date_index``

    Returns
    -------
    df: pandas.DataFrame
        Alberta's revenue and expenditure tables
    """
    column_names = {
        'index': 'budget_yr',
        'Personal Income Tax': 'personal_income_tax',
        'Corporation Income Tax': 'corporate_income_tax',
        'Retail Sales Tax': 'retail_sales_tax',
        'Federal Cash Transfers': 'federal_cash_transfers',
        'Natural Resource Revenue': 'natural_resource_revenue',
        'Other Own-Source Revenue': 'other_own_source_revenue',
        'Total Revenue': 'total_revenue',
        'Health': 'health_exp',
        'Social Services': 'social_services_exp',
        'Education': 'education_exp',
        'Other Program Expenditures': 'other_program_exp',
        'Total Program Expenditures': 'total_prog_exp',
        'Debt Service': 'debt_service',
        'Total  Expenditures': 'total_exp',
        'Unnamed: 16': 'annual_deficit',
    }
    workbook = get_file()
    raw = pd.read_excel(
        workbook,
        sheet_name='Alberta',
        header=3,
        index_col=1,
        skipfooter=21,
    )
    # The merged header cells leave an empty first row behind
    populated = raw.loc[raw.index.notnull()]
    # An empty leading column also comes through (origin unclear); discard it
    without_blank = populated.drop(columns='Unnamed: 0')
    # Move the budget year labels out of the index so they can be renamed
    tidy = without_blank.reset_index().rename(columns=column_names)
    return tidy.set_index(get_date_index(tidy))

def read_heritage():
    """Read the Heritage Trust fund deposits, which are stored separately

    The deposits are read from columns D and E of the Alberta sheet,
    skipping the first 68 rows

    Returns
    -------
    pandas.DataFrame
        ``budget_yr``, ``heritage_deposit`` and a constant ``heritage_dummy``
        column, indexed by the dates produced by ``get_date_index``
    """
    deposits = pd.read_excel(
        get_file(),
        sheet_name='Alberta',
        header=None,
        names=['budget_yr', 'heritage_deposit'],
        usecols='D,E',
        skiprows=68,
    ).set_index('budget_yr')

    # A couple observations are missing from the policy school set
    # See https://www.utpjournals.press/doi/pdf/10.3138/cpp.2015-046
    patched_years = (
        ('2005-06', 1750),
        ('2006-07', 1250),
        ('2007-08', 918),
    )
    for budget_yr, amount in patched_years:
        deposits.loc[budget_yr] = amount

    # Dummy variable flagging the years that have a heritage fund deposit
    deposits['heritage_dummy'] = 1

    # Swap the budget year labels for a datetime index
    flat = deposits.reset_index()
    return flat.set_index(get_date_index(flat))


def per_capita_data():
    """Build the Alberta population series used for per capita estimates

    Each value is the mean of the four observations preceding its reference
    date (``closed='left'`` leaves the current observation out of the
    window). The reference dates are then moved back one year and only the
    January rows from 1965 onwards are kept.

    NOTE(review): the intent is that the mean labelled January of year N+1,
    once shifted to year N, matches the fiscal year N basis of the budget
    tables; this presumes the source table is quarterly - confirm.

    Returns
    -------
    pandas.DataFrame
        A single ``population`` column indexed by ``new_index``
    """
    table = '17-10-0009-01'
    raw = stats_can.table_to_df(table, path='data')

    alberta = raw.loc[raw['GEO'] == 'Alberta']
    since_1965 = alberta.loc[alberta['REF_DATE'] >= '1965']
    population = (
        since_1965
        .set_index('REF_DATE')[['VALUE']]
        .rename(columns={'VALUE': 'population'})
    )

    # Average of the four rows before each date, current row excluded
    averaged = population.rolling(4, closed='left').mean().reset_index()
    # Relabel each average with the date one year earlier
    shifted = averaged.assign(
        new_index=averaged['REF_DATE'] - pd.DateOffset(years=1)
    )

    shifted_dates = shifted['new_index'].dt
    wanted = (shifted_dates.year >= 1965) & (shifted_dates.month == 1)
    return (
        shifted.loc[wanted]
        .drop(columns='REF_DATE')
        .set_index('new_index')
        .copy()
    )

In [92]:
# Show the averaged population series returned by per_capita_data
per_capita_data()

Unnamed: 0_level_0,population
new_index,Unnamed: 1_level_1
1965-01-01,1451250.0
1966-01-01,1467500.0
1967-01-01,1496750.0
1968-01-01,1531750.0
1969-01-01,1566250.0
1970-01-01,1602250.0
1971-01-01,1661085.0
1972-01-01,1699308.25
1973-01-01,1729643.75
1974-01-01,1763512.75


## Specification and data



In [49]:
# Load Alberta's revenue and expenditure table
df = read_ab()

In [50]:
# Display the loaded budget table
df

Unnamed: 0,budget_yr,personal_income_tax,corporate_income_tax,retail_sales_tax,federal_cash_transfers,natural_resource_revenue,other_own_source_revenue,total_revenue,health_exp,social_services_exp,education_exp,other_program_exp,total_prog_exp,debt_service,total_exp,annual_deficit
1965-01-01,1965-66,41.0,31.0,0.0,76.0,248.0,180.0,576.0,114.0,46.0,123.0,131.0,413.0,2.0,415.0,-160.0
1966-01-01,1966-67,56.0,27.0,0.0,97.0,240.0,190.0,609.0,140.0,51.0,169.0,178.0,538.0,2.0,540.0,-70.0
1967-01-01,1967-68,80.0,40.0,0.0,120.0,219.0,341.0,800.0,174.0,56.0,207.0,433.0,870.0,2.0,872.0,72.0
1968-01-01,1968-69,98.0,50.0,0.0,147.0,284.0,404.0,983.0,197.0,63.0,274.0,378.0,912.0,4.0,916.0,-67.0
1969-01-01,1969-70,134.0,64.0,0.0,162.0,255.0,384.0,1000.0,209.0,74.0,308.0,391.0,982.0,4.0,986.0,-14.0
1970-01-01,1970-71,182.0,58.0,0.0,194.0,231.0,452.0,1116.0,234.0,89.0,379.0,418.0,1120.0,9.0,1129.0,12.0
1971-01-01,1971-72,192.0,68.0,0.0,241.0,273.0,472.0,1247.0,261.0,102.0,302.0,586.0,1250.0,17.0,1267.0,20.0
1972-01-01,1972-73,232.0,98.0,0.0,225.0,331.0,523.0,1409.0,270.0,129.0,426.0,523.0,1348.0,23.0,1371.0,-38.0
1973-01-01,1973-74,289.0,113.0,0.0,343.0,639.0,363.0,1748.0,293.0,155.0,450.0,582.0,1480.0,24.0,1504.0,-244.0
1974-01-01,1974-75,347.0,277.0,0.0,498.0,1520.0,288.0,2930.0,396.0,182.0,517.0,952.0,2046.0,31.0,2077.0,-853.0
