In [1]:
# Dependencies

import requests
from config import api_key
import pandas as pd
import datetime as dt
from datetime import datetime as ds
import json
from pprint import pprint
import sqlalchemy
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

### Getting Inflation data for US by reading HTML

In [2]:
# Define the url to read the html

url = 'https://www.in2013dollars.com/us/inflation/1930?amount=1'

In [3]:
# Read the web page and print results

cpi_inflation = pd.read_html(url)
cpi_inflation

[                            0               1
 0     Cumulative price change       1,534.75%
 1      Average inflation rate           3.12%
 2  Converted amount ($1 base)          $16.35
 3  Price difference ($1 base)          $15.35
 4                 CPI in 1930          16.700
 5                 CPI in 2021         273.003
 6           Inflation in 1930          -2.34%
 7           Inflation in 2021           5.37%
 8                  $1 in 1930  $16.35 in 2021,
     Year Dollar Value Inflation Rate
 0   1930        $1.00         -2.34%
 1   1931        $0.91         -8.98%
 2   1932        $0.82         -9.87%
 3   1933        $0.78         -5.11%
 4   1934        $0.80          3.08%
 ..   ...          ...            ...
 87  2017       $14.68          2.13%
 88  2018       $15.04          2.49%
 89  2019       $15.31          1.76%
 90  2020       $15.50          1.23%
 91  2021       $16.35         5.48%*
 
 [92 rows x 3 columns],
                  Initial value              Eq

In [4]:
# Select the required table for CPI and Inflation from the page and build the required dataframe

cpi_inflation_df = cpi_inflation[1]
cpi_inflation_df 

Unnamed: 0,Year,Dollar Value,Inflation Rate
0,1930,$1.00,-2.34%
1,1931,$0.91,-8.98%
2,1932,$0.82,-9.87%
3,1933,$0.78,-5.11%
4,1934,$0.80,3.08%
...,...,...,...
87,2017,$14.68,2.13%
88,2018,$15.04,2.49%
89,2019,$15.31,1.76%
90,2020,$15.50,1.23%


In [5]:
cpi_inflation_df = cpi_inflation_df.replace('\$','',regex=True)
cpi_inflation_df = cpi_inflation_df.replace('\%','',regex=True)
cpi_inflation_df = cpi_inflation_df.replace('\*','',regex=True)
cpi_inflation_df = cpi_inflation_df.astype(float)
cpi_inflation_df

Unnamed: 0,Year,Dollar Value,Inflation Rate
0,1930.0,1.00,-2.34
1,1931.0,0.91,-8.98
2,1932.0,0.82,-9.87
3,1933.0,0.78,-5.11
4,1934.0,0.80,3.08
...,...,...,...
87,2017.0,14.68,2.13
88,2018.0,15.04,2.49
89,2019.0,15.31,1.76
90,2020.0,15.50,1.23


In [6]:
# Rename Columns

cpi_inflation_df.columns = ['date', 'dollar_value', 'inflation_rate']
cpi_inflation_df

Unnamed: 0,date,dollar_value,inflation_rate
0,1930.0,1.00,-2.34
1,1931.0,0.91,-8.98
2,1932.0,0.82,-9.87
3,1933.0,0.78,-5.11
4,1934.0,0.80,3.08
...,...,...,...
87,2017.0,14.68,2.13
88,2018.0,15.04,2.49
89,2019.0,15.31,1.76
90,2020.0,15.50,1.23


### Getting GDP data from St. Louis Fed. using an API call

In [7]:
#https://alfred.stlouisfed.org/series/downloaddata?seid=MABMM301USA657S

url = 'https://api.stlouisfed.org/fred/series/observations?series_id=A191RL1A225NBEA'
realtime_start = '1958-01-01'
realtime_end = '2021-01-31'

In [8]:
# Build query URL

query_url = url + "&realtime_start=" + realtime_start + "&realtime_end=" + realtime_end + "&api_key=" + api_key + "&file_type=json"

In [None]:
# Print the results of the query in a json format

response = requests.get(query_url).json()
pprint(response)

In [None]:
# Select the observations from the response

pprint(response['observations'])

In [None]:
# Dump the results into pandas dataframe

gdp_df = pd.DataFrame(response['observations'])
gdp_df

In [None]:
# Select the required columns 'date' and 'value' for the analysis

gdp_df = gdp_df[['date', 'value']]
gdp_df

In [None]:
# Rename Columns

gdp_df.columns = ['date', 'gdp_rate']
gdp_df

In [None]:
# Change the dtype of the Year column from 'object' to 'datetime' to be able to select the year portion of date

gdp_df['date'] = [ds.strptime(x,'%Y-%m-%d') for x in gdp_df['date']]

In [None]:
# Select just the years from the dates

gdp_df['date'] = gdp_df['date'].dt.year
gdp_df

In [None]:
# Drop the duplicated year values

gdp_df = gdp_df.drop_duplicates(keep='last', subset=['date'])
gdp_df.astype(float)
gdp_df

### Reading xls to obtain Money Stock (M3) in the US

In [None]:
# Read United States values M3 values from 1958 into a dataframe

m3_data = pd.read_excel('us_m3.xls')
m3_data

In [None]:
# Rename the columns

m3_data.columns = ['date', 'm3_rate']
m3_data

In [None]:
#Change date format to year to match the CPI_Inflation table

m3_data['date'] = m3_data['date'].dt.year
m3_data = m3_data.round(decimals=2)
m3_data


### Merge Tables

In [None]:
new_df = pd.merge(cpi_inflation_df, gdp_df, on='date', how='outer')
new_df

In [None]:
combined_df = pd.merge(new_df, m3_data, on='date', how='outer')
combined_df

In [None]:
## Drop Nulls
clean_df = combined_df.dropna(how='any')
clean_df

In [None]:
##plt.plot(clean_df)
clean_df.dtypes

### Create database connection and load the individual tables into database

In [None]:
connection_string = "postgres:Bias1981@localhost:5432/economic_indicators"
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
cpi_inflation_df.to_sql(name='cpi_inflation', con=engine, if_exists='replace', index=False)

In [None]:
gdp_df.to_sql(name='gdp', con=engine, if_exists='replace', index=False)

In [None]:
m3_data.to_sql(name='m3', con=engine, if_exists='replace', index=False)

In [None]:
# Confirm tables exist in database
engine.table_names()

### Query the tables

In [None]:
pd.read_sql_query('select * from cpi_inflation', con=engine).head()

In [None]:
pd.read_sql_query('select * from gdp', con=engine).head()

In [None]:
pd.read_sql_query('select * from m3', con=engine).head()