In [1]:
import os
import sys
# ROOT_DIR is the directory that contains the parent of the current working
# directory. It is put first on sys.path so imports are resolved against it
# before any other location. The path is already absolute and normalised,
# so it is inserted as-is.
ROOT_DIR = os.path.dirname(os.path.abspath(os.pardir))
sys.path.insert(0, ROOT_DIR)

%load_ext autoreload
%autoreload 2

# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from jupyterthemes import jtplot
jtplot.style()
import seaborn as sns
import statsmodels
import sklearn

In [2]:
# Load the processed GDP table and preview it. As the preview shows, each row
# holds one Value for a Country / Subject / Measure / Year combination.
df_gdp = pd.read_csv('../data/processed/gdp_per_capita.csv')
df_gdp.head()

Unnamed: 0,Country,Subject,Measure,Year,Value
0,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1970,40314.0
1,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1971,44484.0
2,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1972,49747.0
3,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1973,60272.0
4,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1974,71158.0


In [3]:
# Load the processed productivity-growth table and preview it. It has the
# same long layout as the GDP table plus a Unit column.
df_prod = pd.read_csv('../data/processed/productivity_growth.csv')
df_prod.head()

Unnamed: 0,Country,Subject,Measure,Year,Unit,Value
0,Australia,Total capital services,Annual growth/change,1985,Percentage,5.122257
1,Australia,Total capital services,Annual growth/change,1986,Percentage,4.945708
2,Australia,Total capital services,Annual growth/change,1987,Percentage,4.964987
3,Australia,Total capital services,Annual growth/change,1988,Percentage,5.063412
4,Australia,Total capital services,Annual growth/change,1989,Percentage,4.829607


In [4]:
# List the distinct indicators (Subject values) present in the GDP table.
df_gdp['Subject'].unique()

array(['Gross Domestic Product (GDP); millions',
       'GDP per head of population', 'GDP per hour worked',
       'GDP per person employed',
       'Labour utilisation (hours worked per head of population)',
       'Hours worked for total employment; millions',
       'Gap in GDP per hour worked with respect to the USA',
       'Total population; persons; thousands',
       'Gap in GDP per capita with respect to the USA',
       'Total employment (number of persons employed); thousands',
       'Gap in labour utilisation with respect to the USA',
       'Average hours worked per person employed'], dtype=object)

In [5]:
# List the distinct Country values. Note that the column also contains
# aggregates (e.g. 'G7', 'OECD - Total'), not only individual countries.
df_gdp['Country'].unique()

array(['Australia', 'Austria', 'Belgium', 'Canada', 'Czech Republic',
       'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
       'Iceland', 'Ireland', 'Italy', 'Japan', 'Korea', 'Luxembourg',
       'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Poland',
       'Portugal', 'Slovak Republic', 'Spain', 'Sweden', 'Switzerland',
       'Turkey', 'United Kingdom', 'United States', 'Brazil', 'Chile',
       "China (People's Republic of)", 'Estonia', 'India', 'Indonesia',
       'Israel', 'Russia', 'Slovenia', 'South Africa',
       'BRIICS economies - Brazil, Russia, India, Indonesia, China and South Africa',
       'European Union (28 countries)', 'OECD - Total', 'G7', 'Colombia',
       'Latvia', 'Lithuania', 'Euro area (19 countries)'], dtype=object)

In [6]:
# List the distinct Measure values; make_prod_gdp_data keeps only one of them.
df_gdp['Measure'].unique()

array(['National currency, current prices',
       'USD, current prices, current PPPs',
       'USD, constant prices, 2015 PPPs', 'As % of the USA (USA=100)',
       'Persons/Hours', 'Gap in % points'], dtype=object)

In [7]:
def make_prod_gdp_data(df_prod, df_gdp, measure='USD, current prices, current PPPs'):
    """Restrict both tables to the countries they share and keep one GDP measure.

    Parameters
    ----------
    df_prod : pandas.DataFrame
        Productivity table; must contain a ``Country`` column.
    df_gdp : pandas.DataFrame
        GDP table; must contain ``Country`` and ``Measure`` columns.
    measure : str, optional
        The ``Measure`` value to keep in the GDP table. Defaults to
        ``'USD, current prices, current PPPs'``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(dff_prod, dff_gdp)``: the productivity rows whose country also
        appears in the GDP table, and the GDP rows whose country also appears
        in the productivity table and whose ``Measure`` equals ``measure``.
        Original index labels are preserved. Both frames are empty when the
        tables share no country.
    """
    # Missing country labels never count as shared.
    prod_countries = set(df_prod['Country'].dropna().unique())
    gdp_countries = set(df_gdp['Country'].dropna().unique())
    mutual_countries = list(prod_countries & gdp_countries)

    # Use membership masks rather than a string-built ``query`` expression:
    # a quoted expression fails to parse for names containing an apostrophe
    # (e.g. "China (People's Republic of)") and is empty, hence invalid, when
    # no country is shared.
    dff_prod = df_prod[df_prod['Country'].isin(mutual_countries)]

    gdp_in_both = df_gdp['Country'].isin(mutual_countries)
    gdp_has_measure = df_gdp['Measure'] == measure
    dff_gdp = df_gdp[gdp_in_both & gdp_has_measure]

    return (dff_prod, dff_gdp)

# Build the country-matched frames used by the cells below.
dff_prod, dff_gdp = make_prod_gdp_data(df_prod, df_gdp)


In [8]:
# Spot-check the country-filtered productivity frame.
dff_prod.head()

Unnamed: 0,Country,Subject,Measure,Year,Unit,Value
0,Australia,Total capital services,Annual growth/change,1985,Percentage,5.122257
1,Australia,Total capital services,Annual growth/change,1986,Percentage,4.945708
2,Australia,Total capital services,Annual growth/change,1987,Percentage,4.964987
3,Australia,Total capital services,Annual growth/change,1988,Percentage,5.063412
4,Australia,Total capital services,Annual growth/change,1989,Percentage,4.829607


In [9]:
# Years covered by the filtered productivity data (listed in order of first
# appearance, so the result is not sorted).
dff_prod.Year.unique()

array([1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995,
       1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
       2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
       2018, 2019, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979,
       1980, 1981, 1982, 1983, 1984], dtype=int64)

In [10]:
# Years covered by the filtered GDP data, for comparison with the
# productivity years.
dff_gdp.Year.unique()

array([1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980,
       1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991,
       1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
       2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
       2014, 2015, 2016, 2017, 2018, 2019], dtype=int64)

In [11]:
# Spot-check the GDP frame after the country and Measure filters.
dff_gdp.head()

Unnamed: 0,Country,Subject,Measure,Year,Value
200,Australia,Gross Domestic Product (GDP); millions,"USD, current prices, current PPPs",1970,58744.34
201,Australia,Gross Domestic Product (GDP); millions,"USD, current prices, current PPPs",1971,64010.806
202,Australia,Gross Domestic Product (GDP); millions,"USD, current prices, current PPPs",1972,69668.594
203,Australia,Gross Domestic Product (GDP); millions,"USD, current prices, current PPPs",1973,78686.076
204,Australia,Gross Domestic Product (GDP); millions,"USD, current prices, current PPPs",1974,86151.644


In [12]:
# Check the dtype of Year in the GDP frame; Year is later used as a merge key.
dff_gdp.Year.dtype

dtype('int64')

In [13]:
# Check the dtype of Year in the productivity frame; it has to match the GDP
# frame's Year dtype for the merge keys to line up.
dff_prod.Year.dtype

dtype('int64')

In [14]:
# Disabled: casting Year to str is unnecessary because both Year columns are
# already int64 (see the two dtype checks above), so the merge keys match
# without conversion.
# dff_prod.Year = dff_prod.Year.astype('str')
# dff_gdp.Year = dff_gdp.Year.astype('str')

In [15]:
# Re-display the unfiltered GDP table for reference before reshaping.
df_gdp.head()

Unnamed: 0,Country,Subject,Measure,Year,Value
0,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1970,40314.0
1,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1971,44484.0
2,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1972,49747.0
3,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1973,60272.0
4,Australia,Gross Domestic Product (GDP); millions,"National currency, current prices",1974,71158.0


In [16]:
# Reshape the long GDP frame to wide form: one row per (Country, Year) and
# one column per Subject, filled from Value. pivot_table's default
# aggregation (mean) applies if a key combination has several values.
dffp_gdp = (
    dff_gdp
    .pivot_table(index=['Country', 'Year'], columns='Subject', values='Value')
    .reset_index()
)
dffp_gdp.head()

Subject,Country,Year,GDP per head of population,GDP per hour worked,GDP per person employed,Gross Domestic Product (GDP); millions
0,Australia,1970,4638.882126,5.762338,10750.133182,58744.34
1,Australia,1971,4849.8987,6.193551,11443.716691,64010.806
2,Australia,1972,5195.547594,6.713858,12273.964954,69668.594
3,Australia,1973,5779.645057,7.434041,13495.292839,78686.076
4,Australia,1974,6228.439924,7.903118,14477.85997,86151.644


In [17]:
# Left-join the productivity rows onto the wide GDP frame by Country and Year.
# Every GDP row is kept, and it is repeated once for each matching
# productivity row.
dfm = dffp_gdp.merge(dff_prod, how='left', on=['Country', 'Year'])

In [18]:
# Show only the merged rows without any missing value. The result is not
# assigned, so dfm itself still contains the rows with missing values.
dfm.dropna()

Unnamed: 0,Country,Year,GDP per head of population,GDP per hour worked,GDP per person employed,Gross Domestic Product (GDP); millions,Subject,Measure,Unit,Value
1,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,"GDP per hour worked, constant prices",Annual growth/change,Percentage,2.499595
2,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,Total hours worked,Annual growth/change,Percentage,1.378602
3,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,Average hours worked per person employed,Annual growth/change,Percentage,-0.959623
4,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,"GDP per person employed, constant prices",Annual growth/change,Percentage,1.515985
5,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,Total employment (number of persons employed),Annual growth/change,Percentage,2.360881
6,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,Labour utilisation (=hours worked per head of ...,Annual growth/change,Percentage,-2.730124
7,Australia,1971,4849.898700,6.193551,11443.716691,6.401081e+04,"GDP per capita, constant prices",Annual growth/change,Percentage,-0.298772
8,Australia,1972,5195.547594,6.713858,12273.964954,6.966859e+04,"GDP per hour worked, constant prices",Annual growth/change,Percentage,2.200434
9,Australia,1972,5195.547594,6.713858,12273.964954,6.966859e+04,Total hours worked,Annual growth/change,Percentage,0.404068
10,Australia,1972,5195.547594,6.713858,12273.964954,6.966859e+04,Average hours worked per person employed,Annual growth/change,Percentage,-1.056939
