In [1]:
import os
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Notebook-wide pandas display settings: cap rendered cell text at 100 characters
# and rendered frames at 20 rows.
pd.set_option('max_colwidth', 100)
pd.set_option('display.max_rows', 20)

<br>

# Functions

In [2]:
def M2Q(df_in):
    """Reduce a monthly table to quarterly labels using quarter-end months only.

    Rows whose ``TIME`` value ends in ``M03``, ``M06``, ``M09`` or ``M12`` are
    kept and relabelled as the first four characters of ``TIME`` followed by
    ``Q1``..``Q4``; every other row is dropped.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Table with a ``TIME`` column holding labels such as ``"1980M03"``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the retained rows (original index preserved) with ``TIME``
        rewritten to the quarterly label. An empty input yields an empty frame
        with the same columns, and rows with a missing ``TIME`` are dropped.
    """
    month_to_quarter = {"M03": "Q1", "M06": "Q2", "M09": "Q3", "M12": "Q4"}

    # Vectorised string ops always yield a boolean mask, even for an empty
    # frame, and tolerate missing values instead of raising on slicing.
    time = df_in["TIME"].astype(str)
    is_quarter_end = time.str[-3:].isin(list(month_to_quarter))

    df_out = df_in[is_quarter_end].copy()
    kept = time[is_quarter_end]
    df_out["TIME"] = kept.str[:4] + kept.str[-3:].map(month_to_quarter)
    return df_out

def Qp1(df_in):
    """Return a copy of ``df_in`` whose ``TIME`` labels are moved one quarter forward.

    ``TIME`` values are expected to look like ``"1999Q3"``: everything before
    the last two characters is the year, the last two characters the quarter.
    ``Q4`` rolls over to ``Q1`` of the following year. The input frame is not
    modified.
    """
    successor = {"Q1": "Q2", "Q2": "Q3", "Q3": "Q4", "Q4": "Q1"}

    def _next_quarter(label):
        year = label[:-2]
        if label[-1:] == "4":
            # Fourth quarter: the following quarter belongs to the next year.
            year = str(int(year) + 1)
        return year + successor[label[-2:]]

    df_out = df_in.copy()
    df_out.TIME = df_out["TIME"].apply(_next_quarter)
    return df_out

<br>

# Data

In [3]:
# Directory holding the CSV exports; the read_csv calls in the following cells are relative to it.
path = "./data/csvs/"
# Shell out to sed to delete every ':' character from every file in ./data/csvs/.
# `-i` edits the files in place, so the exports on disk are modified by this cell.
# NOTE(review): presumably ':' is the export's missing-value marker, so blanking it lets
# read_csv(...).dropna() discard those rows — confirm. The directory is hard-coded here
# rather than taken from `path`.
!sed -i "s/://g" ./data/csvs/*

>## Quarterly

In [4]:
# GDP and main components: skip the "Flag and Footnotes" column, drop incomplete rows.
namq_10_gdp = pd.read_csv(
    path + "namq_10_gdp_1_Data.csv",
    usecols=lambda column: column not in ["Flag and Footnotes"],
    low_memory=False,
).dropna()
# Same table with TIME moved one quarter forward (Qp1 works on its own copy).
namq_10_gdp_lag = Qp1(namq_10_gdp)

# Final consumption aggregates, loaded the same way.
namq_10_fcs = pd.read_csv(
    path + "namq_10_fcs_1_Data.csv",
    usecols=lambda column: column not in ["Flag and Footnotes"],
    low_memory=False,
).dropna()
namq_10_fcs_lag = Qp1(namq_10_fcs)

>## Monthly

In [5]:
# Sentiment indicators - monthly data, reduced to quarterly labels by M2Q
ei_bssi_m_r2 = M2Q(
    pd.read_csv(
        path + "ei_bssi_m_r2_1_Data.csv",
        usecols=lambda column: column not in ["Flag and Footnotes"],
        low_memory=False,
    ).dropna()
)

**Building observation IDs (`TIME` + `GEO`) and converting `Value` to floats:**

In [6]:
# Give every table a TIME+GEO key (e.g. "1978Q1NO") and a numeric Value column.
for df in [namq_10_gdp, namq_10_gdp_lag, namq_10_fcs, namq_10_fcs_lag, ei_bssi_m_r2]:
    df["ID"] = df["TIME"] + df["GEO"]
    # Remove commas from string values before casting. The builtin `float` is used
    # because the `np.float` alias was removed from NumPy (1.24) and raises AttributeError.
    df["Value"] = df["Value"].replace(regex=r'[,]+', value='').astype(float)

<br>

# $C$

In [7]:
# Preview the first two rows of the final-consumption table.
namq_10_fcs.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
168,1978Q1,NO,Norway,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,P311_S14,"Final consumption expenditure of households, durable goods",1000.2,1978Q1NO
169,1978Q1,NO,Norway,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,P312N_S14,"Final consumption expenditure of households, semi-durable goods, non-durable goods and services",12082.9,1978Q1NO


In [8]:
# Distinct item codes and their labels present in the final-consumption table.
namq_10_fcs[["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
168,P311_S14,"Final consumption expenditure of households, durable goods"
169,P312N_S14,"Final consumption expenditure of households, semi-durable goods, non-durable goods and services"


In [9]:
# Distinct unit codes and their labels present in the final-consumption table.
namq_10_fcs[["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
168,CLV10_MEUR,"Chain linked volumes (2010), million euro"
350,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
892,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


>## $Cd$

In [32]:
# Skeleton frame: one row per distinct TIME+GEO id found in the consumption table.
df_in = pd.DataFrame(sorted(set(namq_10_fcs.ID)), columns=["ID"])

# Each entry is (output column name, source table, row filter). The entries are
# inner-merged onto the skeleton in this order, so only ids present in every
# source survive.
durables = namq_10_fcs.NA_ITEM == "P311_S14"
durables_lag = namq_10_fcs_lag.NA_ITEM == "P311_S14"
sources = [
    # Chain linked volumes, percentage change compared to same period in previous year
    ("CLV_PCH_SM", namq_10_fcs, durables & (namq_10_fcs.UNIT == "CLV_PCH_SM")),
    # Chain linked volumes, percentage change on previous period (from the table shifted by Qp1)
    ("CLV_PCH_PRE", namq_10_fcs_lag, durables_lag & (namq_10_fcs_lag.UNIT == "CLV_PCH_PRE")),
    # Economic sentiment indicator
    ("BS-ESI-I", ei_bssi_m_r2, ei_bssi_m_r2.INDIC == "BS-ESI-I"),
    # Consumer confidence indicator
    ("BS-CSMCI-BAL", ei_bssi_m_r2, ei_bssi_m_r2.INDIC == "BS-CSMCI-BAL"),
    # Retail confidence indicator
    ("BS-RCI-BAL", ei_bssi_m_r2, ei_bssi_m_r2.INDIC == "BS-RCI-BAL"),
]
for var, frame, mask in sources:
    selec = frame.loc[mask, ["ID", "Value"]].rename(columns={"Value": var})
    df_in = df_in.merge(selec, on="ID", how="inner")

# Dummies: one 0/1 column per country code (last two characters of ID), PT left out.
countries = set(namq_10_fcs.GEO)
countries.remove('PT')
geo_suffix = df_in.ID.str[-2:]
for country in sorted(countries):
    df_in[country] = (geo_suffix == country).astype(int)

Cd = df_in.set_index("ID")
del var, selec, df_in, frame, mask, sources, durables, durables_lag, geo_suffix

In [33]:
# Display the assembled regression table (rendering is capped by the display.max_rows option set in the first cell).
Cd

Unnamed: 0_level_0,CLV_PCH_SM,CLV_PCH_PRE,BS-ESI-I,BS-CSMCI-BAL,BS-RCI-BAL,AT,BE,BG,CZ,DE,...,LT,LU,LV,MT,NL,NO,PL,SE,SI,UK
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1985Q1FR,-1.7,-0.9,84.8,-33.9,-21.9,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1985Q2FR,2.2,0.5,90.2,-28.6,-17.2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1985Q3FR,4.0,2.6,88.4,-32.5,-8.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1985Q4FR,4.0,1.8,94.1,-24.4,-9.3,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1986Q1FR,5.3,-0.9,98.7,-16.5,-5.4,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1986Q2FR,9.1,1.7,94.3,-23.3,-5.6,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1986Q3FR,7.7,6.3,98.3,-20.7,-6.8,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1986Q4FR,10.6,0.5,95.3,-21.8,-5.6,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1987Q1FR,7.4,1.8,95.0,-28.1,-8.2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1987Q2FR,4.8,-1.3,96.9,-25.5,-12.2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
# Dependent variable: the CLV_PCH_SM column of Cd.
y = Cd["CLV_PCH_SM"].copy()
# Regressors: every other column of Cd plus an intercept.
X = sm.add_constant(Cd.drop(columns="CLV_PCH_SM"))
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             CLV_PCH_SM   R-squared:                       0.470
Model:                            OLS   Adj. R-squared:                  0.462
Method:                 Least Squares   F-statistic:                     62.03
Date:                Tue, 13 Nov 2018   Prob (F-statistic):          3.37e-211
Time:                        09:56:42   Log-Likelihood:                -5970.9
No. Observations:                1705   AIC:                         1.199e+04
Df Residuals:                    1680   BIC:                         1.213e+04
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          -16.6517      4.559     -3.653   

In [23]:
# Inspect the regression target.
# NOTE(review): the saved output is a bare array and this cell's execution count (23) is
# lower than that of the cell assigning `y` (34), so the output looks stale — re-run after
# Restart & Run All.
y

array([-1.7,  2.2,  4. , ...,  6. ,  3.9,  0.9])

>## $Cnd$

<br>

# $I$

In [12]:
# Preview the first two rows of the GDP-and-main-components table.
namq_10_gdp.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
270,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR
280,1975Q1,FR,France,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,-0.7,1975Q1FR


In [13]:
# Code and label of item P51G (first matching row only).
namq_10_gdp[namq_10_gdp.NA_ITEM == "P51G"][["NA_ITEM","NA_ITEM_LABEL"]].head(1)

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
14553,P51G,Gross fixed capital formation


In [14]:
# Distinct units in which item P51G is reported.
namq_10_gdp[namq_10_gdp.NA_ITEM == "P51G"][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
14553,CLV10_MEUR,"Chain linked volumes (2010), million euro"
15703,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
19133,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


>## $Igfcf$

<br>

# $G$

In [15]:
# Preview the first two rows of the GDP-and-main-components table.
namq_10_gdp.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
270,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR
280,1975Q1,FR,France,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,-0.7,1975Q1FR


In [16]:
# Code and label of item P3_S13 (first matching row only).
namq_10_gdp[namq_10_gdp.NA_ITEM == "P3_S13"][["NA_ITEM","NA_ITEM_LABEL"]].head(1)

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
14551,P3_S13,Final consumption expenditure of general government


In [17]:
# Distinct units in which item P3_S13 is reported.
namq_10_gdp[namq_10_gdp.NA_ITEM == "P3_S13"][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
14551,CLV10_MEUR,"Chain linked volumes (2010), million euro"
15701,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
19131,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


<br>

# $X$

In [18]:
# Preview the first two rows of the GDP-and-main-components table.
namq_10_gdp.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
270,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR
280,1975Q1,FR,France,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,-0.7,1975Q1FR


In [19]:
# Codes and labels of the export items. Selected by item code instead of by position in
# the de-duplicated list, so the result does not depend on the row order of the CSV.
namq_10_gdp.loc[namq_10_gdp.NA_ITEM.isin(["P61", "P62"]), ["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
14556,P61,Exports of goods
14557,P62,Exports of services


In [20]:
# Distinct units in which items P61 and P62 are reported.
namq_10_gdp[(namq_10_gdp.NA_ITEM == "P61")|(namq_10_gdp.NA_ITEM == "P62")][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
14556,CLV10_MEUR,"Chain linked volumes (2010), million euro"
15706,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
19136,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


>## $Xg$

>## $Xs$

<br>

# $M$

In [21]:
# Preview the first two rows of the GDP-and-main-components table.
namq_10_gdp.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
270,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR
280,1975Q1,FR,France,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,-0.7,1975Q1FR


In [22]:
# Codes and labels of the import items. Selected by item code instead of by position in
# the de-duplicated list, so the result does not depend on the row order of the CSV.
namq_10_gdp.loc[namq_10_gdp.NA_ITEM.isin(["P71", "P72"]), ["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
14558,P71,Imports of goods
14559,P72,Imports of services


In [23]:
# Distinct units in which items P71 and P72 are reported.
namq_10_gdp[(namq_10_gdp.NA_ITEM == "P71")|(namq_10_gdp.NA_ITEM == "P72")][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
14558,CLV10_MEUR,"Chain linked volumes (2010), million euro"
15708,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
19138,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


>## $Mg$

>## $Ms$

<br>

# $\textrm{Confidence Indicators}$

In [14]:
# Distinct indicator codes and their labels present in the sentiment table.
ei_bssi_m_r2[["INDIC","INDIC_LABEL"]].drop_duplicates()

Unnamed: 0,INDIC,INDIC_LABEL
402,BS-CCI-BAL,Construction confidence indicator
403,BS-ESI-I,Economic sentiment indicator
404,BS-ICI-BAL,Industrial confidence indicator
454,BS-CSMCI-BAL,Consumer confidence indicator
9627,BS-RCI-BAL,Retail confidence indicator
18875,BS-SCI-BAL,Services Confidence Indicator


In [35]:
# Display the sentiment table after its conversion to quarterly labels (rendering is capped by display.max_rows).
ei_bssi_m_r2

Unnamed: 0,TIME,GEO,GEO_LABEL,INDIC,INDIC_LABEL,S_ADJ,S_ADJ_LABEL,Value,ID
402,1980Q1,DK,Denmark,BS-CCI-BAL,Construction confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-6.4,1980Q1DK
403,1980Q1,DK,Denmark,BS-ESI-I,Economic sentiment indicator,SA,"Seasonally adjusted data, not calendar adjusted data",106.1,1980Q1DK
404,1980Q1,DK,Denmark,BS-ICI-BAL,Industrial confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",5.3,1980Q1DK
408,1980Q1,DE,Germany (until 1990 former territory of the FRG),BS-CCI-BAL,Construction confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-13.1,1980Q1DE
409,1980Q1,DE,Germany (until 1990 former territory of the FRG),BS-ESI-I,Economic sentiment indicator,SA,"Seasonally adjusted data, not calendar adjusted data",101.1,1980Q1DE
410,1980Q1,DE,Germany (until 1990 former territory of the FRG),BS-ICI-BAL,Industrial confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-7.2,1980Q1DE
438,1980Q1,FR,France,BS-CCI-BAL,Construction confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-18.6,1980Q1FR
450,1980Q1,IT,Italy,BS-CCI-BAL,Construction confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-27.2,1980Q1IT
451,1980Q1,IT,Italy,BS-ESI-I,Economic sentiment indicator,SA,"Seasonally adjusted data, not calendar adjusted data",97.7,1980Q1IT
452,1980Q1,IT,Italy,BS-ICI-BAL,Industrial confidence indicator,SA,"Seasonally adjusted data, not calendar adjusted data",-2.0,1980Q1IT


<br>