In [1]:
import os
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
pd.set_option('max_colwidth', 100)
pd.set_option('display.max_rows', 20)

<br>

# Functions

In [2]:
def M2Q(df_in):
    d = {"M03":"Q1", "M06":"Q2", "M09":"Q3", "M12":"Q4"}
    df_out = df_in[df_in['TIME'].apply(lambda x: x[-3:] in list(d.keys()))].copy()
    df_out.TIME = df_out['TIME'].apply(lambda x: x[:4]+d[x[-3:]]).copy()
    return(df_out)

def Qp1(df_in):
    d = {"Q1":"Q2", "Q2":"Q3", "Q3":"Q4", "Q4":"Q1"}
    df_out = df_in.copy()
    df_out.TIME = df_out['TIME'].apply(lambda x: str(int(x[:-2])+1)+d[x[-2:]] if x[-1:] == "4" else x[:-2]+d[x[-2:]]).copy()
    return(df_out)

<br>

# Data

In [3]:
path = "./data/csvs/"
!sed -i "s/://g" ./data/*

In [4]:
#!head -n 1 data/sts_trtu_q_Label.csv

>## Quarterly

In [5]:
# DATASET Construction - quarterly data - index (2015 = 100) (NACE Rev. 2) [ei_isbu_q]
ei_isbu_q = pd.read_csv("./data/ei_isbu_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
ei_isbu_q_lag = Qp1(ei_isbu_q.copy())

# DATASET Final consumption aggregates [namq_10_fcs]
namq_10_fcs = pd.read_csv("./data/namq_10_fcs_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
namq_10_fcs_lag = Qp1(namq_10_fcs.copy())

# DATASET: GDP and main components  (output, expenditure and income) [namq_10_gdp]
namq_10_gdp = pd.read_csv("./data/namq_10_gdp_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
namq_10_gdp_lag = Qp1(namq_10_gdp.copy())

# DATASET Building permits - quarterly data [sts_cobp_q]
sts_cobp_q = pd.read_csv("./data/sts_cobp_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_cobp_q_lag = Qp1(sts_cobp_q.copy())

# DATASET Production in construction - quarterly data [sts_copr_q]
sts_copr_q = pd.read_csv("./data/sts_copr_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_copr_q_lag = Qp1(sts_copr_q.copy())

# DATASET Production in industry - quarterly data [sts_inpr_q]
sts_inpr_q = pd.read_csv("./data/sts_inpr_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_inpr_q_lag = Qp1(sts_inpr_q.copy())

# DATASET Turnover in industry, total - quarterly data [sts_intv_q]
sts_intv_q = pd.read_csv("./data/sts_intv_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_intv_q_lag = Qp1(sts_intv_q.copy())

# DATASET Turnover and volume of sales in wholesale and retail trade - quarterly data [sts_trtu_q]
sts_trtu_q = pd.read_csv("./data/sts_trtu_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_trtu_q_lag = Qp1(sts_trtu_q.copy())

>## Monthly

In [6]:
# DATASET Euro-zone Business Climate Indicator - monthly data [ei_bsci_m_r2]
ei_bsci_m_r2_1 = M2Q(pd.read_csv("./data/ei_bsci_m_r2_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

# DATASET Sentiment indicators - monthly data [ei_bssi_m_r2]
ei_bssi_m_r2 = M2Q(pd.read_csv("./data/ei_bssi_m_r2_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

# DATASET Effective exchange rates indices - monthly data [ei_mfef_m]
ei_mfef_m = M2Q(pd.read_csv("./data/ei_mfef_m_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

**Dealing with ids:**

In [7]:
for df in [ei_isbu_q, ei_isbu_q_lag, namq_10_fcs, namq_10_fcs_lag, namq_10_gdp, 
           namq_10_gdp_lag, sts_cobp_q, sts_cobp_q_lag, sts_copr_q, sts_copr_q_lag, 
           sts_inpr_q, sts_inpr_q_lag, sts_intv_q, sts_intv_q_lag, sts_trtu_q, 
           sts_trtu_q_lag, ei_isbu_q, ei_isbu_q_lag, namq_10_fcs, namq_10_fcs_lag, 
           namq_10_gdp, namq_10_gdp_lag, sts_cobp_q, sts_cobp_q_lag, sts_copr_q, sts_copr_q_lag, 
           sts_inpr_q, sts_inpr_q_lag, sts_intv_q, sts_intv_q_lag, sts_trtu_q, sts_trtu_q_lag, ei_bsci_m_r2_1, ei_bssi_m_r2, ei_mfef_m]:
    df["ID"] = df["TIME"]+df["GEO"]
    df.Value = df.Value.replace(regex=r'[,]+', value='').astype(np.float)
    


># OECD

In [8]:
def OECD():
    GEO_LABEL2GEO = dict(zip(namq_10_gdp.GEO_LABEL.values, namq_10_gdp.GEO.values)); GEO_LABEL2GEO["Germany"] = "DE"
    df_in = pd.read_csv("data/KEI_13112018143436794_[v1-oecd].csv")
    df_in["GEO"] = df_in.Country.map(GEO_LABEL2GEO)
    df_in = df_in[df_in.GEO.notnull()]
    df_in["ID"] = df_in.TIME.str[:4]+df_in.TIME.str[-2:]+df_in.GEO
    return(df_in)

In [9]:
OECD = OECD()

In [10]:
OECD[["SUBJECT", "Subject"]].drop_duplicates()

Unnamed: 0,SUBJECT,Subject
115,LRHUTTTT,"Harmonised unemployment rate all persons, s.a."
3580,ULQEUL01,"Unit labour cost Total Economy, s.a."
4179,SLRTCR03,"Passenger car registrations, s.a."


<br>

# $C$

>## $Cd$

In [11]:
namq_10_fcs.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
168,1978Q1,NO,Norway,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,P311_S14,"Final consumption expenditure of households, durable goods",1000.2,1978Q1NO
169,1978Q1,NO,Norway,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,P312N_S14,"Final consumption expenditure of households, semi-durable goods, non-durable goods and services",12082.9,1978Q1NO


In [12]:
namq_10_fcs[["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
168,P311_S14,"Final consumption expenditure of households, durable goods"
169,P312N_S14,"Final consumption expenditure of households, semi-durable goods, non-durable goods and services"


In [13]:
namq_10_fcs[["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
168,CLV10_MEUR,"Chain linked volumes (2010), million euro"
350,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
892,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


In [14]:
df_in = pd.DataFrame(sorted(list(set(namq_10_fcs.ID))), columns=["ID"])

###########################################################################################

# Chain linked volumes, percentage change compared to same period in previous year
var = "CLV_PCH_SM"
selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P311_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Chain linked volumes, percentage change on previous period
var = "CLV_PCH_PRE"
selec = namq_10_fcs_lag[(namq_10_fcs_lag.NA_ITEM=="P311_S14")&(namq_10_fcs_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Economic sentiment indicator
var = "BS-ESI-I"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Consumer confidence indicator
var = "BS-CSMCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Retail confidence indicator
var = "BS-RCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

###########################################################################################

# Dummies
countries = set(namq_10_fcs.GEO); countries.remove('PT')
for country in sorted(list(countries)):
    df_in[country] = (df_in.ID.str[-2:] == country).astype(int)

Cd = df_in.copy(); Cd.set_index("ID", inplace=True) 
del var, selec, df_in

###########################################################################################

# Save
Cd.to_csv("./Cd.csv")

###########################################################################################

# Regression
X = sm.add_constant(Cd.loc[:, Cd.columns != "CLV_PCH_SM"].copy())
y = Cd.loc[:, "CLV_PCH_SM"].copy()
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             CLV_PCH_SM   R-squared:                       0.470
Model:                            OLS   Adj. R-squared:                  0.462
Method:                 Least Squares   F-statistic:                     62.03
Date:                Tue, 13 Nov 2018   Prob (F-statistic):          3.37e-211
Time:                        23:43:44   Log-Likelihood:                -5970.9
No. Observations:                1705   AIC:                         1.199e+04
Df Residuals:                    1680   BIC:                         1.213e+04
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          -16.6517      4.559     -3.653   

>## $Cnd$

In [15]:
namq_10_fcs.columns

Index(['TIME', 'GEO', 'GEO_LABEL', 'UNIT', 'UNIT_LABEL', 'S_ADJ',
       'S_ADJ_LABEL', 'NA_ITEM', 'NA_ITEM_LABEL', 'Value', 'ID'],
      dtype='object')

In [16]:
namq_10_fcs[["NA_ITEM", "NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
168,P311_S14,"Final consumption expenditure of households, durable goods"
169,P312N_S14,"Final consumption expenditure of households, semi-durable goods, non-durable goods and services"


In [17]:
namq_10_fcs[["UNIT", "UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
168,CLV10_MEUR,"Chain linked volumes (2010), million euro"
350,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
892,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


In [18]:
df_in = pd.DataFrame(sorted(list(set(namq_10_fcs.ID))), columns=["ID"])

###########################################################################################

# Chain linked volumes, percentage change compared to same period in previous year
var = "CLV_PCH_SM"
selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P312N_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Chain linked volumes, percentage change on previous period
var = "CLV_PCH_PRE"
selec = namq_10_fcs_lag[(namq_10_fcs_lag.NA_ITEM=="P312N_S14")&(namq_10_fcs_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Economic sentiment indicator
var = "BS-ESI-I"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Consumer confidence indicator
var = "BS-CSMCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Retail confidence indicator
var = "BS-RCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

###########################################################################################

# Dummies
countries = set(namq_10_fcs.GEO); countries.remove('PT')
for country in sorted(list(countries)):
    df_in[country] = (df_in.ID.str[-2:] == country).astype(int)

Cnd = df_in.copy().set_index("ID") 
del var, selec, df_in

###########################################################################################

# Save
Cnd.to_csv("./Cnd.csv")

###########################################################################################

# Regression
X = sm.add_constant(Cnd.loc[:, Cnd.columns != "CLV_PCH_SM"].copy())
y = Cnd.loc[:, "CLV_PCH_SM"].copy()
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             CLV_PCH_SM   R-squared:                       0.627
Model:                            OLS   Adj. R-squared:                  0.622
Method:                 Least Squares   F-statistic:                     117.6
Date:                Tue, 13 Nov 2018   Prob (F-statistic):               0.00
Time:                        23:43:46   Log-Likelihood:                -3516.4
No. Observations:                1705   AIC:                             7083.
Df Residuals:                    1680   BIC:                             7219.
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -3.3862      1.079     -3.137   

<br>

# $I$

>## $Igfcf$

In [19]:
namq_10_gdp.head(2)

Unnamed: 0,TIME,TIME_LABEL,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
324,1975Q1,1975Q1,FR,France,CP_MEUR,"Current prices, million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,67383.3,1975Q1FR
333,1975Q1,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR


In [20]:
namq_10_gdp[namq_10_gdp.NA_ITEM == "P51G"][["NA_ITEM","NA_ITEM_LABEL"]].head(1)

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
17463,P51G,Gross fixed capital formation


In [21]:
namq_10_gdp[namq_10_gdp.NA_ITEM == "P51G"][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
17463,CP_MEUR,"Current prices, million euro"
17472,CLV10_MEUR,"Chain linked volumes (2010), million euro"
18849,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
22962,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


In [22]:
df_in = pd.DataFrame(sorted(list(set(namq_10_gdp.ID))), columns=["ID"])

###########################################################################################

# Chain linked volumes, percentage change compared to same period in previous year
var = "CLV_PCH_SM"
selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P51G")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Chain linked volumes, percentage change on previous period
var = "CLV_PCH_PRE"
selec = namq_10_gdp_lag[(namq_10_gdp_lag.NA_ITEM=="P51G")&(namq_10_gdp_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Economic sentiment indicator
var = "BS-ESI-I"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Consumer confidence indicator
var = "BS-CSMCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Retail confidence indicator
var = "BS-RCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

###########################################################################################

# Dummies
countries = set(namq_10_gdp.GEO); countries.remove('PT')
for country in sorted(list(countries)):
    df_in[country] = (df_in.ID.str[-2:] == country).astype(int)

Igfcf = df_in.copy().set_index("ID") 
del var, selec, df_in

###########################################################################################

# Save
Igfcf.to_csv("./Igfcf.csv")

###########################################################################################

# Regression
X = sm.add_constant(Igfcf.loc[:, Igfcf.columns != "CLV_PCH_SM"].copy())
y = Igfcf.loc[:, "CLV_PCH_SM"].copy()
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             CLV_PCH_SM   R-squared:                       0.281
Model:                            OLS   Adj. R-squared:                  0.271
Method:                 Least Squares   F-statistic:                     27.17
Date:                Tue, 13 Nov 2018   Prob (F-statistic):          3.11e-118
Time:                        23:43:50   Log-Likelihood:                -7485.2
No. Observations:                1974   AIC:                         1.503e+04
Df Residuals:                    1945   BIC:                         1.519e+04
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          -43.8957      5.750     -7.635   

  return self.params / self.bse
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


<br>

# $G$

In [23]:
namq_10_gdp.head(2)

Unnamed: 0,TIME,TIME_LABEL,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
324,1975Q1,1975Q1,FR,France,CP_MEUR,"Current prices, million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,67383.3,1975Q1FR
333,1975Q1,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR


In [24]:
namq_10_gdp[namq_10_gdp.NA_ITEM == "P3_S13"][["NA_ITEM","NA_ITEM_LABEL"]].head(1)

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
17461,P3_S13,Final consumption expenditure of general government


In [25]:
namq_10_gdp[namq_10_gdp.NA_ITEM == "P3_S13"][["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
17461,CP_MEUR,"Current prices, million euro"
17470,CLV10_MEUR,"Chain linked volumes (2010), million euro"
18847,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
22960,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


<br>

# $X$

In [21]:
namq_10_gdp.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
270,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR
280,1975Q1,FR,France,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,-0.7,1975Q1FR


In [35]:
namq_10_gdp[["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
324,B1GQ,Gross domestic product at market prices
17461,P3_S13,Final consumption expenditure of general government
17462,P5G,Gross capital formation
17463,P51G,Gross fixed capital formation
17465,P52,Changes in inventories
17466,P53,Acquisitions less disposals of valuables
17467,P6,Exports of goods and services
17468,P7,Imports of goods and services
27688,P52_P53,Changes in inventories and acquisitions less disposals of valuables


In [36]:
namq_10_gdp[["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
324,CP_MEUR,"Current prices, million euro"
333,CLV10_MEUR,"Chain linked volumes (2010), million euro"
342,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
351,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


In [22]:
df_in = pd.DataFrame(sorted(list(set(namq_10_gdp.ID))), columns=["ID"])

###########################################################################################

# Chain linked volumes, percentage change compared to same period in previous year
var = "CLV_PCH_SM"
selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P51G")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Chain linked volumes, percentage change on previous period
var = "CLV_PCH_PRE"
selec = namq_10_gdp_lag[(namq_10_gdp_lag.NA_ITEM=="P51G")&(namq_10_gdp_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Economic sentiment indicator
var = "BS-ESI-I"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Consumer confidence indicator
var = "BS-CSMCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Retail confidence indicator
var = "BS-RCI-BAL"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

###########################################################################################

# Dummies
countries = set(namq_10_gdp.GEO); countries.remove('PT')
for country in sorted(list(countries)):
    df_in[country] = (df_in.ID.str[-2:] == country).astype(int)

Igfcf = df_in.copy().set_index("ID") 
del var, selec, df_in

###########################################################################################

# Save
Igfcf.to_csv("./Igfcf.csv")

###########################################################################################

# Regression
X = sm.add_constant(Igfcf.loc[:, Igfcf.columns != "CLV_PCH_SM"].copy())
y = Igfcf.loc[:, "CLV_PCH_SM"].copy()
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:             CLV_PCH_SM   R-squared:                       0.281
Model:                            OLS   Adj. R-squared:                  0.271
Method:                 Least Squares   F-statistic:                     27.17
Date:                Tue, 13 Nov 2018   Prob (F-statistic):          3.11e-118
Time:                        23:43:50   Log-Likelihood:                -7485.2
No. Observations:                1974   AIC:                         1.503e+04
Df Residuals:                    1945   BIC:                         1.519e+04
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const          -43.8957      5.750     -7.635   

  return self.params / self.bse
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


<br>

# $M$

In [11]:
namq_10_gdp.head(2)

Unnamed: 0,TIME,TIME_LABEL,GEO,GEO_LABEL,UNIT,UNIT_LABEL,S_ADJ,S_ADJ_LABEL,NA_ITEM,NA_ITEM_LABEL,Value,ID
324,1975Q1,1975Q1,FR,France,CP_MEUR,"Current prices, million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,67383.3,1975Q1FR
333,1975Q1,1975Q1,FR,France,CLV10_MEUR,"Chain linked volumes (2010), million euro",SCA,Seasonally and calendar adjusted data,B1GQ,Gross domestic product at market prices,237727.0,1975Q1FR


In [12]:
namq_10_gdp[["NA_ITEM","NA_ITEM_LABEL"]].drop_duplicates()

Unnamed: 0,NA_ITEM,NA_ITEM_LABEL
324,B1GQ,Gross domestic product at market prices
17461,P3_S13,Final consumption expenditure of general government
17462,P5G,Gross capital formation
17463,P51G,Gross fixed capital formation
17465,P52,Changes in inventories
17466,P53,Acquisitions less disposals of valuables
17467,P6,Exports of goods and services
17468,P7,Imports of goods and services
27688,P52_P53,Changes in inventories and acquisitions less disposals of valuables


In [13]:
namq_10_gdp[["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
324,CP_MEUR,"Current prices, million euro"
333,CLV10_MEUR,"Chain linked volumes (2010), million euro"
342,CLV_PCH_PRE,"Chain linked volumes, percentage change on previous period"
351,CLV_PCH_SM,"Chain linked volumes, percentage change compared to same period in previous year"


In [14]:
ei_mfef_m.head(2)

Unnamed: 0,TIME,GEO,GEO_LABEL,UNIT,UNIT_LABEL,INDIC,INDIC_LABEL,Value,ID
176,1999Q1,EA19,Euro area (19 countries),I10,"Index, 2010=100",REER37CPI,Real Effective Exchange Rate (deflator consumer price indices - 37 trading partners),95.44,1999Q1EA19
177,1999Q1,EA19,Euro area (19 countries),I10,"Index, 2010=100",REER42CPI,Real Effective Exchange Rate (deflator consumer price indices - 42 trading partners),100.18,1999Q1EA19


In [15]:
ei_mfef_m[["INDIC","INDIC_LABEL"]].drop_duplicates()

Unnamed: 0,INDIC,INDIC_LABEL
176,REER37CPI,Real Effective Exchange Rate (deflator consumer price indices - 37 trading partners)
177,REER42CPI,Real Effective Exchange Rate (deflator consumer price indices - 42 trading partners)


In [16]:
ei_mfef_m[["UNIT","UNIT_LABEL"]].drop_duplicates()

Unnamed: 0,UNIT,UNIT_LABEL
176,I10,"Index, 2010=100"


In [22]:
df_in = pd.DataFrame(sorted(list(set(namq_10_gdp.ID))), columns=["ID"])

###########################################################################################

# Chain linked volumes, percentage change compared to same period in previous year
var = "CLV_PCH_SM"
selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P7")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", "M"]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Cd
selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P311_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", "Cd"]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Cnd
selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P312N_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", "Cnd"]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# G
selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P3_S13")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", "G"]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Igfcf
selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P51G")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", "Igfcf"]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Economic sentiment indicator
var = "BS-ESI-I"
selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

# Real Effective Exchange Rate (deflator consumer price indices - 42 trading partners)
var = "REER42CPI"
selec = ei_mfef_m[ei_mfef_m.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
df_in = pd.merge(df_in, selec, on='ID', how="inner")

###########################################################################################

# Dummies
countries = set(namq_10_gdp.GEO); countries.remove('PT')
for country in sorted(list(countries)):
    df_in[country] = (df_in.ID.str[-2:] == country).astype(int)

M = df_in.copy().set_index("ID") 
del var, selec, df_in

###########################################################################################

# Save
M.to_csv("./M.csv")

###########################################################################################

# Regression
X = sm.add_constant(M.loc[:, M.columns != "M"].copy())
y = M.loc[:, "M"].copy()
results = sm.OLS(y, X).fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      M   R-squared:                       0.556
Model:                            OLS   Adj. R-squared:                  0.549
Method:                 Least Squares   F-statistic:                     77.53
Date:                Wed, 14 Nov 2018   Prob (F-statistic):          2.23e-271
Time:                        00:33:14   Log-Likelihood:                -5379.3
No. Observations:                1701   AIC:                         1.081e+04
Df Residuals:                    1673   BIC:                         1.097e+04
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -23.5341      3.005     -7.833      0.0

<br>

<br>

# Prepare to $\textbf{R}$

In [103]:
# namq_10_gdp
selec = namq_10_gdp[(namq_10_gdp.GEO == "PT") & (namq_10_gdp.UNIT == "CLV10_MEUR")]
for item in list(set(selec.NA_ITEM)):
    selec[selec.NA_ITEM==item][["TIME", "Value"]].to_csv("R/"+item+".csv", index=False)
##

# namq_10_fcs