In [1]:
import os
import numpy as np
import itertools
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
pd.set_option('max_colwidth', 100)
pd.set_option('display.max_rows', 20)

<br>

# Functions

In [2]:
def M2Q(df_in):
    d = {"M03":"Q1", "M06":"Q2", "M09":"Q3", "M12":"Q4"}
    df_out = df_in[df_in['TIME'].apply(lambda x: x[-3:] in list(d.keys()))].copy()
    df_out.TIME = df_out['TIME'].apply(lambda x: x[:4]+d[x[-3:]]).copy()
    return(df_out)

def Qp1(df_in):
    d = {"Q1":"Q2", "Q2":"Q3", "Q3":"Q4", "Q4":"Q1"}
    df_out = df_in.copy()
    df_out.TIME = df_out['TIME'].apply(lambda x: str(int(x[:-2])+1)+d[x[-2:]] if x[-1:] == "4" else x[:-2]+d[x[-2:]]).copy()
    return(df_out)

<br>

# Data

In [3]:
path = "./data/csvs/"
!sed -i "s/://g" ./data/*

In [4]:
#!head -n 1 data/sts_trtu_q_Label.csv

>## Quarterly

In [5]:
# DATASET Construction - quarterly data - index (2015 = 100) (NACE Rev. 2) [ei_isbu_q]
ei_isbu_q = pd.read_csv("./data/ei_isbu_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
ei_isbu_q_lag = Qp1(ei_isbu_q.copy())

# DATASET Final consumption aggregates [namq_10_fcs]
namq_10_fcs = pd.read_csv("./data/namq_10_fcs_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
namq_10_fcs_lag = Qp1(namq_10_fcs.copy())

# DATASET: GDP and main components  (output, expenditure and income) [namq_10_gdp]
namq_10_gdp = pd.read_csv("./data/namq_10_gdp_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
namq_10_gdp_lag = Qp1(namq_10_gdp.copy())

# DATASET Building permits - quarterly data [sts_cobp_q]
sts_cobp_q = pd.read_csv("./data/sts_cobp_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_cobp_q_lag = Qp1(sts_cobp_q.copy())

# DATASET Production in construction - quarterly data [sts_copr_q]
sts_copr_q = pd.read_csv("./data/sts_copr_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_copr_q_lag = Qp1(sts_copr_q.copy())

# DATASET Production in industry - quarterly data [sts_inpr_q]
sts_inpr_q = pd.read_csv("./data/sts_inpr_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_inpr_q_lag = Qp1(sts_inpr_q.copy())

# DATASET Turnover in industry, total - quarterly data [sts_intv_q]
sts_intv_q = pd.read_csv("./data/sts_intv_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_intv_q_lag = Qp1(sts_intv_q.copy())

# DATASET Turnover and volume of sales in wholesale and retail trade - quarterly data [sts_trtu_q]
sts_trtu_q = pd.read_csv("./data/sts_trtu_q_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna()
sts_trtu_q_lag = Qp1(sts_trtu_q.copy())

>## Monthly

In [6]:
# DATASET Euro-zone Business Climate Indicator - monthly data [ei_bsci_m_r2]
ei_bsci_m_r2_1 = M2Q(pd.read_csv("./data/ei_bsci_m_r2_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

# DATASET Sentiment indicators - monthly data [ei_bssi_m_r2]
ei_bssi_m_r2 = M2Q(pd.read_csv("./data/ei_bssi_m_r2_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

# DATASET Effective exchange rates indices - monthly data [ei_mfef_m]
ei_mfef_m = M2Q(pd.read_csv("./data/ei_mfef_m_1_Data.csv", low_memory=False, usecols=lambda x: x not in ["Flag and Footnotes"]).dropna())

**Dealing with ids:**

In [7]:
for df in [ei_isbu_q, ei_isbu_q_lag, namq_10_fcs, namq_10_fcs_lag, namq_10_gdp, 
           namq_10_gdp_lag, sts_cobp_q, sts_cobp_q_lag, sts_copr_q, sts_copr_q_lag, 
           sts_inpr_q, sts_inpr_q_lag, sts_intv_q, sts_intv_q_lag, sts_trtu_q, 
           sts_trtu_q_lag, ei_isbu_q, ei_isbu_q_lag, namq_10_fcs, namq_10_fcs_lag, 
           namq_10_gdp, namq_10_gdp_lag, sts_cobp_q, sts_cobp_q_lag, sts_copr_q, sts_copr_q_lag, 
           sts_inpr_q, sts_inpr_q_lag, sts_intv_q, sts_intv_q_lag, sts_trtu_q, sts_trtu_q_lag, ei_bsci_m_r2_1, ei_bssi_m_r2, ei_mfef_m]:
    df["ID"] = df["TIME"]+df["GEO"]
    df.Value = df.Value.replace(regex=r'[,]+', value='').astype(np.float)
    


># OECD

In [8]:
def OECD():
    GEO_LABEL2GEO = dict(zip(namq_10_gdp.GEO_LABEL.values, namq_10_gdp.GEO.values)); GEO_LABEL2GEO["Germany"] = "DE"
    df_in = pd.read_csv("data/KEI_13112018143436794_[v1-oecd].csv")
    df_in["GEO"] = df_in.Country.map(GEO_LABEL2GEO)
    df_in = df_in[df_in.GEO.notnull()]
    df_in["ID"] = df_in.TIME.str[:4]+df_in.TIME.str[-2:]+df_in.GEO
    return(df_in)

In [9]:
OECD = OECD()

<br>

# $C$

>## $Cd$

In [31]:
def Cd_Cheat(n):
    #Cnd_vars = ["BS-BCI", "BS-RCI-BAL", "BS-SCI-BAL", "BS-CSMCI-BAL", 
    #            "MIG_DCOG", "C29_C30", "LRHUTTTT", "SLRTCR03"] 
    Cnd_vars = ["BS-RCI-BAL"," BS-SCI-BAL", "C29_C30", "SLRTCR03", "BS-CSMCI-BAL"]
    fake_ns = list(itertools.combinations(Cnd_vars, n))
    fake_df = pd.DataFrame(index=["variables", "R2", "R2Adj"])
    counter = 0
    
    for fake_n in fake_ns:


        df_in = pd.DataFrame(sorted(list(set(namq_10_fcs.ID))), columns=["ID"])
        
        ###########################################################################################
        
        # Chain linked volumes, percentage change compared to same period in previous year
        var = "CLV_PCH_SM"
        selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P311_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Chain linked volumes, percentage change on previous period
        var = "CLV_PCH_PRE"
        selec = namq_10_fcs_lag[(namq_10_fcs_lag.NA_ITEM=="P311_S14")&(namq_10_fcs_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")

        # Business Climate Indicator
        var = "BS-BCI"
        if var in fake_n:
            dic_BCI = dict(zip(ei_bsci_m_r2_1.TIME.values, ei_bsci_m_r2_1.Value.values))
            df_in[var] = df_in.ID.str[:-2].map(dic_BCI)
        
        # Retail Confidence Indicator
        var = "BS-RCI-BAL"
        if var in fake_n:
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Services Confidence Indicator
        var = "BS-SCI-BAL"
        if var in fake_n:
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Consumer Confidence Indicator
        var = "BS-CSMCI-BAL"
        if var in fake_n:
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: MIG - Durable Consumer Goods
        var = "MIG_DCOG"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Manufacture of motor vehicles, trailers, semi-trailers and of other transport equipment
        var = "C29_C30"
        if var in fake_n:
            selec = sts_intv_q[sts_intv_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Harmonised unemployment rate all persons, s.a.
        var = "LRHUTTTT"
        if var in fake_n:
            selec = OECD[OECD.SUBJECT==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")

        # Passenger car registrations, s.a.
        var = "SLRTCR03"
        if var in fake_n:
            selec = OECD[OECD.SUBJECT==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        ###########################################################################################
        
        # Dummies
        countries = set(namq_10_fcs.GEO); countries.remove('PT')
        for country in sorted(list(countries)):
            df_in[country] = (df_in.ID.str[-2:] == country).astype(int)
        
        Cd = df_in.dropna().copy(); Cd.set_index("ID", inplace=True)
        Cd = Cd.loc[:, (Cd != 0).any(axis=0)].copy()
        del var, selec, df_in
        
        ###########################################################################################
        
        # Save
        Cd.to_csv("./Cd.csv")
        
        ###########################################################################################
        
        # Regression
        X = sm.add_constant(Cd.loc[:, Cd.columns != "CLV_PCH_SM"].copy())
        y = Cd.loc[:, "CLV_PCH_SM"].copy()
        results = sm.OLS(y, X).fit()
        
        fake_df[counter] = [fake_n, results.rsquared, results.rsquared_adj]
        counter += 1
        if counter % 100 == 0:
            print(counter/len(fake_ns)*100)
        
    return(fake_df.transpose())

In [32]:
Cd_Cheat(1)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL,)",0.383209,0.375543
1,"( BS-SCI-BAL,)",0.300414,0.293007
2,"(C29_C30,)",0.416586,0.407958
3,"(SLRTCR03,)",0.385289,0.377875
4,"(BS-CSMCI-BAL,)",0.418163,0.410994


In [33]:
Cd_Cheat(2)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL)",0.383209,0.375543
1,"(BS-RCI-BAL, C29_C30)",0.537304,0.529921
2,"(BS-RCI-BAL, SLRTCR03)",0.48666,0.479511
3,"(BS-RCI-BAL, BS-CSMCI-BAL)",0.461436,0.454067
4,"( BS-SCI-BAL, C29_C30)",0.416586,0.407958
5,"( BS-SCI-BAL, SLRTCR03)",0.385289,0.377875
6,"( BS-SCI-BAL, BS-CSMCI-BAL)",0.418163,0.410994
7,"(C29_C30, SLRTCR03)",0.469875,0.461141
8,"(C29_C30, BS-CSMCI-BAL)",0.51259,0.504842
9,"(SLRTCR03, BS-CSMCI-BAL)",0.472796,0.465809


In [34]:
Cd_Cheat(3)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL, C29_C30)",0.537304,0.529921
1,"(BS-RCI-BAL, BS-SCI-BAL, SLRTCR03)",0.48666,0.479511
2,"(BS-RCI-BAL, BS-SCI-BAL, BS-CSMCI-BAL)",0.461436,0.454067
3,"(BS-RCI-BAL, C29_C30, SLRTCR03)",0.58968,0.582286
4,"(BS-RCI-BAL, C29_C30, BS-CSMCI-BAL)",0.554566,0.546928
5,"(BS-RCI-BAL, SLRTCR03, BS-CSMCI-BAL)",0.500842,0.493537
6,"( BS-SCI-BAL, C29_C30, SLRTCR03)",0.469875,0.461141
7,"( BS-SCI-BAL, C29_C30, BS-CSMCI-BAL)",0.51259,0.504842
8,"( BS-SCI-BAL, SLRTCR03, BS-CSMCI-BAL)",0.472796,0.465809
9,"(C29_C30, SLRTCR03, BS-CSMCI-BAL)",0.553004,0.545246


In [35]:
Cd_Cheat(4)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL, C29_C30, SLRTCR03)",0.58968,0.582286
1,"(BS-RCI-BAL, BS-SCI-BAL, C29_C30, BS-CSMCI-BAL)",0.554566,0.546928
2,"(BS-RCI-BAL, BS-SCI-BAL, SLRTCR03, BS-CSMCI-BAL)",0.500842,0.493537
3,"(BS-RCI-BAL, C29_C30, SLRTCR03, BS-CSMCI-BAL)",0.599914,0.592337
4,"( BS-SCI-BAL, C29_C30, SLRTCR03, BS-CSMCI-BAL)",0.553004,0.545246


In [12]:
Cd_Cheat_results = Cd_Cheat(4)

In [13]:
Cd_Cheat_results.sort_values(["R2Adj", "R2"], ascending=[False, False])

Unnamed: 0,variables,R2,R2Adj
43,"(BS-RCI-BAL, BS-SCI-BAL, C29_C30, SLRTCR03)",0.604487,0.596909
49,"(BS-RCI-BAL, BS-CSMCI-BAL, C29_C30, SLRTCR03)",0.599914,0.592337
13,"(BS-BCI, BS-RCI-BAL, C29_C30, SLRTCR03)",0.599118,0.591251
54,"(BS-RCI-BAL, C29_C30, LRHUTTTT, SLRTCR03)",0.596705,0.589068
52,"(BS-RCI-BAL, MIG_DCOG, C29_C30, SLRTCR03)",0.590744,0.582994
42,"(BS-RCI-BAL, BS-SCI-BAL, C29_C30, LRHUTTTT)",0.582895,0.574997
59,"(BS-SCI-BAL, BS-CSMCI-BAL, C29_C30, SLRTCR03)",0.576659,0.568563
48,"(BS-RCI-BAL, BS-CSMCI-BAL, C29_C30, LRHUTTTT)",0.574757,0.566958
12,"(BS-BCI, BS-RCI-BAL, C29_C30, LRHUTTTT)",0.57072,0.562707
62,"(BS-SCI-BAL, MIG_DCOG, C29_C30, SLRTCR03)",0.565959,0.557657


>## $Cnd$

In [26]:
def Cnd_Cheat(n):
    #Cnd_vars = ["BS-BCI", "BS-RCI-BAL", "BS-SCI-BAL", "BS-CSMCI-BAL", "MIG_NDCOG", "G47_FOOD", "G47_NFOOD_X_G473", "G47_X_G473", "LRHUTTTT"]
    Cnd_vars = ["BS-RCI-BAL", "BS-SCI-BAL", "G47_NFOOD_X_G473", "LRHUTTTT", "G47_FOOD"]
    fake_ns = list(itertools.combinations(Cnd_vars, n))
    fake_df = pd.DataFrame(index=["variables", "R2", "R2Adj"])
    counter = 0
    
    for fake_n in fake_ns:
        df_in = pd.DataFrame(sorted(list(set(namq_10_fcs.ID))), columns=["ID"])
        
        ###########################################################################################
    
        # Chain linked volumes, percentage change compared to same period in previous year
        var = "CLV_PCH_SM"
        selec = namq_10_fcs[(namq_10_fcs.NA_ITEM=="P312N_S14")&(namq_10_fcs.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Chain linked volumes, percentage change on previous period
        var = "CLV_PCH_PRE"
        selec = namq_10_fcs_lag[(namq_10_fcs_lag.NA_ITEM=="P312N_S14")&(namq_10_fcs_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Business Climate Indicator
        var = "BS-BCI"
        if var in fake_n:    
            dic_BCI = dict(zip(ei_bsci_m_r2_1.TIME.values, ei_bsci_m_r2_1.Value.values))
            df_in[var] = df_in.ID.str[:-2].map(dic_BCI)
        
        # Retail Confidence Indicator
        var = "BS-RCI-BAL"
        if var in fake_n:    
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Services Confidence Indicator
        var = "BS-SCI-BAL"
        if var in fake_n:    
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Consumer Confidence Indicator
        var = "BS-CSMCI-BAL"
        if var in fake_n:    
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: MIG - Non-Durable Consumer Goods
        var = "MIG_NDCOG"
        if var in fake_n:    
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Retail sale of food, beverages and tobacco
        var = "G47_FOOD"
        if var in fake_n:    
            selec = sts_trtu_q[sts_trtu_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Retail sale of non-food products (except fuel)
        var = "G47_NFOOD_X_G473"
        if var in fake_n:    
            selec = sts_trtu_q[sts_trtu_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Retail trade, except of motor vehicles, motorcyles and fuel
        var = "G47_X_G473"
        if var in fake_n:    
            selec = sts_trtu_q[sts_trtu_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Harmonised unemployment rate all persons, s.a.
        var = "LRHUTTTT"
        if var in fake_n:    
            selec = OECD[OECD.SUBJECT==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        ###########################################################################################
        
        # Dummies
        countries = set(namq_10_fcs.GEO); countries.remove('PT')
        for country in sorted(list(countries)):
            df_in[country] = (df_in.ID.str[-2:] == country).astype(int)
        
        Cnd = df_in.dropna().copy().set_index("ID")
        Cnd = Cnd.loc[:, (Cnd != 0).any(axis=0)].copy()
        del var, selec, df_in
        
        ###########################################################################################
        
        # Save
        #Cnd.to_csv("./Cnd.csv")
        
        ###########################################################################################
        
        # Regression
        X = sm.add_constant(Cnd.loc[:, Cnd.columns != "CLV_PCH_SM"].copy())
        y = Cnd.loc[:, "CLV_PCH_SM"].copy()
        results = sm.OLS(y, X).fit()
        
        fake_df[counter] = [fake_n, results.rsquared, results.rsquared_adj]
        counter += 1
        if counter % 100 == 0:
            print(counter/len(fake_ns)*100)
        
    return(fake_df.transpose())

In [27]:
Cnd_Cheat(1)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL,)",0.466084,0.459448
1,"(BS-SCI-BAL,)",0.646518,0.641615
2,"(G47_NFOOD_X_G473,)",0.457069,0.453402
3,"(LRHUTTTT,)",0.493442,0.487611
4,"(G47_FOOD,)",0.457058,0.45339


In [28]:
Cnd_Cheat(2)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL)",0.674448,0.669667
1,"(BS-RCI-BAL, G47_NFOOD_X_G473)",0.61715,0.614274
2,"(BS-RCI-BAL, LRHUTTTT)",0.629911,0.625025
3,"(BS-RCI-BAL, G47_FOOD)",0.617106,0.61423
4,"(BS-SCI-BAL, G47_NFOOD_X_G473)",0.650834,0.648094
5,"(BS-SCI-BAL, LRHUTTTT)",0.662248,0.657423
6,"(BS-SCI-BAL, G47_FOOD)",0.65083,0.648089
7,"(G47_NFOOD_X_G473, LRHUTTTT)",0.527784,0.524458
8,"(G47_NFOOD_X_G473, G47_FOOD)",0.462399,0.460507
9,"(LRHUTTTT, G47_FOOD)",0.527628,0.524301


In [29]:
Cnd_Cheat(3)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL, G47_NFOOD_X_G473)",0.682445,0.679808
1,"(BS-RCI-BAL, BS-SCI-BAL, LRHUTTTT)",0.683151,0.678385
2,"(BS-RCI-BAL, BS-SCI-BAL, G47_FOOD)",0.682438,0.679802
3,"(BS-RCI-BAL, G47_NFOOD_X_G473, LRHUTTTT)",0.660292,0.657659
4,"(BS-RCI-BAL, G47_NFOOD_X_G473, G47_FOOD)",0.618818,0.617322
5,"(BS-RCI-BAL, LRHUTTTT, G47_FOOD)",0.660067,0.657431
6,"(BS-SCI-BAL, G47_NFOOD_X_G473, LRHUTTTT)",0.668002,0.6653
7,"(BS-SCI-BAL, G47_NFOOD_X_G473, G47_FOOD)",0.651535,0.650107
8,"(BS-SCI-BAL, LRHUTTTT, G47_FOOD)",0.667901,0.665197
9,"(G47_NFOOD_X_G473, LRHUTTTT, G47_FOOD)",0.533299,0.531581


In [30]:
Cnd_Cheat(4)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-RCI-BAL, BS-SCI-BAL, G47_NFOOD_X_G473, LRHUTTTT)",0.692225,0.689594
1,"(BS-RCI-BAL, BS-SCI-BAL, G47_NFOOD_X_G473, G47_FOOD)",0.682921,0.68155
2,"(BS-RCI-BAL, BS-SCI-BAL, LRHUTTTT, G47_FOOD)",0.69207,0.689437
3,"(BS-RCI-BAL, G47_NFOOD_X_G473, LRHUTTTT, G47_FOOD)",0.661352,0.659975
4,"(BS-SCI-BAL, G47_NFOOD_X_G473, LRHUTTTT, G47_FOOD)",0.668672,0.667258


In [15]:
Cnd_Cheat_results = Cnd_Cheat(4)

79.36507936507937


In [16]:
Cnd_Cheat_results.sort_values(["R2Adj", "R2"], ascending=[False, False])

Unnamed: 0,variables,R2,R2Adj
69,"(BS-RCI-BAL, BS-SCI-BAL, G47_NFOOD_X_G473, LRHUTTTT)",0.692225,0.689594
70,"(BS-RCI-BAL, BS-SCI-BAL, G47_X_G473, LRHUTTTT)",0.69214,0.689507
67,"(BS-RCI-BAL, BS-SCI-BAL, G47_FOOD, LRHUTTTT)",0.69207,0.689437
58,"(BS-RCI-BAL, BS-SCI-BAL, BS-CSMCI-BAL, G47_NFOOD_X_G473)",0.689334,0.686647
59,"(BS-RCI-BAL, BS-SCI-BAL, BS-CSMCI-BAL, G47_X_G473)",0.689316,0.686629
57,"(BS-RCI-BAL, BS-SCI-BAL, BS-CSMCI-BAL, G47_FOOD)",0.689299,0.686611
5,"(BS-BCI, BS-RCI-BAL, BS-SCI-BAL, LRHUTTTT)",0.691804,0.686457
3,"(BS-BCI, BS-RCI-BAL, BS-SCI-BAL, G47_NFOOD_X_G473)",0.687787,0.685
4,"(BS-BCI, BS-RCI-BAL, BS-SCI-BAL, G47_X_G473)",0.687785,0.684998
2,"(BS-BCI, BS-RCI-BAL, BS-SCI-BAL, G47_FOOD)",0.687781,0.684994


<br>

# $I$

>## $Igfcf$

In [20]:
def Igfcf_Cheat(n):
    Cnd_vars = ["BS-BCI", "BS-CCI-BAL", "B_C_X_MIG_NRG", "C29_C30", "BS-CCI-BAL"]
    #Cnd_vars = ["BS-BCI", "BS-ICI-BAL", "F_CC11_X_CC113", "BS-CCI-BAL", 
    #            "PROD", "MIG_CAG", "B_C", "B_C_X_MIG_NRG", 
    #            "C29_C30", "C"]    
    # IS-PEI, F_CC112, MIG_NRG_X_E, D
    fake_ns = list(itertools.combinations(Cnd_vars, n))
    fake_df = pd.DataFrame(index=["variables", "R2", "R2Adj"])
    counter = 0
    
    for fake_n in fake_ns:
        
        df_in = pd.DataFrame(sorted(list(set(namq_10_gdp.ID))), columns=["ID"])
        
        ###########################################################################################
        
        # Chain linked volumes, percentage change compared to same period in previous year
        var = "CLV_PCH_SM"
        selec = namq_10_gdp[(namq_10_gdp.NA_ITEM=="P51G")&(namq_10_gdp.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Chain linked volumes, percentage change on previous period
        var = "CLV_PCH_PRE"
        selec = namq_10_gdp_lag[(namq_10_gdp_lag.NA_ITEM=="P51G")&(namq_10_gdp_lag.UNIT==var)][["ID", "Value"]]; selec.columns = ["ID", var]
        df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Business Climate Indicator
        var = "BS-BCI"
        if var in fake_n:
            dic_BCI = dict(zip(ei_bsci_m_r2_1.TIME.values, ei_bsci_m_r2_1.Value.values))
            df_in[var] = df_in.ID.str[:-2].map(dic_BCI)
        
        # Industrial Confidence Indicator
        var = "BS-ICI-BAL"
        if var in fake_n:
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Building permits index - New residential buildings
        var = "IS-PEI"
        if var in fake_n:
            selec = ei_isbu_q[ei_isbu_q.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")

        # Residential buildings, except residences for communities
        var = "F_CC11_X_CC113"
        if var in fake_n:
            selec = sts_cobp_q[sts_cobp_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Two- and more dwelling buildings
        var = "F_CC112"
        if var in fake_n:
            selec = sts_cobp_q[sts_cobp_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Construction confidence indicator
        var = "BS-CCI-BAL"
        if var in fake_n:
            selec = ei_bssi_m_r2[ei_bssi_m_r2.INDIC==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Volume index of production
        var = "PROD"
        if var in fake_n:
            selec = sts_copr_q[sts_copr_q.INDIC_BT==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: MIG - energy (except Section E)
        var = "MIG_NRG_X_E"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: MIG - capital goods
        var = "MIG_CAG"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: Mining and quarrying; manufacturing
        var = "B_C"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: Mining and quarrying; manufacturing (except MIG energy)
        var = "B_C_X_MIG_NRG"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")

        ## Production in industry: Manufacturing
        #var = "C"
        #selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", "C_prod"]
        #df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Production in industry: Electricity, gas, steam and air conditioning supply
        var = "D"
        if var in fake_n:
            selec = sts_inpr_q[sts_inpr_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Manufacture of motor vehicles, trailers, semi-trailers and of other transport equipment
        var = "C29_C30"
        if var in fake_n:
            selec = sts_intv_q[sts_intv_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        # Manufacturing
        var = "C"
        if var in fake_n:
            selec = sts_intv_q[sts_intv_q.NACE_R2==var][["ID", "Value"]]; selec.columns = ["ID", var]
            df_in = pd.merge(df_in, selec, on='ID', how="inner")
        
        ###########################################################################################
        
        # Dummies
        countries = set(namq_10_gdp.GEO); countries.remove('PT')
        for country in sorted(list(countries)):
            df_in[country] = (df_in.ID.str[-2:] == country).astype(int)
        
        Igfcf = df_in.dropna().copy().set_index("ID")
        Igfcf = Igfcf.loc[:, (Igfcf != 0).any(axis=0)].copy()
        del var, selec, df_in
        
        ###########################################################################################
        
        # Save
        Igfcf.to_csv("./Igfcf.csv")
        
        ###########################################################################################
        
        # Regression
        X = sm.add_constant(Igfcf.loc[:, Igfcf.columns != "CLV_PCH_SM"].copy())
        y = Igfcf.loc[:, "CLV_PCH_SM"].copy()
        results = sm.OLS(y, X).fit()
        
        fake_df[counter] = [fake_n, results.rsquared, results.rsquared_adj]
        counter += 1
        if counter % 100 == 0:
            print(counter/len(fake_ns)*100)
        
    return(fake_df.transpose())

In [22]:
Igfcf_Cheat(1)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-BCI,)",0.151485,0.140013
1,"(BS-CCI-BAL,)",0.188767,0.179036
2,"(B_C_X_MIG_NRG,)",0.0642902,0.0513907
3,"(C29_C30,)",0.152018,0.13925
4,"(BS-CCI-BAL,)",0.188767,0.179036


In [23]:
Igfcf_Cheat(2)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-BCI, BS-CCI-BAL)",0.257981,0.247063
1,"(BS-BCI, B_C_X_MIG_NRG)",0.194937,0.182566
2,"(BS-BCI, C29_C30)",0.29738,0.285982
3,"(BS-BCI, BS-CCI-BAL)",0.257981,0.247063
4,"(BS-CCI-BAL, B_C_X_MIG_NRG)",0.23987,0.229011
5,"(BS-CCI-BAL, C29_C30)",0.374198,0.364691
6,"(BS-CCI-BAL, BS-CCI-BAL)",0.188767,0.179036
7,"(B_C_X_MIG_NRG, C29_C30)",0.290454,0.279574
8,"(B_C_X_MIG_NRG, BS-CCI-BAL)",0.23987,0.229011
9,"(C29_C30, BS-CCI-BAL)",0.374198,0.364691


In [24]:
Igfcf_Cheat(3)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-BCI, BS-CCI-BAL, B_C_X_MIG_NRG)",0.296614,0.28541
1,"(BS-BCI, BS-CCI-BAL, C29_C30)",0.442971,0.433821
2,"(BS-BCI, BS-CCI-BAL, BS-CCI-BAL)",0.257981,0.247063
3,"(BS-BCI, B_C_X_MIG_NRG, C29_C30)",0.427307,0.417808
4,"(BS-BCI, B_C_X_MIG_NRG, BS-CCI-BAL)",0.296614,0.28541
5,"(BS-BCI, C29_C30, BS-CCI-BAL)",0.442971,0.433821
6,"(BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30)",0.475686,0.467427
7,"(BS-CCI-BAL, B_C_X_MIG_NRG, BS-CCI-BAL)",0.23987,0.229011
8,"(BS-CCI-BAL, C29_C30, BS-CCI-BAL)",0.374198,0.364691
9,"(B_C_X_MIG_NRG, C29_C30, BS-CCI-BAL)",0.475686,0.467427


In [25]:
Igfcf_Cheat(4)

Unnamed: 0,variables,R2,R2Adj
0,"(BS-BCI, BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30)",0.53311,0.525146
1,"(BS-BCI, BS-CCI-BAL, B_C_X_MIG_NRG, BS-CCI-BAL)",0.296614,0.28541
2,"(BS-BCI, BS-CCI-BAL, C29_C30, BS-CCI-BAL)",0.442971,0.433821
3,"(BS-BCI, B_C_X_MIG_NRG, C29_C30, BS-CCI-BAL)",0.53311,0.525146
4,"(BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30, BS-CCI-BAL)",0.475686,0.467427


In [18]:
Igfcf_Cheat_results = Igfcf_Cheat(4)

47.61904761904761
95.23809523809523


In [19]:
Igfcf_Cheat_results.sort_values(["R2Adj", "R2"], ascending=[False, False])

Unnamed: 0,variables,R2,R2Adj
61,"(BS-BCI, BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30)",0.53311,0.525146
59,"(BS-BCI, BS-CCI-BAL, B_C, C29_C30)",0.532746,0.524776
117,"(BS-ICI-BAL, BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30)",0.515449,0.507478
115,"(BS-ICI-BAL, BS-CCI-BAL, B_C, C29_C30)",0.51429,0.506301
182,"(BS-CCI-BAL, PROD, B_C_X_MIG_NRG, C29_C30)",0.479845,0.471289
180,"(BS-CCI-BAL, PROD, B_C, C29_C30)",0.479777,0.47122
152,"(F_CC11_X_CC113, BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30)",0.477558,0.468875
150,"(F_CC11_X_CC113, BS-CCI-BAL, B_C, C29_C30)",0.477539,0.468855
193,"(BS-CCI-BAL, B_C, C29_C30, C)",0.476998,0.468395
194,"(BS-CCI-BAL, B_C_X_MIG_NRG, C29_C30, C)",0.476984,0.468381


<br>