In [1]:
import pandas as pd


def p_values(data0, data1, cols_order):
    """Get p_values
    :param data0: DataFrame with original regression estimates
    :type data0: DataFrame
    :param data1: DataFrame with estimates from permutation
    :type data1: DataFrame
    :param cols_order: List of ordering of columns
    :type cols_order: List
    :return: DataFrame with p-values
    :rtype: DataFrame
    """
    columns = data0.columns
    pos = data0[(data0[columns] > 0)]
    pos = pos.loc[:, ~pos.isnull().any()]
    neg = data0[(data0[columns] < 0)]
    neg = neg.loc[:, ~neg.isnull().any()]
    val_pos = [
        data1[data1[var] > pos[var].values[0]].count()[var] / 1000
        for var in pos.columns
    ]
    coe_pos = pos.columns
    cols = ["coefficients", "p-value"]
    b_pos = pd.DataFrame(list(zip(coe_pos, val_pos)), columns=cols)
    val_neg = [
        data1[data1[var] < neg[var].values[0]].count()[var] / 1000
        for var in neg.columns
    ]
    coe_neg = neg.columns
    b_neg = pd.DataFrame(list(zip(coe_neg, val_neg)), columns=cols)
    b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
    b = b.set_index("index").stack().unstack(0).reset_index().rename_axis(None, axis=1)
    headers = b.iloc[0]
    b = pd.DataFrame(b.values[1:], columns=headers)
    b = b.iloc[:, 1:]
    b = b[cols_order]
    return b

In [5]:
country = 'joint'
for base in ['', '_1_month', '_2_month']:
    for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_diff/b1_{i}{base}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_good{base}.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_b1_good_{base}.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_

In [4]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for base in ['', '_1_month', '_2_month']:
    for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
        for type1 in ['arc_', 'log_', 'share_']:
            
            perm = pd.DataFrame()
            for i in range(1, 1001):
                df = pd.read_excel(f"{p}{stage}/pestimates_diff/{type1}_b1_{i}{base}.xlsx")
                perm = pd.concat([perm, df])
    
            cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                           [col for col in perm.columns if 'weak_' in col])
            cols_neither = ([col for col in perm.columns if 'neither_' in col])
            perm_strong = perm[cols_strong]
            perm_neither = perm[cols_neither]
    
            perm = perm.reset_index(drop=True)
            orig = pd.read_excel(f"{p}{stage}/pestimates_b1_good_{base}_{type1}.xlsx")
            orig_strong = orig[cols_strong]
            orig_neither = orig[cols_neither]

            df_strong = p_values(orig_strong, perm_strong, cols_strong)
            df_neither = p_values(orig_neither, perm_neither, cols_neither)
            df = pd.concat([df_strong, df_neither], axis = 1)
            df.to_excel(f"{p}{stage}/agg_pvalues_b1_good_{base}_{type1}.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_

In [2]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"
for base in ['', '_1_month', '_2_month']:
    for stage in ['stage1_2','stage3_4','stage5_6']:
        for type1 in ['share_']:
            perm = pd.DataFrame()
            for i in range(1, 1001):
                df = pd.read_excel(f"{p}{stage}/pestimates_trans/{base}b1_all{type1}{i}.xlsx")
                perm = pd.concat([perm, df])
    
            cols = [col for col in perm.columns]
    
            perm = perm.reset_index(drop=True)
            orig = pd.read_excel(f"{p}{stage}/pestimates{base}b1_all{type1}.xlsx")

            df = p_values(orig, perm, cols)

            df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_all{base}{type1}.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [2]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['stage3_4']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_final/{type1}b1b2p_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_b1b2p_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['stage3_4']:
    for type1 in ['arc_', 'log_', '']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_final/{type1}all_country_b1b2p_sent_bert_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_all_country_b1b2p_sent_bert.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_all_country_b1b2p_sent_bert.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"
### 'log_', 'arc_', 
for stage in ['stage3_4']:
    for type1 in ['arc_', 'log_', '']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_final/{type1}all_country_b1b2p_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_sentiment_b1b2p_all_country.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_all_country_b1b2p_sent.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [12]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"


for stage in ['AC', 'SMIs']:
    for type1 in ['']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates/{type1}b1_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_{stage}_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [2]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"
### 'log_', 'arc_', 
for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    for type1 in ['', 'log_', 'arc_']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_final/{type1}all_country_b1_sent_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}all_country_b1_sent.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_all_country_b1_sent.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"
### 'log_', 'arc_', 
for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    for type1 in ['', 'log_', 'arc_']:
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_final/{type1}all_country_b1_sent_bert_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}all_country_b1_sent_bert.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_all_country_b1_sent_bert.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
country = 'joint'
p = f"../../data/04-analysis/{country}/"
stage = 'baseline'
### 'log_', 'arc_', 
for data_type in ['b1b2p_all', 'all_country_b1b2p_sent_bert', 'sent_all_country_b1b2p']:
    for type1 in ['', 'log_', 'arc_']:
        perm = pd.read_excel(f"{p}{stage}/pestimates/{type1}{data_type}.xlsx")
            
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_{data_type}.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_{data_type}.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [6]:
df

Unnamed: 0,pos_rt_c,pos_no_rt_c,neu_rt_c,neu_no_rt_c,neg_rt_c,neg_no_rt_c,n_posts_rt_c,n_posts_no_rt_c,pos_rt_v,pos_no_rt_v,neu_rt_v,neu_no_rt_v,neg_rt_v,neg_no_rt_v,n_posts_rt_v,n_posts_no_rt_v
0,0.702,0.637,0.693,0.593,0.664,0.605,0.731,0.625,0.645,0.18,0.357,0.234,0.548,0.566,0.694,0.319
