In [1]:
import pandas as pd


def p_values(data0, data1, cols_order):
    """Get p_values
    :param data0: DataFrame with original regression estimates
    :type data0: DataFrame
    :param data1: DataFrame with estimates from permutation
    :type data1: DataFrame
    :param cols_order: List of ordering of columns
    :type cols_order: List
    :return: DataFrame with p-values
    :rtype: DataFrame
    """
    columns = data0.columns
    pos = data0[(data0[columns] > 0)]
    pos = pos.loc[:, ~pos.isnull().any()]
    neg = data0[(data0[columns] < 0)]
    neg = neg.loc[:, ~neg.isnull().any()]
    val_pos = [
        data1[data1[var] > pos[var].values[0]].count()[var] / 1000
        for var in pos.columns
    ]
    coe_pos = pos.columns
    cols = ["coefficients", "p-value"]
    b_pos = pd.DataFrame(list(zip(coe_pos, val_pos)), columns=cols)
    val_neg = [
        data1[data1[var] < neg[var].values[0]].count()[var] / 1000
        for var in neg.columns
    ]
    coe_neg = neg.columns
    b_neg = pd.DataFrame(list(zip(coe_neg, val_neg)), columns=cols)
    b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
    b = b.set_index("index").stack().unstack(0).reset_index().rename_axis(None, axis=1)
    headers = b.iloc[0]
    b = pd.DataFrame(b.values[1:], columns=headers)
    b = b.iloc[:, 1:]
    b = b[cols_order]
    return b

In [4]:
# Verifiable
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates_extensive/{type1}extensive_b1b2p_all.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_b1b2p_extensive.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [2]:
# Verifiable
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['baseline']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates/{type1}b1b2p_extensive.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_b1b2p_extensive.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
df

Unnamed: 0,ver_rt,true_rt,fake_rt,n_posts_rt,ver_no_rt,true_no_rt,fake_no_rt,n_posts_no_rt
0,0.071,0.049,0.084,0.054,0.488,0.358,0.351,0.189


In [9]:
# Sentiment Analysis (BERT)
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates_extensive/{type1}extensive_b1b2p_sent_bert.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_extensive_b1b2p_sent_bert.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive_sent_bert.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
# Sentiment Analysis (BERT)
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['baseline']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates/{type1}extensive_b1b2p_sent_bert.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_extensive_b1b2p_sent_bert.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive_sent_bert.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [None]:
# Sentiment (VADER)
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates_extensive/{type1}extensive_b1b2p_sent.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_extensive_b1b2p_sent.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive_sent.xlsx", index=False)

In [4]:
# Sentiment (VADER)
country = 'joint'
p = f"../../data/04-analysis/{country}/"

for stage in ['baseline']:
    for type1 in ['log_', 'arc_', '']:
        perm = pd.DataFrame()
        
        df = pd.read_excel(f"{p}{stage}/pestimates/{type1}sent_extensive_b1b2p.xlsx")
        perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/{type1}pestimates_sent_extensive_b1b2p.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/{type1}agg_pvalues_b1b2p_extensive_sent.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
