In [1]:
import pandas as pd


def p_values(data0, data1, cols_order):
    """Get p_values
    :param data0: DataFrame with original regression estimates
    :type data0: DataFrame
    :param data1: DataFrame with estimates from permutation
    :type data1: DataFrame
    :param cols_order: List of ordering of columns
    :type cols_order: List
    :return: DataFrame with p-values
    :rtype: DataFrame
    """
    columns = data0.columns
    pos = data0[(data0[columns] > 0)]
    pos = pos.loc[:, ~pos.isnull().any()]
    neg = data0[(data0[columns] < 0)]
    neg = neg.loc[:, ~neg.isnull().any()]
    val_pos = [
        data1[data1[var] > pos[var].values[0]].count()[var] / 1000
        for var in pos.columns
    ]
    coe_pos = pos.columns
    cols = ["coefficients", "p-value"]
    b_pos = pd.DataFrame(list(zip(coe_pos, val_pos)), columns=cols)
    val_neg = [
        data1[data1[var] < neg[var].values[0]].count()[var] / 1000
        for var in neg.columns
    ]
    coe_neg = neg.columns
    b_neg = pd.DataFrame(list(zip(coe_neg, val_neg)), columns=cols)
    b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
    b = b.set_index("index").stack().unstack(0).reset_index().rename_axis(None, axis=1)
    headers = b.iloc[0]
    b = pd.DataFrame(b.values[1:], columns=headers)
    b = b.iloc[:, 1:]
    b = b[cols_order]
    return b

In [2]:
country = 'KE'
stage = 'baseline'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b2_april{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_prueba_april.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_prueba_april.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
country = 'KE'
stage = 'baseline'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b2_all{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_prueba.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_prueba.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [7]:
for country in ['joint']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_ver/b1_b2_p_fake_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_fake_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_p_fake_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [6]:
for country in ['joint']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_ver/country_b1b2p_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_fake.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_fake_b1_b2_p.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
for country in ['KE', 'SA']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_fake{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_p.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [4]:
df

Unnamed: 0,strong_ver_rt,strong_ver_rt_f,strong_true_rt,strong_true_rt_f,strong_fake_rt,strong_fake_rt_f,strong_n_posts_rt,strong_n_posts_rt_f,strong_ver_no_rt,strong_ver_no_rt_f,...,neither_n_posts_rt,neither_n_posts_rt_f,neither_ver_no_rt,neither_ver_no_rt_f,neither_true_no_rt,neither_true_no_rt_f,neither_fake_no_rt,neither_fake_no_rt_f,neither_n_posts_no_rt,neither_n_posts_no_rt_f
0,0.034,0.021,0.016,0.011,0.035,0.023,0.024,0.016,0.14,0.46,...,0.335,0.307,0.198,0.121,0.105,0.1,0.297,0.144,0.024,0.001


In [2]:
### B1 JOINT ALL FAKE
for country in ['KE', 'SA']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_p_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [7]:
### B1 JOINT ALL FAKE
for country in ['KE', 'SA']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_fake_all.xlsx", index=False)

FileNotFoundError: [Errno 2] No such file or directory: '../../data/04-analysis/KE/stage1_2/pestimates_b1_b2_fake_all.xlsx'

In [5]:
### B1 JOINT ALL FAKE
for country in ['joint']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_ver/b1_b2_fake_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_fake_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
for country in ['joint']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_ver/country_b1b2_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_fake_b1_b2.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
for country in ['KE', 'SA']:
    for stage in ['stage1_2']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_fake.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_fake_b1_b2.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [2]:
for country in ['KE', 'SA']:
    for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_fake_{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                       [col for col in perm.columns if 'weak_' in col])
        cols_neither = ([col for col in perm.columns if 'neither_' in col])
        perm_strong = perm[cols_strong]
        perm_neither = perm[cols_neither]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_fake_b1.xlsx")
        orig_strong = orig[cols_strong]
        orig_neither = orig[cols_neither]

        df_strong = p_values(orig_strong, perm_strong, cols_strong)
        df_neither = p_values(orig_neither, perm_neither, cols_neither)
        df = pd.concat([df_strong, df_neither], axis = 1)
        df.to_excel(f"{p}{stage}/agg_pvalues_fake_b1.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [6]:
### B1 ALL FAKE
for country in ['SA', 'KE']:
    for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(0, 4):
            df = pd.read_excel(f"{p}{stage}/pestimates_b1_fake_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_fake_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_fake_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [8]:
### B1 JOINT ALL FAKE
for country in ['joint']:
    for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
        p = f"../../data/04-analysis/{country}/"
        perm = pd.DataFrame()
        for i in range(1, 1001):
            df = pd.read_excel(f"{p}{stage}/pestimates_ver/b1_fake_all{i}.xlsx")
            perm = pd.concat([perm, df])
    
        cols = [col for col in perm.columns]
    
        perm = perm.reset_index(drop=True)
        orig = pd.read_excel(f"{p}{stage}/pestimates_b1_fake_all.xlsx")

        df = p_values(orig, perm, cols)

        df.to_excel(f"{p}{stage}/agg_pvalues_b1_fake_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [7]:
### JOINT B1 FAKE
country = 'joint'

for stage in ['stage1_2', 'stage3_4', 'stage5_6']:
    p = f"../../data/04-analysis/{country}/"
    perm = pd.DataFrame()
    for i in range(1, 1001):
        df = pd.read_excel(f"{p}{stage}/pestimates_ver/b1_fake_{i}.xlsx")
        perm = pd.concat([perm, df])
    
    cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
                   [col for col in perm.columns if 'weak_' in col])
    cols_neither = ([col for col in perm.columns if 'neither_' in col])
    perm_strong = perm[cols_strong]
    perm_neither = perm[cols_neither]
    
    perm = perm.reset_index(drop=True)
    orig = pd.read_excel(f"{p}{stage}/pestimates_b1_fake.xlsx")
    orig_strong = orig[cols_strong]
    orig_neither = orig[cols_neither]

    df_strong = p_values(orig_strong, perm_strong, cols_strong)
    df_neither = p_values(orig_neither, perm_neither, cols_neither)
    df = pd.concat([df_strong, df_neither], axis = 1)
    df.to_excel(f"{p}{stage}/agg_pvalues_b1_fake.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [3]:
country = 'joint'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(1, 1001):
    df = pd.read_excel(f"{p}{stage}/pestimates/country_b1b2p_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1b2p_country.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_sentiments_b1b2p.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
country = 'joint'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(1, 1001):
    df = pd.read_excel(f"{p}{stage}/pestimates/all_country_b1b2_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1b2_all_country.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_sentiment_b1b2_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [15]:
country = 'KE'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1_all_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1_all.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_sentiment_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [9]:
country = 'joint'
stage = 'stage5_6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(1, 1001):
    df = pd.read_excel(f"{p}{stage}/pestimates/all_country_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1_all_country.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_sentiment_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [6]:
country = 'joint'
stage = 'stage5_6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(1, 1001):
    df = pd.read_excel(f"{p}{stage}/pestimates/country_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1_country.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_sentiments_b1.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [7]:
country = 'KE'
stage = 'stage5_6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment_b1.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_sentiments_b1.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
country = 'KE'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_p.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [4]:
country = 'SA'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_all{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_p_all.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_p_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [22]:
country = 'KE'
stage = 'stage5_6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_b1.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [20]:
country = 'KE'
stage = 'stage5_6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_all{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1_all.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_b1_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
df

Unnamed: 0,strong_ver_rt,strong_ver_rt_f,strong_true_rt,strong_true_rt_f,strong_n_posts_rt,strong_n_posts_rt_f,strong_ver_no_rt,strong_ver_no_rt_f,strong_true_no_rt,strong_true_no_rt_f,...,neither_true_rt,neither_true_rt_f,neither_n_posts_rt,neither_n_posts_rt_f,neither_ver_no_rt,neither_ver_no_rt_f,neither_true_no_rt,neither_true_no_rt_f,neither_n_posts_no_rt,neither_n_posts_no_rt_f
0,0.558,0.499,0.516,0.533,0.315,0.37,0.281,0.463,0.436,0.389,...,0.023,0.005,0.217,0.127,0.626,0.218,0.197,0.605,0.393,0.611


In [9]:
country = 'SA'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [10]:
df

Unnamed: 0,strong_ver_rt,strong_ver_rt_f,strong_true_rt,strong_true_rt_f,strong_n_posts_rt,strong_n_posts_rt_f,strong_ver_no_rt,strong_ver_no_rt_f,strong_true_no_rt,strong_true_no_rt_f,...,neither_true_rt,neither_true_rt_f,neither_n_posts_rt,neither_n_posts_rt_f,neither_ver_no_rt,neither_ver_no_rt_f,neither_true_no_rt,neither_true_no_rt_f,neither_n_posts_no_rt,neither_n_posts_no_rt_f
0,0.998,1.0,1.0,1.0,0.997,1.0,0.021,0.998,0.005,0.001,...,0.005,0.009,0.717,0.714,0.203,0.306,0.159,0.313,0.124,0.117


In [11]:
country = 'KE'
stage = 'stage1_2'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_all{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols = [col for col in perm.columns]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_b1_b2_all.xlsx")

df = p_values(orig, perm, cols)

df.to_excel(f"{p}{stage}/agg_pvalues_b1_b2_all.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [9]:
country = 'KE'
stage = 'stage6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_sentiment{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_sentiment.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_sentiment.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [11]:
country = 'SA'
stage = 'stage1'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_batch2_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_batch2.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_batch2.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
country = 'KE'
stage = 'stage6'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_endline{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_endline.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_endline.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [8]:
country = 'SA'
stage = 'baseline'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_endline.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [12]:
country = 'KE'
stage = 'AC'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_endline.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [13]:
df

Unnamed: 0,strong_AC,weak_AC,neither_AC
0,0.25,0.348,0.041


In [4]:
country = 'SA'
stage = 'SMIs'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_endline.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
df

Unnamed: 0,strong_smi,weak_smi,neither_smi
0,0.2,0.435,0.263


In [4]:
country = 'KE'
stage = 'BalanceVer'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.to_excel(f"{p}{stage}/agg_pvalues_endline.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()


In [5]:
country = 'SA'
stage = 'stage5'
p = f"../../data/04-analysis/{country}/"
perm = pd.DataFrame()
for i in range(0, 4):
    df = pd.read_excel(f"{p}{stage}/pestimates_endline_filter_{i}.xlsx")
    perm = pd.concat([perm, df])
    
cols_strong = ([col for col in perm.columns if 'strong_' in col] + 
               [col for col in perm.columns if 'weak_' in col])
cols_neither = ([col for col in perm.columns if 'neither_' in col])
perm_strong = perm[cols_strong]
perm_neither = perm[cols_neither]
    
perm = perm.reset_index(drop=True)
orig = pd.read_excel(f"{p}{stage}/pestimates_endline_filter.xlsx")
orig_strong = orig[cols_strong]
orig_neither = orig[cols_neither]

df_strong = p_values(orig_strong, perm_strong, cols_strong)
df_neither = p_values(orig_neither, perm_neither, cols_neither)
df = pd.concat([df_strong, df_neither], axis = 1)
df.rename(columns = 
        {col: col + '_filter' for col in df.columns}, 
        inplace=True)

agg_pvals_normal = pd.read_excel(f"{p}{stage}/agg_pvalues_endline.xlsx")
df = pd.concat([df, agg_pvals_normal], axis = 1)

order2 = ['strong_ver_rt', 'strong_ver_rt_filter', 'strong_true_rt', 'strong_true_rt_filter',
          'strong_n_posts_rt', 'strong_n_posts_rt_filter', 'strong_ver_no_rt', 'strong_ver_no_rt_filter', 
          'strong_true_no_rt', 'strong_true_no_rt_filter', 'strong_n_posts_no_rt', 'strong_n_posts_no_rt_filter', 
          'weak_ver_rt', 'weak_ver_rt_filter', 'weak_true_rt', 'weak_true_rt_filter',
          'weak_n_posts_rt', 'weak_n_posts_rt_filter', 'weak_ver_no_rt', 'weak_ver_no_rt_filter', 
          'weak_true_no_rt', 'weak_true_no_rt_filter', 'weak_n_posts_no_rt', 'weak_n_posts_no_rt_filter',
          'neither_ver_rt', 'neither_ver_rt_filter', 'neither_true_rt', 'neither_true_rt_filter',
          'neither_n_posts_rt', 'neither_n_posts_rt_filter', 'neither_ver_no_rt', 'neither_ver_no_rt_filter', 
          'neither_true_no_rt', 'neither_true_no_rt_filter', 'neither_n_posts_no_rt', 
          'neither_n_posts_no_rt_filter']

df = df[order2]

df.to_excel(f"{p}{stage}/agg_pvalues_endline_filter.xlsx", index=False)

  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
  b = b_pos.append(b_neg).reset_index(drop=True).reset_index()
