In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, sys
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import spearmanr
import matplotlib.dates as mdates
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime, timedelta
import seaborn as sns
import copy

sys.path.append('../utils/')

from utils import *
from analysis_utils import *

In [7]:
"""
Necessary data (both weekly and monthly):
- Shocks
- Confounds
- Trends outcomes
- KSU
"""

# Directory prefix and time unit; together they select which merged CSV is read
# (`prefix + 'merged_' + units + '.csv'`).
prefix = '../data/prepared/merged/'
units = 'weeks'

# Divisor for the first (per-outcome) significance threshold. The values match
# the number of entries in `intvns` for each unit (7 for weeks, 6 for months).
if units == 'weeks':
    bonf_denom = 7
elif units == 'months':
    bonf_denom = 6
else:
    # Any other unit is unsupported in this cell.
    assert 0

# Switches that gate the control-analysis cell and the primary-analysis cell.
do_controls = False
do_primary = True

# Run primary analyses
# Threshold 1: 0.05 divided by `bonf_denom` (yields '*' in eval_pval).
pval_thresh_1 = 0.05/bonf_denom

# Hard-coded test counts for BOTH units, independent of the current `units`.
# NOTE(review): these literals must be kept in sync by hand with the lengths of
# `intvns` / `all_outcomes` defined further down.
month_tests = 6*15*3 # intvns * outcomes * [assoc, contemp, lagged]
week_tests = 7*12*3 # intvns * outcomes * [assoc, contemp, lagged]

all_tests = month_tests + week_tests

# Threshold 2: 0.05 divided by the total number of monthly + weekly tests
# (yields '**' / '***' in eval_pval).
pval_thresh_2 = 0.05/all_tests # i.e. 0.05/(month_tests + week_tests)

# Settings for non-bin models
# Passed as `difference` / `include_time` to the assoc, contemp and lagged fits.
do_differencing = True
include_time = False

# Settings for both
# `add_L` is also negated to build the first sensitivity analysis (`sens_add_L`).
add_L = True
normalize = True
include_month = True

# Settings for bin models
# `duration_months` truncates the series after the release date;
# `bin_slope` is read as a global inside run_primary_analyses and picks which of
# `<X>_bin` / `<X>_bin*Time_C` is treated as the exposure.
do_differencing_bin = False
include_time_bin = True
duration_months = 5
bin_slope = True

# Subset of `models` that the primary-analysis cell actually fits.
models_to_run = ['assoc', 'contemp', 'lagged']

# KSU params
# Falsy (None or 0) means the KSU columns are not shifted.
ksu_lag = None

verbose=True
# Every model name that run_primary_analyses accepts.
models = ['assoc', 'contemp', 'lagged', 'bin']


# Release dates as [year, month, day]; used by the bin model to locate the
# intervention start. 'owth' shares the 'wth' date.
netflix_release_dates = {'tgc': [2019, 10, 16], 'fok': [2011, 5, 6], 'okja': [2017, 6, 28],
                'wth': [2017, 6, 16], 'cowspiracy': [2015, 9, 15], 'owth': [2017, 6, 16], 
                         'yawye': [2024, 1, 1]}

# outcome -> interventions that may be analysed for it; the primary loop skips
# every (outcome, intervention) pair not listed here. Outcomes absent from this
# dict are unrestricted.
restrictions = {'StewartMilk': ['cowspiracy', 'wth', 'okja', 'owth', 'all_docs'], 
                'StewartPBMilk': ['cowspiracy', 'wth', 'okja', 'owth', 'all_docs'],
                'Zhao': ['wth', 'okja', 'owth', 'tgc', 'all_docs']}

# outcome -> interventions for which the 'bin' model is run; same skip logic as
# `restrictions`, applied only to the bin model.
binary_analysis_dct = {'Zhao': ['owth', 'wth', 'okja', 'tgc'], 'NeuhoferLusk': ['tgc'],
                      'StewartMilk': ['cowspiracy', 'owth', 'wth', 'okja'], 
                       'StewartPBMilk': ['cowspiracy', 'owth', 'wth', 'okja']}

# Validate the time unit BEFORE reading anything, so a typo fails here with a
# clear message rather than as a FileNotFoundError from read_csv.
assert units in ['weeks', 'months', 'days'], 'unsupported units: ' + str(units)

# The original cell also asserted `len(set(units) & set(prefix)) != 0`. That
# expression intersects the *characters* of the two strings, so it passes
# whenever they share any single letter and validates nothing; it is dropped.
# The file name itself already encodes the unit.

# Main table: outcomes, interventions and confounds at the chosen time unit.
normalized = pd.read_csv(prefix + 'merged_' + units + '.csv')

# Control series are only needed when the control analyses are enabled.
if do_controls:
    normalized_controls = pd.read_csv(prefix + 'merged_controls_' + units + '.csv')

# Lists
# Key columns shared by the main and control tables (used as the merge key).
common = ['ds', 'Time', 'Month']

# Intervention / control series names.
all_docs = ['tgc', 'wth', 'fok', 'cowspiracy', 'okja', 'yawye']
all_climate = ['climate', 'climate_change', 'sustainability']

all_ts_albums = ['reputation', 'ts_1989', 'lover', 'speak_now', 'red']
all_ts_outcomes = [
    'taylor_swift',
    'taylor_swift_lyrics',
    'taylor_swift_songs',
    'taylor_swift_tour',
    'taylor_swift_album',
]

all_drake_albums = ['scorpion', 'take_care', 'views', 'nwts', 'tml']
all_drake_outcomes = [
    'drake',
    'drake_lyrics',
    'drake_songs',
    'drake_tour',
    'drake_album',
]

# Primary outcomes and interventions depend on the time unit.
if units == 'months':
    all_primary_outcomes = [
        'plant_based_plus_plant_based', 'vegan', 'vegetarian',
        'ksu_beef', 'ksu_pork', 'ksu_chicken',
        'StewartMilk', 'StewartPBMilk', 'Zhao',
    ]
    intvns = ['fok', 'cowspiracy', 'owth', 'tgc', 'yawye', 'all_docs']
elif units == 'weeks':
    all_primary_outcomes = [
        'plant_based_plus_plant_based', 'vegan', 'vegetarian',
        'StewartMilk', 'StewartPBMilk', 'Zhao',
    ]
    # Weekly data keeps 'okja' and 'wth' as separate interventions.
    intvns = ['fok', 'cowspiracy', 'okja', 'wth', 'tgc', 'yawye', 'all_docs']
else:
    assert 0

# Secondary outcomes are the same for both supported units.
all_secondary_outcomes = [
    'vegan_informative', 'vegetarian_informative', 'plant_based_informative',
    'vegan_behavior', 'vegetarian_behavior', 'plant_based_behavior',
]

all_outcomes = all_primary_outcomes + all_secondary_outcomes

# Outcomes, interventions, and models to run
test_outcomes = all_outcomes
test_intvns = intvns

# Every requested model must be one the analysis code knows about.
for model in models_to_run:
    assert model in models

# Alternative run names used previously:
#   'bin_{num}months_slope{val}'.format(num=duration_months, val=bin_slope)
#   'ksu_lag_' + str(ksu_lag) + 'assoc_contemp_lagged_2024'
run_name = 'test'

print(run_name)


test


In [9]:
# Shift KSU
# When `ksu_lag` is set, each KSU column is shifted by `-ksu_lag` rows, i.e.
# row t receives the value that was at row t + ksu_lag.
# The shift is written back to the same columns, so re-running this cell
# shifts them again.
if ksu_lag:
    ksu_shift = -1 * ksu_lag
    for ksu_col in ('ksu_chicken', 'ksu_pork', 'ksu_beef'):
        normalized[ksu_col] = normalized[ksu_col].shift(ksu_shift)

In [10]:
# Run controls

"""
Positive and negative controls.
Positive controls: (sum of TS albums) x each TS outcome
Negative controls: 
1) (each TS album + sum) x vegan recipes
2) (each doc + sum) x TS outcomes

Binary approach: 
Y_t ~ I_t + L_t + L_t-1 + Y_t-1
"""


    

def run_controls(df, pval_thresh=0.05, PS=False, PS_logistic=False, fit_method='OLS', model='lagged', bonf_denom=1,
                add_L=True, difference=False, normalize=False, include_month=True, include_time=True,
                verbose=False):
    """Run the positive and negative control analyses and print pass rates.

    Four families of tests are run, in this order:
      1. Positive: 'all_ts_albums' against every entry of ``all_ts_outcomes``.
      2. Positive: 'all_drake_albums' against every entry of ``all_drake_outcomes``.
      3. Negative #1: every TS/Drake album (and both sums) against every
         entry of ``all_primary_outcomes``.
      4. Negative #2: every documentary (and 'all_docs') against every
         TS/Drake outcome.
    A positive control passes when its p-value is below the corrected
    threshold; a negative control passes when it is not.

    Parameters
    ----------
    df : DataFrame holding outcome, intervention and control columns.
    pval_thresh : uncorrected significance level; it is divided by
        ``bonf_denom`` to obtain the threshold actually used.
    bonf_denom : Bonferroni divisor applied to ``pval_thresh``.
    model : one of 'assoc', 'lagged', 'contemp'. 'assoc' requires
        ``add_L is False``.
    PS, PS_logistic, fit_method, difference, normalize, verbose :
        forwarded unchanged to ``run_analysis``.
    add_L, include_time, include_month : forwarded to
        ``prepare_analysis_dct``.

    Returns
    -------
    None. Results are reported via ``print``.
    """
    assert model in ['assoc', 'lagged', 'contemp']
    if model == 'assoc':
        assert add_L is False

    # Fix: the original recomputed this as `0.05/bonf_denom`, silently ignoring
    # the `pval_thresh` argument. With the default (0.05) the result is unchanged.
    pval_thresh = pval_thresh/bonf_denom

    trt_lag = 1 if (model == 'lagged') else 0
    non_add_L_set = []

    def _tally(frame, X_vals, Y_vals, confounds, expect_significant):
        """Fit every (X, Y) pair; return (frame, passed, total).

        The frame returned by ``prepare_analysis_dct`` is carried forward to
        the next pair, exactly as the original loops rebound ``df``.
        """
        passed = 0
        total = 0
        for X in X_vals:
            for Y in Y_vals:
                frame, analysis_dct = prepare_analysis_dct(frame.copy(), X, Y,
                                                           trt_lag,
                                                           model,
                                                           add_L, include_time,
                                                           include_month,
                                                           confounds, non_add_L_set)

                # include_time/include_month are forced off here; those options
                # were already handed to prepare_analysis_dct above.
                x_pval, x_beta, _, _ = run_analysis(analysis_dct, frame.copy(), trt_lag,
                                                    non_add_L_set,
                                                    PS=PS, PS_logistic=PS_logistic,
                                                    fit_method=fit_method,
                                                    difference=difference, normalize=normalize,
                                                    verbose=verbose, include_time=False,
                                                    include_month=False)

                if expect_significant:
                    ok = x_pval < pval_thresh
                else:
                    ok = x_pval >= pval_thresh

                if ok:
                    passed += 1
                elif not expect_significant:
                    # Only negative-control failures are listed individually.
                    print('failed')
                    print(X, Y, x_pval)
                total += 1
        return frame, passed, total

    # Positive controls share one (empty) confound list object.
    no_confounds = []

    # Positive controls #1
    df, passed, total = _tally(df, ['all_ts_albums'], all_ts_outcomes, no_confounds, True)
    print('TS Pos Control tests success fraction: ', float(passed)/total, 'Passed: ', passed, 'Total: ', total)

    # Positive controls #2
    df, passed, total = _tally(df, ['all_drake_albums'], all_drake_outcomes, no_confounds, True)
    print('Drake Pos Control tests success fraction: ', float(passed)/total, 'Passed: ', passed, 'Total: ', total)

    # Both negative-control families use the same confound list.
    confounds = ['health', 'animal_welfare', 'all_climate']

    # Negative controls #1
    X_vals = all_ts_albums + ['all_ts_albums'] + all_drake_albums + ['all_drake_albums']
    df, passed1, total1 = _tally(df, X_vals, all_primary_outcomes, confounds, False)
    print('Neg Control #1 tests success fraction: ', float(passed1)/total1, 'Passed: ', passed1, 'Total: ', total1)

    # Negative controls #2
    # Add in the other docs as precision covariates?
    Y_vals = all_ts_outcomes + all_drake_outcomes
    df, passed2, total2 = _tally(df, all_docs + ['all_docs'], Y_vals, confounds, False)

    print('Neg Control #2 success fraction: ', float(passed2)/total2, 'Passed: ',  passed2, 'Total: ', total2)
    print('Neg Control success fraction: ', float(passed1 + passed2)/(total1 + total2), 'Passed: ',  passed1 + passed2, 'Total: ', total1 + total2)



In [11]:
"""
Models: assoc, lagged, contemp, bin.
Assoc: Y_t ~ X_t
Contemp: Y_t ~ PS_t + X_t + X_t-1 + Y_t-1 + L_t + L_t-1 + Month
Lagged: Y_t ~ PS_t-1 + X_t-1 + X_t-2 + L_t-1 + L_t-2 + Y_t-2 + Month
Bin: Y_t ~ PS + I_t + I_t*Time + Time + L_t + Month #+ Y_t-1 
"""
def run_primary_analyses(df, X, outcome, PS=False, PS_logistic=False, fit_method='OLS', model='lagged', 
                         add_L=True, difference=False, include_time=False, 
                         include_month=False, duration_months=None, normalize=False, verbose=False):
    """Build the regression specification for one (intervention, outcome) pair and fit it.

    The specification is assembled as ``analysis_dct`` with keys 'X'
    (exposure), 'Y' (outcome), 'C' (covariates) and 'Dep_PS'/'Ind_PS'
    (propensity-score regression), each entry being ``(column, [lags])``:

      assoc:   X at lag 0 only; no covariates beyond optional time/month terms.
      contemp: X at lags [0, 1], outcome at lag [1], confounds at [0, 1]
               (or [1] when ``add_L`` is False).
      lagged:  same as contemp with every lag shifted by one.
      bin:     ``<X>_bin`` and ``<X>_bin*Time_C`` at lag 0; the module-level
               ``bin_slope`` flag decides which one is the exposure and which
               is a covariate.

    Parameters
    ----------
    df : DataFrame with 'ds', 'Time', 'Month' and all referenced columns.
        Note: helper columns (month dummies, ``<X>_bin_orig``) are added to
        it, so pass a copy if the original must stay untouched.
    X : intervention column name, or 'all_docs'.
    outcome : outcome column name; must be a key of the internal category map.
    model : 'assoc', 'lagged', 'contemp' or 'bin'. 'assoc' requires
        ``add_L is False`` and ``PS is False``.
    add_L : include contemporaneous confound terms (and other documentaries).
    include_time, include_month : add a 'Time' term / month dummies.
    duration_months : bin model only; keep rows up to this many months
        (x4 rows for weekly data) after the release date of ``X``.
    PS, PS_logistic, fit_method, difference, normalize, verbose :
        forwarded unchanged to ``run_analysis``.

    Returns
    -------
    (x_pval, x_beta, x_se) : the first three values returned by ``run_analysis``.
    """
    assert model in ['assoc', 'lagged', 'contemp', 'bin']
    # Fix: was `model is 'assoc'`. Identity comparison against a str literal is
    # not guaranteed to match equal strings and raises a SyntaxWarning.
    if model == 'assoc':
        assert add_L is False
        assert PS is False

    # Other documentaries that may enter as covariates, per time unit.
    if units == 'months':
        other_docs = ['fok', 'cowspiracy', 'owth', 'tgc']
    elif units == 'weeks':
        other_docs = ['fok', 'cowspiracy', 'okja', 'wth', 'tgc']
    elif units == 'days':
        other_docs = ['fok', 'cowspiracy', 'okja', 'wth']
    else:
        assert 0

    # For the bin model only the documentaries listed for X are adjusted for.
    bin_other_docs = {'fok': [], 'cowspiracy': ['fok'], 'owth': ['fok', 'cowspiracy'],
                      'okja': ['fok', 'cowspiracy'], 'wth': ['fok', 'cowspiracy'],
                      'tgc': ['fok', 'cowspiracy', 'owth'],
                      'yawye': ['fok', 'cowspiracy', 'owth', 'tgc']}

    if duration_months is not None:
        assert model == 'bin'
        init_date = df['ds'][0]
        release = netflix_release_dates[X]
        this_date = '{y}-{m}-{d}'.format(y=release[0], m=release[1], d=release[2])

        release_date_time = date_difference(init_date, this_date, units)

        if units == 'months':
            offset = duration_months
        elif units == 'weeks':
            offset = duration_months*4
        else:
            assert 0

        # .copy() so the column assignments below write to an independent frame
        # instead of a slice (avoids SettingWithCopyWarning).
        df = df.loc[df['Time'] <= release_date_time + offset].copy()

        print('Intvn start time: ', df.loc[df[X + '_bin'] == 1].head()['ds'])
        print('After truncating by duration_months: ', df.tail()['ds'])

    trt_lag = 1 if (model == 'lagged') else 0

    # Base confounds plus one extra confound chosen by the outcome's category.
    confounds = ['health', 'animal_welfare', 'all_climate']
    extra_confounds = {'trends_full': ['food'],
                       'trends_inf': ['informative_bare'],
                       'trends_behav': ['behavior_bare'],
                       'consumption': ['rdpi']}
    non_add_L_set = []

    category_dct = {'vegan': 'trends_full', 'vegetarian': 'trends_full',
                    'plant_based_plus_plant_based': 'trends_full',
                    'vegan_informative': 'trends_inf', 'vegetarian_informative': 'trends_inf',
                    'plant_based_informative': 'trends_inf',
                    'vegan_behavior': 'trends_behav', 'vegetarian_behavior': 'trends_behav',
                    'plant_based_behavior': 'trends_behav',
                    'ksu_chicken': 'consumption', 'ksu_pork': 'consumption',
                    'ksu_beef': 'consumption', 'Zhao': 'consumption', 'NeuhoferLusk': 'consumption',
                    'StewartMilk': 'consumption', 'StewartPBMilk': 'consumption'}

    confounds += extra_confounds[category_dct[outcome]]

    analysis_dct = {'C': [], 'Ind_PS': []}

    if include_time:
        analysis_dct['C'].append(('Time', [0]))
        analysis_dct['Ind_PS'].append(('Time', [0]))

    if include_month:
        # One 0/1 dummy per month value, skipping the smallest as the baseline.
        for this_val in sorted(df['Month'].unique())[1:]:
            dummy_name = 'Month_Dummy' + str(this_val)
            df[dummy_name] = (df['Month'] == this_val).astype(float)
            analysis_dct['C'].append((dummy_name, [0]))
            analysis_dct['Ind_PS'].append((dummy_name, [0]))

    if model in ('lagged', 'contemp'):
        # The two models differ only through trt_lag (1 vs 0). Fresh list
        # objects are built for every entry so no two entries share a list.
        analysis_dct['X'] = (X, [trt_lag, trt_lag+1])
        analysis_dct['Y'] = (outcome, [trt_lag+1])

        for c in confounds:
            if add_L and c not in non_add_L_set:
                analysis_dct['C'].append((c, [trt_lag, trt_lag+1]))
            else:
                analysis_dct['C'].append((c, [trt_lag+1]))

        # Propensity-score regression: X at lag 0 on its own lag, the lagged
        # outcome and the confounds.
        analysis_dct['Dep_PS'] = (X, [0])
        analysis_dct['Ind_PS'] += [(X, [1]), (outcome, [1])]

        for c in confounds:
            analysis_dct['Ind_PS'].append((c, [1]))
            if add_L:
                analysis_dct['Ind_PS'].append((c, [0]))

    elif model == 'assoc':
        analysis_dct['X'] = (X, [0])
        analysis_dct['Y'] = (outcome, [])

    elif model == 'bin':
        level_col = X + '_bin'
        slope_col = X + '_bin*Time_C'
        # bin_slope (module-level) picks the exposure; the other term is a covariate.
        if bin_slope:
            exposure_col, covariate_col = slope_col, level_col
        else:
            exposure_col, covariate_col = level_col, slope_col

        analysis_dct['X'] = (exposure_col, [0])
        analysis_dct['Y'] = (outcome, [])
        analysis_dct['C'].append((covariate_col, [0]))
        if add_L:
            for c in confounds:
                analysis_dct['C'].append((c, [0]))

        # Specifically the PS regression. Note this REPLACES any time/month
        # terms that were added to 'Ind_PS' above.
        df[X + '_bin_orig'] = df[X + '_bin'].copy()
        analysis_dct['Dep_PS'] = (X + '_bin_orig', [0])
        analysis_dct['Ind_PS'] = []
        if add_L:
            for c in confounds:
                analysis_dct['Ind_PS'].append((c, [0]))

    # Add the other documentaries as covariates (not for the pooled 'all_docs').
    if X != 'all_docs':
        for other_doc in other_docs:
            if other_doc == X:
                continue
            if model in ('lagged', 'contemp'):
                if add_L:
                    analysis_dct['C'].append((other_doc, [trt_lag, trt_lag+1]))
                else:
                    analysis_dct['C'].append((other_doc, [trt_lag+1]))
            elif model == 'bin':
                if other_doc in bin_other_docs[X]:
                    if add_L:
                        analysis_dct['C'].append((other_doc, [0]))
            # 'assoc' adds no other-documentary covariates.

    # include_time/include_month are forced off: the corresponding terms were
    # already written into analysis_dct above.
    x_pval, x_beta, x_se, _ = run_analysis(analysis_dct, df, trt_lag, non_add_L_set,
                                           PS=PS, PS_logistic=PS_logistic, fit_method=fit_method,
                                           add_L=add_L,
                                           difference=difference,
                                           include_time=False,
                                           include_month=False,
                                           normalize=normalize,
                                           verbose=verbose)

    return x_pval, x_beta, x_se

In [12]:
# Merge for controls
# 522 is the Bonferroni divisor passed to run_controls below.
if do_controls:
    # Join the main and control tables on their shared key columns.
    merged_for_controls = normalized.merge(normalized_controls, on=common)
    run_controls(
        merged_for_controls.copy(),
        pval_thresh=0.05,
        PS=True,
        PS_logistic=False,
        fit_method='GLSAR',
        model='lagged',
        bonf_denom=522,
        add_L=False,
        difference=True,
        normalize=True,
        include_month=True,
        include_time=False,
        verbose=False,
    )
    
    """
    run_controls_binary(merged_for_controls.copy(), PS=False, PS_logistic=False, fit_method='GLSAR', model='bin', bonf_denom=5,
                 add_L=True,
                duration_months=12,
                difference=False, verbose=False)
    """

In [13]:
# Dfs for heatmaps
heat_dfs = {}
annot_dfs = {}

# Display labels: intervention key -> column label.
intvn_map = {
    'fok': 'FOK',
    'cowspiracy': 'Cowspiracy',
    'wth': 'WTH',
    'okja': "Okja",
    'owth': 'Okja/WTH',
    'tgc': 'TGC',
    'yawye': 'YAWYE',
    'all_docs': 'All',
}

# Display labels: outcome key -> row label.
outcome_map = {
    'vegan': "Searches: `Vegan'",
    'vegetarian': "Searches: `Vegetarian'",
    'plant_based_plus_plant_based': "Searches: `Plant based'",
    'ksu_beef': 'Beef Demand',
    'ksu_chicken': 'Chicken Demand',
    'ksu_pork': 'Pork Demand',
    'Zhao': 'Zhao',
    'NeuhoferLusk': 'NeuhoferLusk',
    'StewartPBMilk': 'StewartPBMilk',
    'StewartMilk': 'StewartMilk',
    'vegan_informative': "Searches: `Vegan', Informative",
    'vegetarian_informative': "Searches: `Vegetarian', Informative",
    'plant_based_informative': "Searches: `Plant based', Informative",
    'vegan_behavior': "Searches: `Vegan', Behavior",
    'vegetarian_behavior': "Searches: `Vegetarian', Behavior",
    'plant_based_behavior': "Searches: `Plant based', Behavior",
}

# Nested result stores, indexed [model][intervention label][outcome label].
coef_dcts, annot_dcts, pval_dcts, se_dcts = {}, {}, {}, {}

for model in models:
    coef_dcts[model] = {}
    annot_dcts[model] = {}
    pval_dcts[model] = {}
    se_dcts[model] = {}

    for intvn in intvns:
        col_label = intvn_map[intvn]
        coef_dcts[model][col_label] = {}
        annot_dcts[model][col_label] = {}
        pval_dcts[model][col_label] = {}
        se_dcts[model][col_label] = {}

        # Pre-fill every cell so pairs that are never fitted stay NaN / blank.
        for outcome in all_outcomes:
            row_label = outcome_map[outcome]
            coef_dcts[model][col_label][row_label] = np.nan
            annot_dcts[model][col_label][row_label] = ''
            pval_dcts[model][col_label][row_label] = np.nan
            se_dcts[model][col_label][row_label] = np.nan

    # Frames built here hold the placeholder values only.
    heat_dfs[model] = pd.DataFrame(coef_dcts[model])
    annot_dfs[model] = pd.DataFrame(annot_dcts[model])

In [14]:
def eval_pval(pval, thresh1, thresh2, sens_pvals=None):
    """Translate a p-value into a significance marker string.

    Returns
    -------
    ''    : ``pval`` is below neither threshold.
    '*'   : ``pval`` is below ``thresh1`` but not below ``thresh2``.
    '**'  : ``pval`` is below ``thresh2`` and either no sensitivity p-values
            were supplied (None or empty) or at least one of them is
            ``>= thresh2``.
    '***' : ``pval`` is below ``thresh2`` and no supplied sensitivity p-value
            is ``>= thresh2``.
    """
    # Not significant at the stricter level: fall back to the looser threshold.
    if not pval < thresh2:
        return '*' if pval < thresh1 else ''

    # Significant at thresh2; upgrade only when sensitivity runs were given
    # and none of them contradicts the result.
    if sens_pvals and not any(sens >= thresh2 for sens in sens_pvals):
        return '***'
    return '**'

In [15]:
# Pairwise correlation between `owth_bin` and `owth_bin*Time_C`. The 'bin'
# model enters both of these columns in the same regression (one as the
# exposure, the other as a covariate), so a high value here indicates
# collinearity between them.
normalized[['owth_bin', 'owth_bin*Time_C']].corr()

Unnamed: 0,owth_bin,owth_bin*Time_C
owth_bin,1.0,0.814002
owth_bin*Time_C,0.814002,1.0


In [16]:
# Display the interventions the primary-analysis loop will iterate over.
test_intvns

['fok', 'cowspiracy', 'okja', 'wth', 'tgc', 'yawye', 'all_docs']

In [17]:
def _record_estimate(model_name, intvn, outcome, beta, se, pval, sens_pvals=None):
    """Store one fitted estimate in the module-level result dicts."""
    col_label = intvn_map[intvn]
    row_label = outcome_map[outcome]
    coef_dcts[model_name][col_label][row_label] = beta
    se_dcts[model_name][col_label][row_label] = se
    annot_dcts[model_name][col_label][row_label] = eval_pval(pval, pval_thresh_1, pval_thresh_2, sens_pvals)
    pval_dcts[model_name][col_label][row_label] = pval


def _fit_with_sensitivity(model_name, intvn, outcome, sens_add_L):
    """Fit a 'contemp' or 'lagged' model plus its two sensitivity variants.

    Returns (pval, beta, se, [sens_pval_1, sens_pval_2]) where variant 1
    flips ``add_L`` and variant 2 turns the propensity score off.
    """
    shared = dict(fit_method='GLSAR', model=model_name,
                  difference=do_differencing, include_time=include_time,
                  include_month=include_month, normalize=normalize, verbose=verbose)

    pval, beta, se = run_primary_analyses(normalized.copy(), intvn, outcome,
                                          PS=True, add_L=add_L, **shared)
    sens1, _, _ = run_primary_analyses(normalized.copy(), intvn, outcome,
                                       PS=True, add_L=sens_add_L, **shared)
    sens2, _, _ = run_primary_analyses(normalized.copy(), intvn, outcome,
                                       PS=False, add_L=add_L, **shared)
    return pval, beta, se, [sens1, sens2]


if do_primary:
    # First sensitivity analysis: same model with the add_L switch flipped.
    sens_add_L = not add_L
    for outcome in test_outcomes:
        for intvn in test_intvns:
            # Skip pairs excluded for this outcome.
            if outcome in restrictions and intvn not in restrictions[outcome]:
                continue

            print('intvn: ', intvn)

            if 'assoc' in models_to_run:
                print('Association: ')
                # The association model has no confounds, PS or month terms.
                assoc_pval, assoc_beta, assoc_se = run_primary_analyses(
                    normalized.copy(), intvn, outcome, PS=False,
                    fit_method='GLSAR', model='assoc',
                    add_L=False, difference=do_differencing, include_time=include_time,
                    include_month=False, normalize=normalize, verbose=verbose)

                print('beta, pval: ', assoc_beta, assoc_pval)
                _record_estimate('assoc', intvn, outcome, assoc_beta, assoc_se, assoc_pval)

            if 'contemp' in models_to_run:
                print('Contemporaneous: ')
                contemp_pval, contemp_beta, contemp_se, contemp_pvals_sens = _fit_with_sensitivity(
                    'contemp', intvn, outcome, sens_add_L)

                print('beta, pval: ', contemp_beta, contemp_pval)
                print('contemp_pvals_sens: ', contemp_pvals_sens)
                _record_estimate('contemp', intvn, outcome, contemp_beta, contemp_se,
                                 contemp_pval, contemp_pvals_sens)

            if 'lagged' in models_to_run:
                print('Lagged: ')
                lagged_pval, lagged_beta, lagged_se, lagged_pvals_sens = _fit_with_sensitivity(
                    'lagged', intvn, outcome, sens_add_L)

                print('beta, pval: ', lagged_beta, lagged_pval)
                print('lagged_pvals_sens: ', lagged_pvals_sens)
                _record_estimate('lagged', intvn, outcome, lagged_beta, lagged_se,
                                 lagged_pval, lagged_pvals_sens)

            if 'bin' in models_to_run:
                # Fix: this banner used to sit outside the guard and was printed
                # for every pair even when the bin model was not requested.
                print('Bin: ')

                # The bin model is restricted per outcome and never run on the
                # pooled 'all_docs' intervention.
                if (outcome in binary_analysis_dct) and (intvn not in binary_analysis_dct[outcome]):
                    continue

                if intvn == 'all_docs':
                    continue

                bin_kwargs = dict(PS=False, PS_logistic=False,
                                  fit_method='GLSAR', model='bin',
                                  difference=do_differencing_bin, include_time=include_time_bin,
                                  include_month=include_month, duration_months=duration_months,
                                  normalize=normalize, verbose=verbose)

                bin_pval, bin_beta, bin_se = run_primary_analyses(
                    normalized.copy(), intvn, outcome, add_L=add_L, **bin_kwargs)

                bin_pval_sens1, _, _ = run_primary_analyses(
                    normalized.copy(), intvn, outcome, add_L=sens_add_L, **bin_kwargs)

                # NOTE(review): this call has exactly the same arguments as the
                # primary bin fit above, so it cannot act as an independent
                # sensitivity check. Confirm which variant was intended.
                bin_pval_sens2, _, _ = run_primary_analyses(
                    normalized.copy(), intvn, outcome, add_L=add_L, **bin_kwargs)

                print('beta, pval: ', bin_beta, bin_pval)

                bin_pvals_sens = [bin_pval_sens1, bin_pval_sens2]
                _record_estimate('bin', intvn, outcome, bin_beta, bin_se, bin_pval, bin_pvals_sens)
                print('bin_pvals_sens: ', bin_pvals_sens)


intvn:  fok
Association: 
endog:  plant_based_plus_plant_based exog:  ['Intercept', 'fok_lag0']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.060
Model:                                    GLSAR   Adj. R-squared:                  0.059
Method:                           Least Squares   F-statistic:                     67.76
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           5.36e-16
Time:                                  17:39:53   Log-Likelihood:                -1473.8
No. Observations:                          1063   AIC:                             2952.
Df Residuals:                              1061   BIC:                             2962.
Df Model:                                     1                                         
Covariance Type:                      nonrobust                                         
              

look-up table. The actual p-value is greater than the p-value returned.



prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    fok   R-squared:                       0.049
Model:                            OLS   Adj. R-squared:                  0.030
Method:                 Least Squares   F-statistic:                     2.546
Date:                Sat, 26 Oct 2024   Prob (F-statistic):           0.000157
Time:                        17:39:53   Log-Likelihood:                -1481.7
No. Observations:                1063   AIC:                             3007.
Df Residuals:                    1041   BIC:                             3117.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                           coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.153
Model:                                    GLSAR   Adj. R-squared:                  0.135
Method:                           Least Squares   F-statistic:                     8.512
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           1.38e-25
Time:                                  17:39:54   Log-Likelihood:                -1422.9
No. Observations:                          1062   AIC:                             2892.
Df Residuals:                              1039   BIC:                             3006.
Df Model:                                    22                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  2.1612474810513808e-19
kpss_pval:  0.1
beta, pval:  0.17695711528592983 1.5678924799520362e-11
contemp_pvals_sens:  [1.6350373811781998e-13, 1.5679364133593594e-11]
Lagged: 
ps string:  fok ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(fok, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    fok   R-squared:                       0.049
Model:                            OLS   Adj. R-squared:                  0.030
Method:                 Least Squares   F-statistic:                   

look-up table. The actual p-value is greater than the p-value returned.



ps string:  fok ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(fok, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(animal_welfare, 1) + lag(all_climate, 1) + lag(food, 1)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    fok   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.011
Method:                 Least Squares   F-statistic:                     1.666
Date:                Sat, 26 Oct 2024   Prob (F-statistic):             0.0434
Time:                        17:39:54   Log-Likelihood:                -1494.1
No. Observations:                1063   AIC:                             3024.
Df Residuals:      

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



endog:  plant_based_plus_plant_based exog:  ['Intercept', 'Month_Dummy2_lag0', 'Month_Dummy3_lag0', 'Month_Dummy4_lag0', 'Month_Dummy5_lag0', 'Month_Dummy6_lag0', 'Month_Dummy7_lag0', 'Month_Dummy8_lag0', 'Month_Dummy9_lag0', 'Month_Dummy10_lag0', 'Month_Dummy11_lag0', 'Month_Dummy12_lag0', 'health_lag1', 'health_lag2', 'animal_welfare_lag1', 'animal_welfare_lag2', 'all_climate_lag1', 'all_climate_lag2', 'food_lag1', 'food_lag2', 'cowspiracy_lag1', 'cowspiracy_lag2', 'okja_lag1', 'okja_lag2', 'wth_lag1', 'wth_lag2', 'tgc_lag1', 'tgc_lag2', 'fok_lag1', 'fok_lag2', 'plant_based_plus_plant_based_lag2']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.165
Model:                                    GLSAR   Adj. R-squared:                  0.141
Method:                           Least Squares   F-statistic:                     6.803
Date:                          Sat, 2

look-up table. The actual p-value is greater than the p-value returned.



ps string:  cowspiracy ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(cowspiracy, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:             cowspiracy   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     1.210
Date:                Sat, 26 Oct 2024   Prob (F-statistic):              0.233
Time:                        17:39:55   Log-Likelihood:                -1495.5
No. O

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.106
Model:                                    GLSAR   Adj. R-squared:                  0.087
Method:                           Least Squares   F-statistic:                     5.591
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           5.26e-15
Time:                                  17:39:55   Log-Likelihood:                -1450.5
No. Observations:                          1062   AIC:                             2947.
Df Residuals:                              1039   BIC:                             3061.
Df Model:                                    22                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  2.161247481051583e-19
kpss_pval:  0.1
beta, pval:  0.019440886883804305 0.4457275300454373
contemp_pvals_sens:  [0.48071103859925657, 0.44572358535014045]
Lagged: 
ps string:  cowspiracy ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(cowspiracy, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:             cowspiracy   R-squared:                       0.024
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:               

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  2.439496631235588e-16
kpss_pval:  0.1
endog:  plant_based_plus_plant_based exog:  ['Intercept', 'Month_Dummy2_lag0', 'Month_Dummy3_lag0', 'Month_Dummy4_lag0', 'Month_Dummy5_lag0', 'Month_Dummy6_lag0', 'Month_Dummy7_lag0', 'Month_Dummy8_lag0', 'Month_Dummy9_lag0', 'Month_Dummy10_lag0', 'Month_Dummy11_lag0', 'Month_Dummy12_lag0', 'health_lag1', 'health_lag2', 'animal_welfare_lag1', 'animal_welfare_lag2', 'all_climate_lag1', 'all_climate_lag2', 'food_lag1', 'food_lag2', 'fok_lag1', 'fok_lag2', 'okja_lag1', 'okja_lag2', 'wth_lag1', 'wth_lag2', 'tgc_lag1', 'tgc_lag2', 'cowspiracy_lag1', 'cowspiracy_lag2', 'plant_based_plus_plant_based_lag2']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.165
Model:                                    GLSAR   Adj. R-squared:                  0.141
Method:                           Least Squares   F-statistic:               

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  3.734815426745184e-17
kpss_pval:  0.1
beta, pval:  0.09429384812646935 0.0017471557870244621
Contemporaneous: 
ps string:  okja ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(okja, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                   okja   R-squared:                       0.051
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     2.658
Date:                Sat, 26 Oct 2024   Prob (F-stati

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.112
Model:                                    GLSAR   Adj. R-squared:                  0.093
Method:                           Least Squares   F-statistic:                     5.946
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           2.78e-16
Time:                                  17:39:57   Log-Likelihood:                -1448.3
No. Observations:                          1062   AIC:                             2943.
Df Residuals:                              1039   BIC:                             3057.
Df Model:                                    22                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.343
Model:                                    GLSAR   Adj. R-squared:                  0.324
Method:                           Least Squares   F-statistic:                     17.97
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           3.04e-74
Time:                                  17:39:57   Log-Likelihood:                -1284.8
No. Observations:                          1062   AIC:                             2632.
Df Residuals:                              1031   BIC:                             2786.
Df Model:                                    30                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.165
Model:                                    GLSAR   Adj. R-squared:                  0.140
Method:                           Least Squares   F-statistic:                     6.577
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           1.67e-24
Time:                                  17:39:57   Log-Likelihood:                -1425.2
No. Observations:                          1061   AIC:                             2914.
Df Residuals:                              1029   BIC:                             3073.
Df Model:                                    31                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.072
Model:                                    GLSAR   Adj. R-squared:                  0.052
Method:                           Least Squares   F-statistic:                     3.509
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           5.56e-08
Time:                                  17:39:58   Log-Likelihood:                -1467.3
No. Observations:                          1061   AIC:                             2983.
Df Residuals:                              1037   BIC:                             3102.
Df Model:                                    23                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.067
Model:                                    GLSAR   Adj. R-squared:                  0.066
Method:                           Least Squares   F-statistic:                     76.05
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           1.05e-17
Time:                                  17:39:58   Log-Likelihood:                -1471.0
No. Observations:                          1063   AIC:                             2946.
Df Residuals:                              1061   BIC:                             2956.
Df Model:                                     1                                         
Covariance Type:                      nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.343
Model:                                    GLSAR   Adj. R-squared:                  0.324
Method:                           Least Squares   F-statistic:                     17.97
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           3.04e-74
Time:                                  17:39:58   Log-Likelihood:                -1284.8
No. Observations:                          1062   AIC:                             2632.
Df Residuals:                              1031   BIC:                             2786.
Df Model:                                    30                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.143
Model:                                    GLSAR   Adj. R-squared:                  0.125
Method:                           Least Squares   F-statistic:                     7.909
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           2.12e-23
Time:                                  17:39:59   Log-Likelihood:                -1428.3
No. Observations:                          1062   AIC:                             2903.
Df Residuals:                              1039   BIC:                             3017.
Df Model:                                    22                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.343
Model:                                    GLSAR   Adj. R-squared:                  0.324
Method:                           Least Squares   F-statistic:                     17.97
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           3.04e-74
Time:                                  17:39:59   Log-Likelihood:                -1284.8
No. Observations:                          1062   AIC:                             2632.
Df Residuals:                              1031   BIC:                             2786.
Df Model:                                    30                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.165
Model:                                    GLSAR   Adj. R-squared:                  0.140
Method:                           Least Squares   F-statistic:                     6.579
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           1.64e-24
Time:                                  17:39:59   Log-Likelihood:                -1425.2
No. Observations:                          1061   AIC:                             2914.
Df Residuals:                              1029   BIC:                             3073.
Df Model:                                    31                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.092
Model:                                    GLSAR   Adj. R-squared:                  0.072
Method:                           Least Squares   F-statistic:                     4.564
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           9.47e-12
Time:                                  17:39:59   Log-Likelihood:                -1458.4
No. Observations:                          1061   AIC:                             2965.
Df Residuals:                              1037   BIC:                             3084.
Df Model:                                    23                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  1.5815714291460277e-27
kpss_pval:  0.1
beta, pval:  0.1687797364841667 1.0205534198076253e-06
lagged_pvals_sens:  [6.170456302562092e-06, 1.0146151264438142e-06]
Bin: 
intvn:  tgc
Association: 
endog:  plant_based_plus_plant_based exog:  ['Intercept', 'tgc_lag0']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.153
Model:                                    GLSAR   Adj. R-squared:                  0.152
Method:                           Least Squares   F-statistic:                     192.0
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           2.99e-40
Time:                                  17:40:00   Log-Likelihood:                -1419.3
No. Observations:                          1063   AIC:                             2843.
Df Residuals:                              1061   BIC:                             2852.
Df Model:    

look-up table. The actual p-value is greater than the p-value returned.



var, sorted corrs:  tgc_lag1                            -0.801241
all_climate_lag1                    -0.302533
plant_based_plus_plant_based_lag1   -0.238925
animal_welfare_lag1                 -0.162488
health_lag1                         -0.157970
Month_Dummy8_lag0                   -0.119532
Month_Dummy12_lag0                  -0.119532
Month_Dummy7_lag0                   -0.071719
Month_Dummy5_lag0                   -0.023636
Month_Dummy4_lag0                   -0.023330
Month_Dummy2_lag0                   -0.023330
Month_Dummy3_lag0                   -0.011665
wth_lag1                            -0.004179
cowspiracy_lag0                     -0.000849
cowspiracy_lag1                      0.002309
fok_lag1                             0.002588
okja_lag1                            0.005214
okja_lag0                            0.015166
animal_welfare_lag0                  0.021073
Month_Dummy10_lag0                   0.047813
wth_lag0                             0.048429
Month_Dummy6_l

look-up table. The actual p-value is greater than the p-value returned.



prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    tgc   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     1.122
Date:                Sat, 26 Oct 2024   Prob (F-statistic):              0.326
Time:                        17:40:00   Log-Likelihood:                -1498.7
No. Observations:                1063   AIC:                             3033.
Df Residuals:                    1045   BIC:                             3123.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                                           coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------

look-up table. The actual p-value is greater than the p-value returned.



endog:  plant_based_plus_plant_based exog:  ['Intercept', 'Month_Dummy2_lag0', 'Month_Dummy3_lag0', 'Month_Dummy4_lag0', 'Month_Dummy5_lag0', 'Month_Dummy6_lag0', 'Month_Dummy7_lag0', 'Month_Dummy8_lag0', 'Month_Dummy9_lag0', 'Month_Dummy10_lag0', 'Month_Dummy11_lag0', 'Month_Dummy12_lag0', 'health_lag0', 'health_lag1', 'animal_welfare_lag0', 'animal_welfare_lag1', 'all_climate_lag0', 'all_climate_lag1', 'food_lag0', 'food_lag1', 'fok_lag0', 'fok_lag1', 'cowspiracy_lag0', 'cowspiracy_lag1', 'okja_lag0', 'okja_lag1', 'wth_lag0', 'wth_lag1', 'tgc_lag0', 'tgc_lag1', 'plant_based_plus_plant_based_lag1']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.343
Model:                                    GLSAR   Adj. R-squared:                  0.324
Method:                           Least Squares   F-statistic:                     17.97
Date:                          Sat, 2

look-up table. The actual p-value is greater than the p-value returned.



ps string:  tgc ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(tgc, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    tgc   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.071
Date:                Sat, 26 Oct 2024   Prob (F-statistic):              0.374
Time:                        17:40:00   Log-Likelihood:                -1497.0
No. Observations:  

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  1.5576174077528128e-27
kpss_pval:  0.1
ps string:  tgc ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(tgc, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(animal_welfare, 1) + lag(all_climate, 1) + lag(food, 1)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                    tgc   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     1.122
Date:                Sat, 26 Oct 2024   Prob (F-statistic):              0.326
Time:                        17:40:01   Log-Likelihood:                -1498.7
No. Observations:                1063   AIC:    

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  6.814022589412428e-22
kpss_pval:  0.1
endog:  plant_based_plus_plant_based exog:  ['Intercept', 'Month_Dummy2_lag0', 'Month_Dummy3_lag0', 'Month_Dummy4_lag0', 'Month_Dummy5_lag0', 'Month_Dummy6_lag0', 'Month_Dummy7_lag0', 'Month_Dummy8_lag0', 'Month_Dummy9_lag0', 'Month_Dummy10_lag0', 'Month_Dummy11_lag0', 'Month_Dummy12_lag0', 'health_lag1', 'health_lag2', 'animal_welfare_lag1', 'animal_welfare_lag2', 'all_climate_lag1', 'all_climate_lag2', 'food_lag1', 'food_lag2', 'fok_lag1', 'fok_lag2', 'cowspiracy_lag1', 'cowspiracy_lag2', 'okja_lag1', 'okja_lag2', 'wth_lag1', 'wth_lag2', 'tgc_lag1', 'tgc_lag2', 'plant_based_plus_plant_based_lag2']
                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.165
Model:                                    GLSAR   Adj. R-squared:                  0.141
Method:                           Least Squares   F-statistic:               

look-up table. The actual p-value is greater than the p-value returned.



adf_pval:  3.328658768233813e-17
kpss_pval:  0.1
beta, pval:  0.3736950036512407 6.162935384462612e-37
Contemporaneous: 
ps string:  yawye ~  + lag(Month_Dummy2, 0) + lag(Month_Dummy3, 0) + lag(Month_Dummy4, 0) + lag(Month_Dummy5, 0) + lag(Month_Dummy6, 0) + lag(Month_Dummy7, 0) + lag(Month_Dummy8, 0) + lag(Month_Dummy9, 0) + lag(Month_Dummy10, 0) + lag(Month_Dummy11, 0) + lag(Month_Dummy12, 0) + lag(yawye, 1) + lag(plant_based_plus_plant_based, 1) + lag(health, 1) + lag(health, 0) + lag(animal_welfare, 1) + lag(animal_welfare, 0) + lag(all_climate, 1) + lag(all_climate, 0) + lag(food, 1) + lag(food, 0)
prop score summary: 
                            OLS Regression Results                            
Dep. Variable:                  yawye   R-squared:                       0.026
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                     1.321
Date:                Sat, 26 Oct 2024   Prob (F-stat

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.252
Model:                                    GLSAR   Adj. R-squared:                  0.235
Method:                           Least Squares   F-statistic:                     15.19
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           6.88e-51
Time:                                  17:40:02   Log-Likelihood:                -1357.9
No. Observations:                          1062   AIC:                             2764.
Df Residuals:                              1038   BIC:                             2883.
Df Model:                                    23                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.475
Model:                                    GLSAR   Adj. R-squared:                  0.458
Method:                           Least Squares   F-statistic:                     29.05
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):          9.63e-121
Time:                                  17:40:02   Log-Likelihood:                -1168.7
No. Observations:                          1062   AIC:                             2403.
Df Residuals:                              1029   BIC:                             2567.
Df Model:                                    32                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.215
Model:                                    GLSAR   Adj. R-squared:                  0.190
Method:                           Least Squares   F-statistic:                     8.525
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           1.30e-35
Time:                                  17:40:02   Log-Likelihood:                -1406.7
No. Observations:                          1061   AIC:                             2881.
Df Residuals:                              1027   BIC:                             3050.
Df Model:                                    33                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.095
Model:                                    GLSAR   Adj. R-squared:                  0.074
Method:                           Least Squares   F-statistic:                     4.520
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           5.59e-12
Time:                                  17:40:03   Log-Likelihood:                -1457.2
No. Observations:                          1061   AIC:                             2964.
Df Residuals:                              1036   BIC:                             3089.
Df Model:                                    24                                         
Covariance Type:                      nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
--------

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



                                GLSAR Regression Results                                
Dep. Variable:     plant_based_plus_plant_based   R-squared:                       0.207
Model:                                    GLSAR   Adj. R-squared:                  0.206
Method:                           Least Squares   F-statistic:                     276.7
Date:                          Sat, 26 Oct 2024   Prob (F-statistic):           2.15e-55
Time:                                  17:40:03   Log-Likelihood:                -1386.7
No. Observations:                          1063   AIC:                             2777.
Df Residuals:                              1061   BIC:                             2787.
Df Model:                                     1                                         
Covariance Type:                      nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
----------------------------

look-up table. The actual p-value is greater than the p-value returned.



KeyboardInterrupt: 

In [None]:
# Coefficient heatmaps: one figure per model, all drawn on one shared colour scale.
plt.rcParams['font.family'] = 'Helvetica'

model_order = ('assoc', 'contemp', 'lagged', 'bin')

# First pass: materialise the coefficient / annotation frames for each model
# and record the overall smallest and largest coefficient across all of them.
min_val, max_val = np.inf, -np.inf
for model in model_order:
    heat_dfs[model] = pd.DataFrame(coef_dcts[model])
    annot_dfs[model] = pd.DataFrame(annot_dcts[model])

    frame_low = heat_dfs[model].min().min()
    frame_high = heat_dfs[model].max().max()
    # Strict comparisons: a NaN extreme compares False and therefore never
    # replaces the running value.
    min_val = frame_low if frame_low < min_val else min_val
    max_val = frame_high if frame_high > max_val else max_val

# Human-readable figure titles keyed by model identifier.
model_maps = {
    'assoc': 'Association',
    'contemp': 'Contemporaneous',
    'lagged': 'Lagged',
    'bin': 'Binary ({d} Months)'.format(d=duration_months),
}

# Second pass: draw the heatmaps.  vmin/vmax come from the pass above so the
# colours are comparable between figures; center=0 pins the palette midpoint
# to a zero coefficient.
for model in model_order:
    plt.figure(figsize=(8, 6))
    cmap = sns.diverging_palette(
        h_neg=130, h_pos=10, s=99, l=55, sep=3, as_cmap=True
    )

    ax = sns.heatmap(
        heat_dfs[model],
        annot=annot_dfs[model],  # cell labels come from the annotation frame
        fmt='s',                 # labels are rendered with string formatting
        center=0,
        cmap=cmap,
        annot_kws={"size": 25},
        vmin=min_val,
        vmax=max_val,
        yticklabels=True,
        cbar=True,
    )

    # Keep the row labels horizontal.
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    plt.title(model_maps[model])
    plt.show()

In [None]:
def _suffixed_frames(model):
    """Return the (standard-error, point-estimate, p-value) frames for `model`.

    The frames are built from `se_dcts[model]`, `coef_dcts[model]` and
    `pval_dcts[model]`, and their columns are tagged with `_se`, `_pe` and
    `_pval` respectively.

    Each frame's *own* columns are suffixed. Renaming by the column list of
    one particular model's frame would leave any column missing from that
    frame with its bare name in all three frames, which then collide as
    duplicate labels once the frames are concatenated side by side.
    """
    return (
        pd.DataFrame(se_dcts[model]).add_suffix('_se'),
        pd.DataFrame(coef_dcts[model]).add_suffix('_pe'),
        pd.DataFrame(pval_dcts[model]).add_suffix('_pval'),
    )


# Per-model component frames (names kept so later ad-hoc cells can still use them).
assoc_se_df, assoc_coef_df, assoc_pval_df = _suffixed_frames('assoc')
lagged_se_df, lagged_coef_df, lagged_pval_df = _suffixed_frames('lagged')
contemp_se_df, contemp_coef_df, contemp_pval_df = _suffixed_frames('contemp')
bin_se_df, bin_coef_df, bin_pval_df = _suffixed_frames('bin')

# One wide table per model: all SE columns, then point estimates, then p-values.
assoc_df = pd.concat([assoc_se_df, assoc_coef_df, assoc_pval_df], axis=1)
lagged_df = pd.concat([lagged_se_df, lagged_coef_df, lagged_pval_df], axis=1)
contemp_df = pd.concat([contemp_se_df, contemp_coef_df, contemp_pval_df], axis=1)
bin_df = pd.concat([bin_se_df, bin_coef_df, bin_pval_df], axis=1)

# Label the row index so it is written out as an 'Outcome' column header.
for summary_df in (assoc_df, lagged_df, contemp_df, bin_df):
    summary_df.index.name = 'Outcome'

lagged_df

In [None]:
# Write the per-model summary tables and heatmap annotations to a fresh,
# timestamped results directory.  Set `save` to False to skip writing.
save = True
if save:
    # Spaces and colons in the timestamp are replaced so it is usable as a
    # directory name.
    timestamp = str(datetime.now()).replace(' ', '_').replace(':', '_')
    prefix = '../results/' + '_'.join((run_name, units, timestamp)) + '/'
    os.makedirs(prefix)

    # (frame, filename template) pairs for the combined SE / estimate / p-value tables.
    summary_outputs = (
        (assoc_df, 'assoc_df_{u}_all_primary.csv'),
        (lagged_df, 'lagged_df_{u}_all_primary.csv'),
        (contemp_df, 'contemp_df_{u}_all_primary.csv'),
        (bin_df, 'bin_df_{u}_all_primary.csv'),
    )
    for frame, template in summary_outputs:
        frame.to_csv(prefix + template.format(u=units))

    # (model key, filename template) pairs for the heatmap annotation tables.
    annot_outputs = (
        ('assoc', 'annot_assoc_df_{u}_all_primary.csv'),
        ('contemp', 'annot_contemp_df_{u}_all_primary.csv'),
        ('lagged', 'annot_lagged_df_{u}_all_primary.csv'),
        ('bin', 'annot_bin_df_{u}_all_primary.csv'),
    )
    for key, template in annot_outputs:
        annot_dfs[key].to_csv(prefix + template.format(u=units))

In [None]:
# Echo `prefix` so the cell output shows its current value; the save cell above
# rebinds it to the timestamped results directory when `save` is True.
prefix