In [1]:
import pandas as pd
import numpy as np

from ast import literal_eval

import statsmodels.formula.api as smf

import setup_modules
from lib.fomc import get_lname

In [2]:
fomc = pd.read_csv('../working-csvs/fomc.csv', index_col=0)
fred = pd.read_csv('../working-csvs/fred.csv')

In [3]:
sgs = pd.read_csv('../working-csvs/sgs.csv', index_col=0)
for col in ['tokens', 'probs', 'topk', 'sentiment']:
    sgs[col] = sgs[col].apply(literal_eval)
sgs

Unnamed: 0,date,speaker,section,content,enhanced,tokens,probs,topk,sentiment
0,1999-02-03,CHAIRMAN GREENSPAN.,1,That was a particularly impressive performance...,That was a particularly impressive performance...,"[particularli, impress, perform, three]","[(0, 0.022011643), (1, 0.022315186), (2, 0.021...",[],"{'label': 'positive', 'score': 0.9377896189689..."
1,1999-02-03,CHAIRMAN GREENSPAN.,1,I thought it most interesting in all respects.,I thought it most interesting in all respects.,"[thought, interest, respect]","[(0, 0.022751505), (1, 0.02140277), (2, 0.0214...",[],"{'label': 'neutral', 'score': 0.8762890100479126}"
2,1999-02-03,CHAIRMAN GREENSPAN.,1,I found the results of the forecast scenario t...,I found the results of the forecast scenario t...,"[found, result, forecast, scenario, assum, lar...","[(0, 0.020596826), (1, 0.020028388), (2, 0.019...","[(10, 0.0303), (12, 0.0397), (34, 0.0437)]","{'label': 'neutral', 'score': 0.6128905415534973}"
3,1999-02-03,CHAIRMAN GREENSPAN.,1,"If we look at the end result, we find that a s...","If we look at the end result, we find that a s...","[look, end, result, find, stock, market, bust,...","[(0, 0.021331787), (1, 0.02008242), (2, 0.0247...","[(9, 0.0347)]","{'label': 'negative', 'score': 0.9539210200309..."
4,1999-02-03,CHAIRMAN GREENSPAN.,1,Five or six years ago we thought 5 percent was...,Five or six years ago we thought 5 percent was...,"[five, six, year, ago, thought, percent, terri...","[(0, 0.02294944), (1, 0.021043114), (2, 0.0231...",[],"{'label': 'negative', 'score': 0.9214469790458..."
...,...,...,...,...,...,...,...,...,...
246466,2017-12-13,VICE CHAIRMAN DUDLEY.,2,So we do observe that inverted yield curves pr...,So we do observe that inverted yield curves pr...,"[observ, invert, yield, curv, preced, recess, ...","[(0, 0.020136567), (1, 0.019674476), (2, 0.020...","[(28, 0.0372), (35, 0.093)]","{'label': 'neutral', 'score': 0.8188691735267639}"
246467,2017-12-13,VICE CHAIRMAN DUDLEY.,2,"In this cycle, I expect the yield curve will c...","In this cycle, I expect the yield curve will c...","[cycl, expect, yield, curv, continu, flatten, ...","[(0, 0.02012691), (1, 0.018331075), (2, 0.0330...","[(6, 0.0373), (19, 0.0333), (28, 0.0483)]","{'label': 'negative', 'score': 0.9108558297157..."
246468,2017-12-13,VICE CHAIRMAN DUDLEY.,2,"But, to me, worrying now that we're committing...","But, to me, worrying now that we're committing...","[worri, commit, major, polici, mistak, rais, f...","[(0, 0.020604493), (1, 0.017746724), (2, 0.030...","[(3, 0.0551), (18, 0.0483), (20, 0.0366)]","{'label': 'negative', 'score': 0.8889493346214..."
246469,2017-12-13,VICE CHAIRMAN DUDLEY.,2,Our tightening moves have not yet tightened ov...,Our tightening moves have not yet tightened ov...,"[tighten, move, yet, tighten, overal, financi,...","[(0, 0.019498214), (1, 0.019106181), (2, 0.026...","[(28, 0.0365), (29, 0.0622)]","{'label': 'neutral', 'score': 0.4787711203098297}"


In [4]:
sgs['sentlabel'] = sgs['sentiment'].map(lambda x : x['label']).map({'neutral': 0, 'positive': 1, 'negative': '-1'})
sgs['sentscore'] = sgs['sentiment'].map(lambda x : x['score'])

In [5]:
# Filtered
fsgs = sgs[(sgs['topk'].map(len) > 0) & (sgs['sentscore'] > 0.5) & (sgs['sentlabel'] != 0)].copy()

In [6]:
k = 45

def partial_stance_vect(topics, sent_label):
    # maybe normalize
    total = sum([topic[1] for topic in topics])
    svect = [0] * k
    for topic, weight in topics:
        svect[topic] = float(sent_label) * float(weight) / total
    
    irrelevant =np.ones(45)
    irrelevant[4] = 0
    irrelevant[20] = 0
    irrelevant[22] = 0
    irrelevant[27] = 0 
    return np.array(svect) * irrelevant

In [7]:
fsgs['svect'] = fsgs.apply(lambda x : partial_stance_vect(x['topk'], x['sentlabel']), axis=1)

In [8]:
fsgs['speaker'] = fsgs['speaker'].map(get_lname)

In [9]:
fsgs = fsgs.merge(fomc, how='left', left_on=['date', 'speaker'], right_on=['date', 'member'])

In [10]:
fsgs = fsgs[fsgs['voter'].notna()]

In [11]:
fsgs

Unnamed: 0,date,speaker,section,content,enhanced,tokens,probs,topk,sentiment,sentlabel,sentscore,svect,member,voter,region,female,chair,exp
0,1999-02-03,greenspan,1,"If we look at the end result, we find that a s...","If we look at the end result, we find that a s...","[look, end, result, find, stock, market, bust,...","[(0, 0.021331787), (1, 0.02008242), (2, 0.0247...","[(9, 0.0347)]","{'label': 'negative', 'score': 0.9539210200309...",-1,0.953921,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",greenspan,1.0,governor,0.0,Greenspan,11.490411
1,1999-02-03,greenspan,1,"So, if someone were to say that a bust in stoc...","So, if someone were to say that a bust in stoc...","[someon, say, bust, stock, market, price, woul...","[(0, 0.020012224), (1, 0.019443788), (2, 0.022...","[(9, 0.032), (11, 0.0341), (25, 0.0374)]","{'label': 'negative', 'score': 0.911649227142334}",-1,0.911649,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",greenspan,1.0,governor,0.0,Greenspan,11.490411
2,1999-02-03,greenspan,1,I would presume that if the price of crude oil...,I would presume that if the price of crude oil...,"[would, presum, price, crude, oil, went, would...","[(0, 0.02159765), (1, 0.020465538), (2, 0.0202...","[(10, 0.0576), (31, 0.0344)]","{'label': 'negative', 'score': 0.8143059611320...",-1,0.814306,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",greenspan,1.0,governor,0.0,Greenspan,11.490411
3,1999-02-03,greenspan,1,"Well, let's assume that the CPI weighting is w...","Well, let's assume that the CPI weighting is w...","[well, let, assum, cpi, weight, wrong, clearli...","[(0, 0.020901103), (1, 0.020222858), (2, 0.018...","[(19, 0.0324), (32, 0.0372), (34, 0.0314)]","{'label': 'negative', 'score': 0.7547101378440...",-1,0.754710,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",greenspan,1.0,governor,0.0,Greenspan,11.490411
4,1999-02-03,greenspan,1,The point is that their perception of the infl...,The point is that their perception of the infl...,"[point, percept, inflat, rate, may, wrong, eco...","[(0, 0.020481486), (1, 0.02003508), (2, 0.0210...","[(20, 0.032), (25, 0.0348)]","{'label': 'negative', 'score': 0.7977304458618...",-1,0.797730,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",greenspan,1.0,governor,0.0,Greenspan,11.490411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92639,2017-12-13,dudley,2,If the curve were to invert in the current env...,If the curve were to invert in the current env...,"[curv, invert, current, environ, would, peopl,...","[(0, 0.021819353), (1, 0.018862993), (2, 0.020...","[(6, 0.0309), (35, 0.0626)]","{'label': 'negative', 'score': 0.5716786980628...",-1,0.571679,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.330481283422...",dudley,1.0,New York,0.0,Yellen,8.882192
92640,2017-12-13,dudley,2,"Investors anticipate, if monetary policy is ti...","Investors anticipate, if monetary policy is ti...","[investor, anticip, monetari, polici, tight, e...","[(0, 0.022857606), (1, 0.020568049), (2, 0.021...","[(40, 0.0306)]","{'label': 'negative', 'score': 0.8797866106033...",-1,0.879787,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",dudley,1.0,New York,0.0,Yellen,8.882192
92641,2017-12-13,dudley,2,"In this cycle, I expect the yield curve will c...","In this cycle, I expect the yield curve will c...","[cycl, expect, yield, curv, continu, flatten, ...","[(0, 0.02012691), (1, 0.018331075), (2, 0.0330...","[(6, 0.0373), (19, 0.0333), (28, 0.0483)]","{'label': 'negative', 'score': 0.9108558297157...",-1,0.910856,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.313708999158...",dudley,1.0,New York,0.0,Yellen,8.882192
92642,2017-12-13,dudley,2,"But, to me, worrying now that we're committing...","But, to me, worrying now that we're committing...","[worri, commit, major, polici, mistak, rais, f...","[(0, 0.020604493), (1, 0.017746724), (2, 0.030...","[(3, 0.0551), (18, 0.0483), (20, 0.0366)]","{'label': 'negative', 'score': 0.8889493346214...",-1,0.888949,"[0.0, 0.0, 0.0, -0.39357142857142857, 0.0, 0.0...",dudley,1.0,New York,0.0,Yellen,8.882192


In [12]:
ndf = fsgs.groupby(['date', 'section', 'speaker'])[['svect', 'voter', 'region', 'exp', 'female', 'chair']].agg(
    {'svect': 'sum', 'voter': 'max', 'exp': 'max', 'female': 'max', 'region': 'first', 'chair': 'first'})

In [13]:
nrdf = ndf.reset_index()

nrdf['diffavg'] = (nrdf['svect'] - nrdf.groupby(['date', 'section'])['svect'].transform('mean')).map(np.linalg.norm)

In [14]:
nrdf['date'] = pd.to_datetime(nrdf['date'])
nrdf['merge_date'] = nrdf['date'].dt.strftime('%Y-%m')
nrdf

Unnamed: 0,date,section,speaker,svect,voter,exp,female,region,chair,diffavg,merge_date
0,1999-02-03,1,boehne,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.813298121006...",1.0,18.016438,0.0,Philadelphia,Greenspan,2.965565,1999-02
1,1999-02-03,1,broaddus,"[0.0, 0.22751637879690292, 0.28740490278951814...",0.0,6.093151,0.0,Richmond,Greenspan,2.992641,1999-02
2,1999-02-03,1,ferguson,"[0.0, 0.0, -0.3704035874439462, 0.0, 0.0, 1.02...",1.0,1.246575,0.0,governor,Greenspan,2.651676,1999-02
3,1999-02-03,1,gramlich,"[0.0, -0.6131507733537453, 0.0, 0.0, 0.0, -0.6...",1.0,1.246575,0.0,governor,Greenspan,4.650914,1999-02
4,1999-02-03,1,greenspan,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0,11.490411,0.0,governor,Greenspan,1.918389,1999-02
...,...,...,...,...,...,...,...,...,...,...,...
4656,2017-12-13,2,powell,"[0.0, 0.0, 3.249530626685388, 0.0, 0.0, -0.365...",1.0,5.556164,0.0,governor,Yellen,1.687786,2017-12
4657,2017-12-13,2,quarles,"[0.2632226322263222, 0.0, 3.9789766411944916, ...",1.0,0.238356,0.0,governor,Yellen,2.460575,2017-12
4658,2017-12-13,2,rosengren,"[0.37258509659613614, 0.0, 0.3795712484237074,...",0.0,10.408219,0.0,Boston,Yellen,1.986754,2017-12
4659,2017-12-13,2,williams,"[0.4364820846905537, 0.3793103448275862, 3.772...",0.0,6.791781,0.0,San Francisco,Yellen,2.514672,2017-12


In [15]:
fred = fred.rename(columns={'PCE_PC1': 'pceinf', 'PCEPILFE_PC1': 'corepceinf', 'UNRATE': 'unrate', 'DATE': 'date'})

In [16]:
fred

Unnamed: 0,date,unrate,pceinf,corepceinf
0,1994-01-01,6.6,5.63043,2.24661
1,1994-02-01,6.6,6.36696,2.27283
2,1994-03-01,6.5,6.74325,2.39726
3,1994-04-01,6.4,6.32084,2.25372
4,1994-05-01,6.1,5.58746,2.13570
...,...,...,...,...
356,2023-09-01,3.8,5.54651,3.59248
357,2023-10-01,3.8,4.90982,3.37936
358,2023-11-01,3.7,5.45089,3.15332
359,2023-12-01,3.7,5.89681,2.93151


In [17]:
fred['date'] = pd.to_datetime(fred['date'])
fred['merge_date'] = fred['date'].dt.strftime('%Y-%m')

In [18]:
nrdf = nrdf.merge(fred, how='left', on='merge_date')
nrdf

Unnamed: 0,date_x,section,speaker,svect,voter,exp,female,region,chair,diffavg,merge_date,date_y,unrate,pceinf,corepceinf
0,1999-02-03,1,boehne,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.813298121006...",1.0,18.016438,0.0,Philadelphia,Greenspan,2.965565,1999-02,1999-02-01,4.4,6.55858,1.22177
1,1999-02-03,1,broaddus,"[0.0, 0.22751637879690292, 0.28740490278951814...",0.0,6.093151,0.0,Richmond,Greenspan,2.992641,1999-02,1999-02-01,4.4,6.55858,1.22177
2,1999-02-03,1,ferguson,"[0.0, 0.0, -0.3704035874439462, 0.0, 0.0, 1.02...",1.0,1.246575,0.0,governor,Greenspan,2.651676,1999-02,1999-02-01,4.4,6.55858,1.22177
3,1999-02-03,1,gramlich,"[0.0, -0.6131507733537453, 0.0, 0.0, 0.0, -0.6...",1.0,1.246575,0.0,governor,Greenspan,4.650914,1999-02,1999-02-01,4.4,6.55858,1.22177
4,1999-02-03,1,greenspan,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0,11.490411,0.0,governor,Greenspan,1.918389,1999-02,1999-02-01,4.4,6.55858,1.22177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4656,2017-12-13,2,powell,"[0.0, 0.0, 3.249530626685388, 0.0, 0.0, -0.365...",1.0,5.556164,0.0,governor,Yellen,1.687786,2017-12,2017-12-01,4.1,5.07616,1.57688
4657,2017-12-13,2,quarles,"[0.2632226322263222, 0.0, 3.9789766411944916, ...",1.0,0.238356,0.0,governor,Yellen,2.460575,2017-12,2017-12-01,4.1,5.07616,1.57688
4658,2017-12-13,2,rosengren,"[0.37258509659613614, 0.0, 0.3795712484237074,...",0.0,10.408219,0.0,Boston,Yellen,1.986754,2017-12,2017-12-01,4.1,5.07616,1.57688
4659,2017-12-13,2,williams,"[0.4364820846905537, 0.3793103448275862, 3.772...",0.0,6.791781,0.0,San Francisco,Yellen,2.514672,2017-12,2017-12-01,4.1,5.07616,1.57688


In [19]:
nrdf.to_csv('nrdf.csv')

In [20]:
nrdf2 = nrdf[nrdf['section'] == 2]

In [21]:
nrdf2['pceinf'] = nrdf2['pceinf'].astype('float')
nrdf2['corepceinf'] = nrdf2['corepceinf'].astype('float')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nrdf2['pceinf'] = nrdf2['pceinf'].astype('float')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nrdf2['corepceinf'] = nrdf2['corepceinf'].astype('float')


In [27]:
cov_name_map = {'exp': 'Experience', 'unrate': 'Unemployment',
                'I(exp ** 2)': 'Experience$^2$', 'pceinf': 'PCE inflation', 'C(female)[T.1.0]': 'Female', 'C(voter)[T.1.0]': 'Voter', 'I(exp ** 2):pceinf': 'Experience$^2$ $\\times$ PCE inflation',
                'I(exp ** 2):unrate': 'Experience$^2$ $\\times$ Unemployment', 'exp:pceinf': 'Experience $\\times$ PCE Inflation', 'exp:unrate': 'Experience $\\times$ Unemployment',
                "C(chair, Treatment(reference='Greenspan'))[T.Yellen]": "Yellen", "C(chair, Treatment(reference='Greenspan'))[T.Bernanke]": 'Bernanke'	
                }

In [28]:

model1 = smf.ols("diffavg ~ (exp + I(exp**2)) + C(chair, Treatment(reference='Greenspan')) + C(voter) + C(female) + unrate + pceinf + C(region)", nrdf2).fit(cov_type='cluster', cov_kwds={'groups': nrdf2['merge_date']})
model2 = smf.ols("diffavg ~ (exp + I(exp**2))*(unrate + pceinf) + C(chair, Treatment(reference='Greenspan')) + C(voter) + C(female) + C(region)", nrdf2).fit(cov_type='cluster', cov_kwds={'groups': nrdf2['merge_date']})

from stargazer.stargazer import Stargazer


sg = Stargazer([model1, model2])

sg.add_custom_notes(["This regression table reports results from regressions of the Euclidean distance of the stance vectors of individual committee participants from the committee average in the monetary policy discussion section. Key explanatory variables account for experience serving on the FOMC and economic conditions related to the Federal Reserve's dual mandate as measured by unemployment and PCE inflation calculated as the percentage change from 1 year ago. Additional controls include whether the given member was a voting member at that meeting, gender, and indicator variables for each FOMC chair."])
sg.rename_covariates(cov_name_map)
sg.dependent_variable_name('Distance from Committee Average Stance')
sg.covariate_order(["exp",  "unrate", "pceinf", 'exp:unrate', 'exp:pceinf', "I(exp ** 2)",'I(exp ** 2):pceinf', 'I(exp ** 2):unrate', "C(female)[T.1.0]", "C(voter)[T.1.0]", "C(chair, Treatment(reference='Greenspan'))[T.Bernanke]", "C(chair, Treatment(reference='Greenspan'))[T.Yellen]", 'Intercept'])
sg.title('Unordered Results')
sg.show_degrees_of_freedom(False)
sg.add_line('Region FEs', ['Yes', 'Yes'])
with open('table1.tex', 'w') as f:
    f.write(sg.render_latex())
sg

0,1,2
,,
,Dependent variable: Distance from Committee Average Stance,Dependent variable: Distance from Committee Average Stance
,,
,(1),(2)
,,
Experience,0.126***,0.086
,(0.015),(0.064)
Unemployment,-0.017,-0.020
,(0.031),(0.035)
PCE inflation,-0.010,-0.070**
