In [1]:
import pandas as pd
import os
from ast import literal_eval

from sklearn.linear_model import LassoCV

import statsmodels.formula.api as smf

In [2]:
excel_file = '../working-csvs/20240114 Participants Key.xlsx'

# Open the workbook once and reuse the handle for every read; handing the path
# to pd.read_excel inside the loop would reopen and reparse the file per sheet.
with pd.ExcelFile(excel_file) as workbook:
    all_sheets = pd.read_excel(workbook, sheet_name=None)

    # The first sheet is skipped; each remaining sheet name is later used as
    # that sheet's MTGDATE value.
    data_sheet_names = list(all_sheets.keys())[1:]

    dfs = []

    for sheet_name in data_sheet_names:
        # skiprows=1: the column headers are read from the second row of the sheet.
        sheet = pd.read_excel(workbook, sheet_name=sheet_name, skiprows=1)
        for col in ['Name', 'Number', 'Affiliation']:
            # Some sheets split the header over two lines ("Participant\nName");
            # fold those into the single-line column name.
            alt_name = f'Participant\n{col}'
            if alt_name in sheet.columns:
                sheet[f'Participant {col}'] = sheet[alt_name]
                sheet = sheet.drop(columns=alt_name)
        # Tag every row with the sheet it came from (still a string here).
        sheet['MTGDATE'] = sheet_name
        dfs.append(sheet)

# Stack all sheets; each sheet's own row index is kept as-is.
pkey = pd.concat(dfs)

In [3]:
# Inspect the combined participant key (all data sheets stacked).
pkey

Unnamed: 0,Participant Number,Participant Name,Participant Affiliation,Vote,Note,Chair,MTGDATE
0,1,Frederic Mishkin,Board,1.0,,0.0,20071031
1,2,Gary Stern,Minneapolis,0.0,,0.0,20071031
2,3,Timothy Geithner,New York,1.0,,0.0,20071031
3,4,William Poole,St. Louis,1.0,,0.0,20071031
4,5,Randall Kroszner,Board,1.0,,0.0,20071031
...,...,...,...,...,...,...,...
12,13,Eric Rosengren,Boston,0.0,,0.0,20181219
13,14,Loretta Mester,Cleveland,1.0,,0.0,20181219
14,15,Patrick Harker,Philadelphia,0.0,,0.0,20181219
15,16,Charles Evans,Chicago,0.0,,0.0,20181219


In [4]:
# Load the projections workbook (first sheet, default header row).
proj = pd.read_excel('../working-csvs/20240114 FomcProjections.xlsx')

In [12]:
# Check column types; MTGDATE and ID are the keys used to join the participant key.
proj.dtypes

MTGDATE      int64
ID           int64
TARGET      object
HORIZON      int64
GDP        float64
UN         float64
PCE        float64
COREPCE    float64
FFD        float64
dtype: object

In [13]:
# MTGDATE was filled from sheet names (strings); cast it to int so it can be
# joined against the integer MTGDATE column of proj.
pkey['MTGDATE'] = pkey['MTGDATE'].astype(int)

In [14]:
# Verify the cast: MTGDATE should now be an integer column.
pkey.dtypes

Participant Number           int64
Participant Name            object
Participant Affiliation     object
Vote                       float64
Note                        object
Chair                      float64
MTGDATE                      int64
dtype: object

In [16]:
# Attach participant details to each projection row by matching the projection's
# ID to the key's Participant Number within the same meeting (MTGDATE).
# Default (inner) join: projection rows with no matching participant are dropped.
# NOTE(review): this cell rebinds `proj`, so running it twice merges the key in again.
proj = proj.merge(pkey, left_on=['MTGDATE', 'ID'], right_on=['MTGDATE', 'Participant Number'])

In [17]:
# Inspect the projections with participant details attached.
proj

Unnamed: 0,MTGDATE,ID,TARGET,HORIZON,GDP,UN,PCE,COREPCE,FFD,Participant Number,Participant Name,Participant Affiliation,Vote,Note,Chair
0,20071031,1,2007,1,2.4,4.7,3.0,1.8,,1,Frederic Mishkin,Board,1.0,,0.0
1,20071031,1,2008,5,1.7,4.8,1.8,1.9,,1,Frederic Mishkin,Board,1.0,,0.0
2,20071031,1,2009,9,2.2,4.8,1.9,2.0,,1,Frederic Mishkin,Board,1.0,,0.0
3,20071031,1,2010,13,2.2,4.8,2.0,2.0,,1,Frederic Mishkin,Board,1.0,,0.0
4,20071031,2,2007,1,2.5,4.7,3.0,1.9,,2,Gary Stern,Minneapolis,0.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2747,20181219,17,2018,1,3.1,3.7,1.8,1.9,2.38,17,Raphael Bostic,Atlanta,1.0,,0.0
2748,20181219,17,2019,5,2.3,3.5,2.0,2.1,2.63,17,Raphael Bostic,Atlanta,1.0,,0.0
2749,20181219,17,2020,9,1.8,3.7,2.0,2.0,2.88,17,Raphael Bostic,Atlanta,1.0,,0.0
2750,20181219,17,2021,13,1.8,3.9,2.0,2.0,2.88,17,Raphael Bostic,Atlanta,1.0,,0.0


In [34]:
# MTGDATE is an integer of the form YYYYMMDD; split it with integer arithmetic.
proj['year'] = (proj['MTGDATE'] // 10000).astype(int)  # YYYY
proj['md'] = proj['MTGDATE'] % 10000                   # MMDD
proj['m'] = (proj['md'] // 100).astype(int)            # MM

In [50]:
# Merge key: the last whitespace-separated token of the participant's name, lower-cased.
proj['speaker'] = [full_name.split()[-1].lower() for full_name in proj['Participant Name']]

In [35]:
# Load the per-speaker table; the first CSV column becomes the index.
nrdf = pd.read_csv('../regressions/nrdf.csv', index_col=0)

In [40]:
# Parse the date_x values into datetimes so the .dt accessor can be used on them.
nrdf['date'] = pd.to_datetime(nrdf['date_x'])

In [45]:
# Calendar year and month of each row, named to match the `year` / `m`
# columns derived from MTGDATE on proj.
nrdf['year'] = nrdf['date'].dt.year
nrdf['m'] = nrdf['date'].dt.month

In [57]:
# Spot-check the year / month keys derived from MTGDATE.
proj[['year', 'm']]

Unnamed: 0,year,m
0,2007,10
1,2007,10
2,2007,10
3,2007,10
4,2007,10
...,...,...
2747,2018,12
2748,2018,12
2749,2018,12
2750,2018,12


In [90]:
def _parse_svect(raw):
    """Turn a stored ``svect`` value into a list of floats.

    ``raw`` is expected to be a bracketed string of whitespace-separated
    numbers. A trailing comma on a token is removed before conversion.
    Only commas are stripped: slicing one character off every token would
    also cut the final digit from the last number, which has no comma after it.

    A value that is already a list is returned unchanged, so the cell can be
    rerun without failing.
    """
    if isinstance(raw, list):
        return raw
    # raw[1:-1] drops the surrounding brackets.
    return [float(token.rstrip(',')) for token in raw[1:-1].split()]


nrdf['svect'] = nrdf['svect'].map(_parse_svect)

In [97]:
# Spread each parsed svect list across 45 scalar columns, tprob_0 ... tprob_44.
# assumes every svect list has exactly 45 entries — TODO confirm
nrdf[[f'tprob_{i}' for i in range(45)]] = nrdf['svect'].to_list()
nrdf

Unnamed: 0,date_x,section,speaker,svect,voter,exp,female,region,chair,diffavg,...,tprob_35,tprob_36,tprob_37,tprob_38,tprob_39,tprob_40,tprob_41,tprob_42,tprob_43,tprob_44
0,1999-02-03,1,boehne,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.8132981, -1....",1.0,18.016438,0.0,Philadelphia,Greenspan,2.965565,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.453627,0.000000,0.000000,0.000000
1,1999-02-03,1,broaddus,"[0.0, 0.2275163, 0.287404, -0.3256735, 0.0, -0...",0.0,6.093151,0.0,Richmond,Greenspan,2.992641,...,0.000000,0.189087,0.0,0.0,-0.222559,-0.833909,0.269360,0.000000,0.000000,0.000000
2,1999-02-03,1,ferguson,"[0.0, 0.0, -0.3704035, 0.0, 0.0, 1.0257428, -1...",1.0,1.246575,0.0,governor,Greenspan,2.651676,...,0.000000,-0.384956,0.0,0.0,-0.314937,0.000000,-0.311211,0.000000,0.000000,0.000000
3,1999-02-03,1,gramlich,"[0.0, -0.6131507, 0.0, 0.0, 0.0, -0.6738415, 0...",1.0,1.246575,0.0,governor,Greenspan,4.650914,...,0.000000,1.000000,0.0,0.0,-0.339546,-1.000000,0.000000,0.000000,-0.287682,0.000000
4,1999-02-03,1,greenspan,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1.0,11.490411,0.0,governor,Greenspan,1.918389,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4656,2017-12-13,2,powell,"[0.0, 0.0, 3.2495306, 0.0, 0.0, -0.3657142, 0....",1.0,5.556164,0.0,governor,Yellen,1.687786,...,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.336497,0.203205,0.000000
4657,2017-12-13,2,quarles,"[0.2632226, 0.0, 3.9789766, 0.0, 0.0, 0.0, 0.0...",1.0,0.238356,0.0,governor,Yellen,2.460575,...,0.451414,0.315620,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.395349,0.000000
4658,2017-12-13,2,rosengren,"[0.372585, 0.0, 0.3795712, 0.0, 0.0, 0.0, 0.0,...",0.0,10.408219,0.0,Boston,Yellen,1.986754,...,0.000000,0.000000,0.0,0.0,0.000000,0.343146,0.000000,0.000000,0.000000,0.000000
4659,2017-12-13,2,williams,"[0.436482, 0.3793103, 3.7724818, 0.0, 0.0, 0.0...",0.0,6.791781,0.0,San Francisco,Yellen,2.514672,...,-0.116297,0.000000,0.0,0.0,0.000000,0.000000,0.000000,-0.263758,0.000000,0.237133


In [181]:
# Left-join the nrdf rows onto the projections by year, month and speaker;
# indicator=True adds a `_merge` column recording which side each row came from.
# NOTE(review): nrdf appears to hold several rows per speaker and meeting (it has a
# `section` column), so matched projection rows are repeated — confirm this is intended.
test = proj.merge(nrdf, on=['year', 'm', 'speaker'], how='left', indicator=True)

In [182]:
# Count matched ("both") versus unmatched ("left_only") rows of the join.
test['_merge'].value_counts()

_merge
both          4679
left_only      306
right_only       0
Name: count, dtype: int64

Check these: projection rows from before 2018 that found no match in `nrdf`.

In [183]:
# Speakers of pre-2018 projection rows that found no partner in nrdf.
unmatched_pre_2018 = (test['_merge'] == 'left_only') & (test['year'] < 2018)
test.loc[unmatched_pre_2018, 'speaker']

1321      fisher
1322      fisher
1323      fisher
1324      fisher
1517       moore
1518       moore
1519       moore
1520       moore
1521       moore
1724       moore
1725       moore
1726       moore
1727       moore
3109    pianalto
3110    pianalto
3111    pianalto
3112    pianalto
4240     gooding
4241     gooding
4242     gooding
4243     gooding
Name: speaker, dtype: object

In [184]:
# Frequency of each HORIZON code in the merged frame.
test['HORIZON'].value_counts()

HORIZON
99    980
1     346
5     346
9     346
13    346
3     285
7     285
11    285
2     280
6     280
10    280
4     270
8     270
12    270
14    116
Name: count, dtype: int64

In [185]:
def horiz_mapper(horiz):
    """Collapse a numeric HORIZON code into a coarse bucket label.

    Buckets (upper bounds inclusive):
        horiz <= 3   -> '1Q'
        3  < horiz <= 6  -> '2Q'
        6  < horiz <= 9  -> '3Q'
        9  < horiz <= 12 -> '4Q'
        12 < horiz <= 14 -> '5Q'
        horiz >= 99  -> 'LR'

    Returns None when the value falls in none of these ranges, i.e. for
    values strictly between 14 and 99 and for NaN.
    """
    # Ordered by ascending upper bound, so the first bound that fits wins.
    upper_bounds = ((3, '1Q'), (6, '2Q'), (9, '3Q'), (12, '4Q'), (14, '5Q'))
    for upper, label in upper_bounds:
        if horiz <= upper:
            return label
    return 'LR' if horiz >= 99 else None


In [186]:
# Bucket every HORIZON code into its coarse label via horiz_mapper.
test['HORIZON_condensed'] = test['HORIZON'].map(horiz_mapper)

In [187]:
# Inspect the merged frame, now including HORIZON_condensed.
test

Unnamed: 0,MTGDATE,ID,TARGET,HORIZON,GDP,UN,PCE,COREPCE,FFD,Participant Number,...,tprob_37,tprob_38,tprob_39,tprob_40,tprob_41,tprob_42,tprob_43,tprob_44,_merge,HORIZON_condensed
0,20071031,1,2007,1,2.4,4.7,3.0,1.8,,1,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,both,1Q
1,20071031,1,2007,1,2.4,4.7,3.0,1.8,,1,...,0.0,0.0,0.0,-0.700499,0.0,0.0,0.0,0.0,both,1Q
2,20071031,1,2008,5,1.7,4.8,1.8,1.9,,1,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,both,2Q
3,20071031,1,2008,5,1.7,4.8,1.8,1.9,,1,...,0.0,0.0,0.0,-0.700499,0.0,0.0,0.0,0.0,both,2Q
4,20071031,1,2009,9,2.2,4.8,1.9,2.0,,1,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,both,3Q
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4980,20181219,17,2018,1,3.1,3.7,1.8,1.9,2.38,17,...,,,,,,,,,left_only,1Q
4981,20181219,17,2019,5,2.3,3.5,2.0,2.1,2.63,17,...,,,,,,,,,left_only,2Q
4982,20181219,17,2020,9,1.8,3.7,2.0,2.0,2.88,17,...,,,,,,,,,left_only,3Q
4983,20181219,17,2021,13,1.8,3.9,2.0,2.0,2.88,17,...,,,,,,,,,left_only,5Q


In [188]:
# Keep only the rows whose horizon falls in the '1Q' bucket.
# NOTE(review): fore1Q is not referenced again in the cells shown here.
fore1Q = test[test['HORIZON_condensed'] == '1Q']

In [189]:
# Names of the 45 feature columns, tprob_0 ... tprob_44.
tprob_cols = [f'tprob_{i}' for i in range(45)]

In [190]:
# Right-hand side of a regression formula: the feature names joined by ' + '.
tprob_part = ' + '.join(tprob_cols)

In [191]:
# Preview a formula string with UN as the response; it is only displayed, not stored.
'UN ~ ' + tprob_part

'UN ~ tprob_0 + tprob_1 + tprob_2 + tprob_3 + tprob_4 + tprob_5 + tprob_6 + tprob_7 + tprob_8 + tprob_9 + tprob_10 + tprob_11 + tprob_12 + tprob_13 + tprob_14 + tprob_15 + tprob_16 + tprob_17 + tprob_18 + tprob_19 + tprob_20 + tprob_21 + tprob_22 + tprob_23 + tprob_24 + tprob_25 + tprob_26 + tprob_27 + tprob_28 + tprob_29 + tprob_30 + tprob_31 + tprob_32 + tprob_33 + tprob_34 + tprob_35 + tprob_36 + tprob_37 + tprob_38 + tprob_39 + tprob_40 + tprob_41 + tprob_42 + tprob_43 + tprob_44'

TEMPORARILY OMITTING 2018

In [192]:
# Keep only rows from meetings before 2018.
test = test[test['year'] < 2018]

In [193]:
# Drop rows missing any tprob_* feature or the GDP target; the estimator fitted
# below does not accept NaN values.
test = test.dropna(subset=tprob_cols + ['GDP'])

In [194]:
# Features: the tprob_* columns; target: the GDP projection.
# NOTE(review): `test` still contains every forecast horizon at this point (the
# '1Q' subset fore1Q is not used here) — confirm that pooling horizons is intended.
X = test[tprob_cols]
y = test['GDP']
# Lasso with the penalty strength chosen by 10-fold cross-validation.
reg = LassoCV(cv=10, random_state=0).fit(X,y)

# R^2 computed on the same rows the model was fitted on (in-sample fit).
reg.score(X,y)

0.04311841815099493

In [195]:
# Fitted coefficients, one per tprob_* column; zeros are features the lasso dropped.
reg.coef_

array([-0.        ,  0.        , -0.15291103,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.00046662,  0.        ,  0.03544512,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.03441881])