In [2]:
import pandas as pd
import datetime

In [3]:
# Load bog.csv; later cells read its 'start', 'end' and 'lname' columns.
bog = pd.read_csv('bog.csv')

In [4]:
# Replace missing values with '' so that to_date() can recognise an absent
# date by comparing against the empty string. The frame is rebound rather
# than modified with inplace=True, so re-running the cell stays predictable.
bog = bog.fillna('')

In [5]:
def to_date(string):
    """Parse a ``'%m-%d-%Y'`` date string, treating ``''`` as "today".

    Parameters
    ----------
    string : str
        A date formatted like ``'12-31-1988'``, or the empty string when
        the date is missing.

    Returns
    -------
    datetime.datetime
        The parsed date at midnight. For ``''`` this is today's date at
        midnight, so both branches yield the same type and the results can
        be compared or sorted together.

    Raises
    ------
    ValueError
        If a non-empty ``string`` does not match ``'%m-%d-%Y'``.
    """
    if string != '':
        return datetime.datetime.strptime(string, '%m-%d-%Y')
    # Return a datetime (a subclass of date) rather than a bare date:
    # datetime and date objects cannot be ordered against each other.
    return datetime.datetime.combine(datetime.date.today(), datetime.time.min)

In [8]:
def make_member_dict():
    """Group every (start, end) term in ``bog`` by the member's last name.

    Returns
    -------
    dict
        Maps each ``lname`` value to a list of ``[start, end]`` pairs, one
        pair per row of ``bog``, with both ends converted by ``to_date``.
    """
    terms_by_name = {}
    for _, record in bog.iterrows():
        term = [to_date(record['start']), to_date(record['end'])]
        # setdefault creates the list the first time a name is seen.
        terms_by_name.setdefault(record['lname'], []).append(term)
    return terms_by_name

In [None]:
# Display the mapping built from `bog` as a quick visual check.
make_member_dict()

In [10]:
# Load presidents.csv. A missing 'Out' becomes '' (which to_date() maps to
# today) and a missing 'Interim' becomes 0. The fill is chained onto the read
# instead of using inplace=True, so the frame is never mutated after creation.
prdf = pd.read_csv('presidents.csv').fillna({'Out': '', 'Interim': 0})
prdf.head()

Unnamed: 0,Name,Bank,In,Out,Interim
0,Frank E. Morris,Boston,08/15/68,12-31-1988,0.0
1,Richard F. Syron,Boston,01/01/89,03-31-1994,0.0
2,Cathy E. Minehan,Boston,07/13/94,07-20-2007,0.0
3,Eric Rosengren,Boston,07/20/07,09-30-2021,0.0
4,Anthony M. Solomon,New York,04/01/80,01-01-1985,0.0


In [11]:
def lname_extract(full_name):
    """Return the lower-cased last word of a space-separated full name.

    A trailing ``'Jr.'`` (in any letter case) is skipped, so the word in
    front of it is returned instead.
    """
    *leading, last = full_name.split(' ')
    if last.lower() == 'jr.':
        # Fall back to the word that precedes the suffix.
        last = leading[-1]
    return last.lower()

In [12]:
# Add a lower-cased last-name column derived from 'Name' via lname_extract.
prdf['lname'] = prdf['Name'].apply(lname_extract)

In [33]:
# Preview the frame to confirm the new 'lname' column.
prdf.head()

Unnamed: 0,Name,Bank,In,Out,Interim,lname
0,Frank E. Morris,Boston,08/15/68,12-31-1988,0.0,morris
1,Richard F. Syron,Boston,01/01/89,03-31-1994,0.0,syron
2,Cathy E. Minehan,Boston,07/13/94,07-20-2007,0.0,minehan
3,Eric Rosengren,Boston,07/20/07,09-30-2021,0.0,rosengren
4,Anthony M. Solomon,New York,04/01/80,01-01-1985,0.0,solomon


Voting member schedule

In [14]:
# Each row is (bank, start, interval). which_bank_votes() counts a bank as
# voting in every year that is a whole number of intervals away from `start`.
_ROTATION = (
    ('St. Louis', 1980, 3),
    ('Boston', 1980, 3),
    ('Philadelphia', 1981, 3),
    ('Richmond', 1982, 3),
    ('Cleveland', 1980, 2),
    ('Chicago', 1981, 2),
    ('Atlanta', 1979, 3),
    ('Dallas', 1981, 3),
    ('Minneapolis', 1981, 3),
    ('Kansas City', 1980, 3),
    ('San Francisco', 1979, 3),
    ('New York', 1980, 1),
)

# Expand the compact rows into the list of dicts the rest of the notebook reads.
vm_schedule = [
    {'bank': bank, 'start': start, 'interval': interval}
    for bank, start, interval in _ROTATION
]

In [15]:
def which_bank_votes(year):
    """List the banks in ``vm_schedule`` whose rotation lands on ``year``.

    A bank is included when ``year - start`` is an exact multiple of its
    ``interval``. The result keeps the order of ``vm_schedule``.
    """
    return [
        entry['bank']
        for entry in vm_schedule
        if (year - entry['start']) % entry['interval'] == 0
    ]

In [16]:
# Example: the banks whose rotation lands on 2022.
which_bank_votes(2022)

['St. Louis', 'Boston', 'Cleveland', 'Kansas City', 'New York']

In [17]:
def get_pres(region, date):
    """Look up the last name of the president of ``region`` on ``date``.

    Scans the ``prdf`` rows whose 'Bank' equals ``region`` and returns the
    'lname' of the first row whose In-to-Out period range contains ``date``.
    ``to_date`` converts 'Out', so an empty 'Out' stands for today. When no
    row matches, the placeholder string ``f'{region}-{date}'`` is returned.
    """
    bank_rows = prdf[prdf['Bank'] == region]
    for _, president in bank_rows.iterrows():
        # Called `tenure` so the builtin `range` stays usable in this scope.
        tenure = pd.period_range(president.In, to_date(president.Out))
        if date in tenure:
            return president.lname
    return f'{region}-{date}'

In [18]:
def get_bog(date):
    """Collect the 'lname' of every ``bog`` row whose term contains ``date``.

    A row matches when ``date`` falls inside the period range running from
    its 'start' to ``to_date`` of its 'end'. Matches are returned in the
    row order of ``bog``; the list is empty when nothing matches.
    """
    return [
        member.lname
        for _, member in bog.iterrows()
        if date in pd.period_range(member.start, to_date(member.end))
    ]

In [19]:
# The twelve bank names passed to get_pres(); their order fixes the order of
# the presidents in any list built from them.
regions = ['Richmond', 'Philadelphia', 'San Francisco', 'St. Louis', 'Cleveland',
           'Chicago', 'Boston', 'Minneapolis', 'Atlanta', 'Dallas', 'New York', 'Kansas City']

# Spot check for one date: a president lookup per region, then the matching `bog` rows.
[get_pres(region, '02-13-17') for region in regions] + get_bog('02-13-17')

['lacker',
 'harker',
 'williams',
 'bullard',
 'mester',
 'evans',
 'rosengren',
 'kashkari',
 'lockhart',
 'kaplan',
 'dudley',
 'george',
 'brainard',
 'fischer',
 'powell',
 'tarullo',
 'yellen']

In [20]:
def get_fomc(date):
    """Return the combined roster for ``date`` as a list of last names.

    The list holds one ``get_pres`` result per entry of ``regions`` (in that
    order), followed by everything ``get_bog`` returns for the same date.
    """
    presidents = [get_pres(region, date) for region in regions]
    bog_members = get_bog(date)
    return presidents + bog_members

Speakers at each meeting

In [21]:
# Read ../grouped_w_topics.csv (first CSV column as the index) and keep only
# the 'date' and 'speaker' columns.
smdf = pd.read_csv('../grouped_w_topics.csv', index_col=0)[['date', 'speaker']]

In [22]:
def get_capped_lname(string):
    """Return the last space-separated word of ``string``, lower-cased and
    with its final character removed.

    For example ``'MR. BOEHNE.'`` gives ``'boehne'``. The final character is
    dropped unconditionally, whatever it is.
    """
    last_word = string.rsplit(' ', 1)[-1]
    without_final_char = last_word[:-1]
    return without_final_char.lower()

In [23]:
# Derive a lower-cased last name for every speaker label.
smdf['lname'] = smdf['speaker'].apply(get_capped_lname)
# One list of last names per date. get_capped_lname never returns a string
# containing a space, so collecting the values directly yields the same lists
# as joining them on ' ' and splitting again, without the string round trip.
speaker_date = smdf.groupby('date')['lname'].agg(list)

In [32]:
# Count the dates on which at least one expected voter (the presidents of the
# banks that which_bank_votes() returns for that year, plus the get_bog()
# names for the date) is absent from that date's speaker list.
count = 0
# The loop variable is `speakers`, not `list`: at cell level a variable named
# `list` would rebind the builtin for every cell that runs afterwards.
for date, speakers in zip(speaker_date.index, speaker_date.values):
    theory = set(get_fomc(date))
    in_transcript = set(speakers)
    # Full-roster difference; computed for inspection, not used by the report below.
    diff = theory.difference(in_transcript)
    # Dates look like 'YYYY-MM-DD', so the text before the first '-' is the year.
    diffvote = set(
        [get_pres(region, date) for region in which_bank_votes(int(date.split('-')[0]))]
        + get_bog(date)
    ).difference(in_transcript)
    if diffvote:
        print(date + str(diffvote))
        count += 1

print(count)

1994-02-04{'mullins'}
1996-01-31{'blinder'}
1997-02-05{'yellen', 'lindsey'}
1998-02-04{'St. Louis-1998-02-04'}
1999-06-30{'rivlin'}
2000-11-15{'jordan'}
2002-01-30{'meyer'}
2004-12-14{'bies'}
2005-05-03{'bernanke'}
2005-08-09{'gramlich'}
2006-03-28{'ferguson'}
2006-10-25{'Atlanta-2006-10-25'}
2006-12-12{'Atlanta-2006-12-12'}
2007-03-21{'bies'}
2008-09-16{'geithner'}
2008-12-16{'geithner'}
2009-01-28{'tarullo'}
2011-03-15{'warsh'}
2013-09-18{'raskin'}
2013-10-30{'raskin'}
2013-12-18{'raskin'}
2014-01-29{'raskin'}
2017-09-20{'quarles'}
23


In [35]:
# Read the same CSV that `smdf` was built from, this time keeping every column.
tdf = pd.read_csv('../grouped_w_topics.csv', index_col=0)

In [None]:
def is_fomc_member(lname, date):
    """Tell whether ``lname`` appears in the list that ``get_fomc(date)`` returns."""
    roster = get_fomc(date)
    return lname in roster

In [39]:
# Add the lower-cased last name parsed from each 'speaker' label.
tdf['lname'] = tdf['speaker'].apply(get_capped_lname)

In [40]:
# Preview the frame, including the new 'lname' column.
tdf.head()

Unnamed: 0,date,speaker,text_enhanced,tokens,tprob_0,tprob_1,tprob_2,tprob_3,tprob_4,tprob_5,...,tprob_37,tprob_38,tprob_39,tprob_40,tprob_41,tprob_42,tprob_43,tprob_44,len,lname
1696,1994-02-04,CHAIRMAN GREENSPAN.,"As you know, this is our organizational meetin...","['know', 'organiz', 'meet', 'alway', 'meet', '...",0.00236,0.001847,0.002183,0.080568,0.031129,0.001038,...,0.00237,0.004235,0.00307,0.001608,0.00296,0.023677,0.029818,0.001092,3330,greenspan
1697,1994-02-04,MR. BERNARD.,"Secretary and Economist, Donald Kohn; Deputy S...","['secretari', 'economist', 'donald', 'kohn', '...",0.005522,0.005147,0.005976,0.004956,0.004744,0.00651,...,0.006113,0.004755,0.005623,0.006206,0.006836,0.006363,0.006006,0.006229,290,bernard
1698,1994-02-04,MR. BOEHNE.,"Normally in an economic expansion, development...","['normal', 'econom', 'expans', 'develop', 'rea...",0.005527,0.006127,0.005889,0.045383,0.098863,0.003816,...,0.011927,0.015376,0.0072,0.006452,0.050246,0.017779,0.171482,0.004548,528,boehne
1699,1994-02-04,MR. BROADDUS.,My report is going to be pretty bland after th...,"['report', 'go', 'pretti', 'bland', 'develop',...",0.006002,0.012405,0.006998,0.094474,0.018949,0.003959,...,0.012009,0.010079,0.004729,0.005816,0.009277,0.005711,0.037993,0.006824,596,broaddus
1700,1994-02-04,MR. COYNE.,"Oh, believe me, you will get asked! I think i...","['oh', 'believ', 'get', 'ask', 'think', 'take'...",0.022475,0.020367,0.019531,0.023227,0.030781,0.019507,...,0.021079,0.021056,0.019952,0.020426,0.020811,0.020512,0.021888,0.019169,10,coyne


In [62]:
# Flag each row whose 'lname' is on the get_fomc() roster for the row's date.
# get_fomc() takes only the date, so each distinct date's roster is built once
# and reused for all of its rows instead of being recomputed for every row.
fomc_by_date = {date: get_fomc(date) for date in tdf['date'].unique()}
tdf['is_fomc'] = [
    lname in fomc_by_date[date]
    for lname, date in zip(tdf['lname'], tdf['date'])
]

In [66]:
# Keep only the rows whose 'is_fomc' flag is True.
tdf = tdf[tdf['is_fomc'] == True]

In [69]:
# Save the current `tdf` to ../working-csvs/fomc_w_topics.csv.
tdf.to_csv('../working-csvs/fomc_w_topics.csv')