In [1]:
import pandas as pd
import numpy as np
import spacy
import re



In [2]:
def read_data(filename: str) -> pd.DataFrame:
    return pd.read_csv(f"data/dataset/{filename}.csv",
                       index_col=0, parse_dates=True)

df_statement = read_data("FOMC/statement")
df_speech = read_data("FOMC/speech")
df_minutes = read_data("FOMC/minutes")

In [3]:
df_speech.shape, df_statement.shape, df_minutes.shape

((1607, 4), (201, 4), (229, 4))

In [4]:
df_minutes["speaker"].unique()

array(['Alan Greenspan', 'Ben Bernanke', 'Janet Yellen', 'Jerome Powell',
       'other'], dtype=object)

In [5]:
df_speech.dropna(inplace=True)
df_statement.dropna(inplace=True)
df_minutes.dropna(inplace=True)

In [6]:
df_speech[df_speech["speaker"].str.contains("Gov")]

Unnamed: 0,date,contents,speaker,title
1,1996-06-18,"Remarks by Governor Edward W. Kelley, Jr.\nDev...","Governor Edward W. Kelley, Jr.",Developments in electronic money and banking
2,1996-09-08,Monetary Policy Objectives and Strategy\n\n[SE...,Governor Laurence H. Meyer,Monetary policy objectives and strategy
4,1996-10-02,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,Small business is big business
7,1996-10-09,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,Here we go again?
8,1996-10-11,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,How to grow faster
...,...,...,...,...
1600,2022-10-06,"The Federal Reserve, the central bank of the U...",Governor Christopher J. Waller,The Economic Outlook with a Look at the Housin...
1603,2022-10-12,"The Federal Reserve, the central bank of the U...",Governor Michelle W. Bowman,Forward Guidance as a Monetary Policy Tool: Co...
1604,2022-10-14,"The Federal Reserve, the central bank of the U...",Governor Christopher J. Waller,The U.S. Dollar and Central Bank Digital Curre...
1605,2022-10-20,"The Federal Reserve, the central bank of the U...",Governor Michelle W. Bowman,Welcoming Remarks


On veut conserver seulement les textes présentés par les "Governor"

In [7]:
def filter_by_title(df: pd.DataFrame, title: str) -> pd.DataFrame:
    return df[df["speaker"].str.contains(title)]

In [8]:
df_speech = filter_by_title(df_speech, "Governor")
df_statement = filter_by_title(df_speech, "Governor")
df_minutes = filter_by_title(df_speech, "Governor")

In [9]:
df_minutes["contents"].iloc[-1]

'The Federal Reserve, the central bank of the United States, provides\n          the nation with a safe, flexible, and stable monetary and financial\n          system.\n\n[SECTION]\n\nFederal Open Market Committee\n\n[SECTION]\n\nMonetary Policy Principles and Practice\n\n[SECTION]\n\nPolicy Implementation\n\n[SECTION]\n\nReports\n\n[SECTION]\n\nReview of Monetary Policy Strategy, Tools, and\n                    Communications\n\n[SECTION]\n\nInstitution Supervision\n\n[SECTION]\n\nReports\n\n[SECTION]\n\nReporting Forms\n\n[SECTION]\n\nSupervision & Regulation Letters\n\n[SECTION]\n\nBanking Applications & Legal Developments\n\n[SECTION]\n\nRegulatory Resources\n\n[SECTION]\n\nBanking & Data Structure\n\n[SECTION]\n\nFinancial Stability Assessments\n\n[SECTION]\n\nFinancial Stability Coordination & Actions\n\n[SECTION]\n\nReports\n\n[SECTION]\n\nRegulations & Statutes\n\n[SECTION]\n\nPayment Policies\n\n[SECTION]\n\nReserve Bank Payment Services & Data\n\n[SECTION]\n\nFinancial Market

In [44]:
def clean_speech_text(df: pd.DataFrame):
    df_new = df.copy()
    full_text_col = df_new["contents"].apply \
        (lambda x: x.replace('\n\n[SECTION]\n\n', '').replace('\n', ' ').replace('\r', ' ').strip())
    full_text_col = full_text_col.apply(lambda x: re.sub(r'(http)\S+(htm)(l)?', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'(www.)\S+', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'[\d]', '', x))
    full_text_col = full_text_col.str.replace('—', ' ')
    full_text_col = full_text_col.str.replace('-', ' ')
    full_text_col = full_text_col.apply(lambda x: re.sub(r'[^\w\s]', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'([Rr]eturn to text)', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'([Pp]lay [vV]ideo)', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'function()', '', x))
    full_text_col = full_text_col.apply(lambda x: re.sub(r'/[^\s]{15,}/', '', x))
    df_new.drop(labels='contents', axis="columns", inplace=True)
    df_new['contents'] = full_text_col
    return df_new

In [45]:
df_minutes = clean_speech_text(df_minutes)
df_speech = clean_speech_text(df_speech)
df_statement = clean_speech_text(df_statement)


In [46]:
df_minutes["contents"].iloc[3]

'Remarks by Governor Lawrence B Lindsey At the Community Development Lending Conference Dallas Texas October Here We Go AgainThank you It is a pleasure to be here today to discuss some of the challenges that lie ahead in the areas of economic opportunity and community development Frankly its a pleasure to be anywhere but Washington DC Actually though in any year divisible by four the whole country gets to glimpse the craziness that has always been a way of life in our nations capitalOne of the problems that we have in Washington is that our focus is short term limited only to the next election Anything that could happen more than four years into the future doesnt register on the collective consciousness And frankly this time frame has gotten shorter and shorter in an age of nightly polling and focus groupsBut Americas cities are a testament to the effects policies enacted in the rush to the next election can have as well as proof positive that trends that occur in a single economic cyc

In [36]:
def find_start_speech(text: str):
    try:
        idx_start = re.search("Share", text).start()
        return text[idx_start + 5:]
    except:
        return text


SyntaxError: invalid syntax (906443036.py, line 5)

In [30]:
def replace_white_spaces(text: str):
    return " ".join(text.split())

df_statement["contents"] = df_statement['contents'].apply(lambda x: replace_white_spaces(x))
df_speech["contents"] = df_speech['contents'].apply(lambda x: replace_white_spaces(x))
df_minutes["contents"] = df_minutes['contents'].apply(lambda x: replace_white_spaces(x))

In [47]:
df_statement["contents"].iloc[10]

'The Role for Structural Macroeconomic ModelsI am in the middle of my third interesting and active encounter with the development andor use of macroeconometric models for forecasting and policy analysis My journey began at MIT as a research assistant to Professors Franco Modigiliani and Albert Ando during the period of development of the MPS model continued at Laurence H Meyer Associates with the development of The Washington University Macro Model under the direction of my partner Joel Prakken and the use of that model for both forecasting and policy analysis and now has taken me to the Board of Governors where macro models have long played an important role in forecasting and policy analysis and the MPS model has recently been replaced by the FRBUS modelI bring to this panel a perspective shaped by both my earlier experience and my new responsibilities I will focus my presentation on the role of structural macro models in the monetary policy process compare the use of models at the B

In [32]:
df_statement["contents"] = df_statement['contents'].apply(lambda x: find_start_speech(x))
df_speech["contents"] = df_speech['contents'].apply(lambda x: find_start_speech(x))
df_minutes["contents"] = df_minutes['contents'].apply(lambda x: find_start_speech(x))



In [34]:
df_speech["contents"].iloc[10]

'The Role for Structural Macroeconomic ModelsI am in the middle of my third interesting and active encounter with the development andor use of macroeconometric models for forecasting and policy analysis My journey began at MIT as a research assistant to Professors Franco Modigiliani and Albert Ando during the period of development of the MPS model continued at Laurence H Meyer Associates with the development of The Washington University Macro Model under the direction of my partner Joel Prakken and the use of that model for both forecasting and policy analysis and now has taken me to the Board of Governors where macro models have long played an important role in forecasting and policy analysis and the MPS model has recently been replaced by the FRBUS modelI bring to this panel a perspective shaped by both my earlier experience and my new responsibilities I will focus my presentation on the role of structural macro models in the monetary policy process compare the use of models at the B

In [18]:
load_model = spacy.load('en_core_web_sm')

In [19]:
stopwords = load_model.Defaults.stop_words


In [48]:
doc = load_model(df_statement['contents'].iloc[10])

In [49]:
test_test = " ".join([token.lemma_ for token in doc])

In [50]:
test_test

'the role for Structural Macroeconomic ModelsI be in the middle of my third interesting and active encounter with the development andor use of macroeconometric model for forecasting and policy analysis my journey begin at MIT as a research assistant to Professors Franco Modigiliani and Albert Ando during the period of development of the MPS model continue at Laurence H Meyer Associates with the development of the Washington University Macro Model under the direction of my partner Joel Prakken and the use of that model for both forecasting and policy analysis and now have take I to the Board of Governors where macro model have long play an important role in forecasting and policy analysis and the MPS model have recently be replace by the FRBUS modelI bring to this panel a perspective shape by both my early experience and my new responsibility I will focus my presentation on the role of structural macro model in the monetary policy process compare the use of model at the Board with their