In [1]:
import pandas as pd
import numpy as np
import spacy
import re



In [2]:
def read_data(filename: str) -> pd.DataFrame:
    return pd.read_csv(f"data/dataset/{filename}.csv",
                       index_col=0, parse_dates=True)

df_statement = read_data("FOMC/statement")
df_speech = read_data("FOMC/speech")
df_minutes = read_data("FOMC/minutes")

In [3]:
df_speech.shape, df_statement.shape, df_minutes.shape

((1607, 4), (201, 4), (229, 4))

In [4]:
df_minutes["speaker"].unique()

array(['Alan Greenspan', 'Ben Bernanke', 'Janet Yellen', 'Jerome Powell',
       'other'], dtype=object)

In [5]:
df_speech.dropna(inplace=True)
df_statement.dropna(inplace=True)
df_minutes.dropna(inplace=True)

In [6]:
df_speech[df_speech["speaker"].str.contains("Gov")]

Unnamed: 0,date,contents,speaker,title
1,1996-06-18,"Remarks by Governor Edward W. Kelley, Jr.\nDev...","Governor Edward W. Kelley, Jr.",Developments in electronic money and banking
2,1996-09-08,Monetary Policy Objectives and Strategy\n\n[SE...,Governor Laurence H. Meyer,Monetary policy objectives and strategy
4,1996-10-02,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,Small business is big business
7,1996-10-09,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,Here we go again?
8,1996-10-11,Remarks by Governor Lawrence B. Lindsey\nAt th...,Governor Lawrence B. Lindsey,How to grow faster
...,...,...,...,...
1600,2022-10-06,"The Federal Reserve, the central bank of the U...",Governor Christopher J. Waller,The Economic Outlook with a Look at the Housin...
1603,2022-10-12,"The Federal Reserve, the central bank of the U...",Governor Michelle W. Bowman,Forward Guidance as a Monetary Policy Tool: Co...
1604,2022-10-14,"The Federal Reserve, the central bank of the U...",Governor Christopher J. Waller,The U.S. Dollar and Central Bank Digital Curre...
1605,2022-10-20,"The Federal Reserve, the central bank of the U...",Governor Michelle W. Bowman,Welcoming Remarks


On veut conserver seulement les textes présentés par les "Governor"

In [7]:
def filter_by_title(df: pd.DataFrame, title: str) -> pd.DataFrame:
    return df[df["speaker"].str.contains(title)]

In [8]:
df_speech = filter_by_title(df_speech, "Governor")
df_statement = filter_by_title(df_speech, "Governor")
df_minutes = filter_by_title(df_speech, "Governor")

In [9]:
def clean_speech_text(df: pd.DataFrame):
    df_new = df.copy()
    full_text_col = df_new["contents"].apply \
        (lambda x: x.replace('\n\n[SECTION]\n\n', '').replace('\n', ' ').replace('\r', ' ').strip())
    full_text_col = full_text_col.str.replace('\n', ' ')
    df_new.drop(labels='contents', axis="columns", inplace=True)
    df_new['contents'] = full_text_col
    return df_new

In [10]:
df_minutes = clean_speech_text(df_minutes)
df_speech = clean_speech_text(df_speech)
df_statement = clean_speech_text(df_statement)


In [11]:
df_statement["contents"].iloc[-1]

'The Federal Reserve, the central bank of the United States, provides           the nation with a safe, flexible, and stable monetary and financial           system.Federal Open Market CommitteeMonetary Policy Principles and PracticePolicy ImplementationReportsReview of Monetary Policy Strategy, Tools, and                     CommunicationsInstitution SupervisionReportsReporting FormsSupervision & Regulation LettersBanking Applications & Legal DevelopmentsRegulatory ResourcesBanking & Data StructureFinancial Stability AssessmentsFinancial Stability Coordination & ActionsReportsRegulations & StatutesPayment PoliciesReserve Bank Payment Services & DataFinancial Market Utilities & InfrastructuresResearch, Committees, and ForumsWorking Papers and NotesData, Models and ToolsBank Assets and LiabilitiesBank Structure DataBusiness FinanceDealer Financing TermsExchange Rates and International DataFinancial AccountsHousehold FinanceIndustrial ActivityInterest RatesMicro Data Reference Manual (MD

In [12]:
def find_start_speech(text: str):
    try:
        idx_start = re.search("Share", text).start()
        return text[idx_start + 5:]
    except:
        return text


In [13]:
df_statement["contents"].iloc[0]

'Remarks by Governor Edward W. Kelley, Jr. Developments in electronic money and banking At the CyberPayments \'96 Conference, Dallas, Texas June 18, 1996It is a pleasure to be here this afternoon to provide      the Federal Reserve\'s perspective on recent developments in the      field of electronic money and banking.  To begin, let me remind      you that progress in the retail electronic banking sector so far      has been slow in the United States.  Twenty years ago, a national      Commission was appointed by the Congress to study many of the      issues surrounding emerging electronic funds transfer systems,      particularly as they were seen to affect consumers and the      general public.  The assumption at that time was that the use of      currency and checks in the United States would rapidly disappear.      Indeed, the creation of the Automated Clearing House system and      NACHA have their roots in this era of our history.  Yet today,      paper currency and checks are s

In [14]:
find_start_speech(df_statement['contents'].iloc[0])

'Remarks by Governor Edward W. Kelley, Jr. Developments in electronic money and banking At the CyberPayments \'96 Conference, Dallas, Texas June 18, 1996It is a pleasure to be here this afternoon to provide      the Federal Reserve\'s perspective on recent developments in the      field of electronic money and banking.  To begin, let me remind      you that progress in the retail electronic banking sector so far      has been slow in the United States.  Twenty years ago, a national      Commission was appointed by the Congress to study many of the      issues surrounding emerging electronic funds transfer systems,      particularly as they were seen to affect consumers and the      general public.  The assumption at that time was that the use of      currency and checks in the United States would rapidly disappear.      Indeed, the creation of the Automated Clearing House system and      NACHA have their roots in this era of our history.  Yet today,      paper currency and checks are s

In [15]:
df_statement["contents"] = pd.Series([find_start_speech(df_statement['contents'].iloc[i]) for i in range(len(df_statement))])
df_speech["contents"] = pd.Series([find_start_speech(df_speech['contents'].iloc[i]) for i in range(len(df_speech))])
df_minutes["contents"] = pd.Series([find_start_speech(df_minutes['contents'].iloc[i]) for i in range(len(df_minutes))])


In [16]:
load_model = spacy.load('en_core_web_sm')

In [17]:
stopwords = load_model.Defaults.stop_words


In [18]:
doc = load_model(df_statement['contents'].iloc[3])

In [19]:
test_test = " ".join([token.lemma_ for token in doc])

In [20]:
test_test

'the Transformation of the U.S. Banking Industry and Resulting Challenges to RegulatorsGood morning .   it be a pleasure to be here on Ohio Bankers Day . the over - ride theme of my remark be the profound transformation the banking industry have undergo over the last 15 year or so and the challenge that these change pose to bank regulator .   at the end , I hope to have time to take some question and learn where you think the banking industry be go , and how -- consistent with our responsibility -- regulator can promote a more efficient , resilient , and profitable financial service industry .   I plan to learn as much or more from you as you learn from I . the Role of Banks let I begin with a few observation about the importance of the banking industry in the economy and why banking receive such special attention in term of regulation . bank , like other financial intermediary , pool and absorb risk for depositor and provide stable source of investment and work capital fund for nonfin