In [67]:
#!/usr/bin/python3
"""Routine to load MasterDictionary class"""
# BDM : 201510

import time
import os
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification


### This is From Loughran Mcdonald, it loads the master dictionary file
def load_masterdictionary(file_path, print_flag=False, f_log=None, get_other=False):
    _master_dictionary = {}
    _sentiment_categories = ['negative', 'positive', 'uncertainty', 'litigious', 'constraining',
                             'strong_modal', 'weak_modal'] 
    # Load slightly modified nltk stopwords.  I do not use nltk import to avoid versioning errors.
    # Dropped from nltk: A, I, S, T, DON, WILL, AGAINST
    # Added: AMONG,
    _stopwords = ['ME', 'MY', 'MYSELF', 'WE', 'OUR', 'OURS', 'OURSELVES', 'YOU', 'YOUR', 'YOURS',
                       'YOURSELF', 'YOURSELVES', 'HE', 'HIM', 'HIS', 'HIMSELF', 'SHE', 'HER', 'HERS', 'HERSELF',
                       'IT', 'ITS', 'ITSELF', 'THEY', 'THEM', 'THEIR', 'THEIRS', 'THEMSELVES', 'WHAT', 'WHICH',
                       'WHO', 'WHOM', 'THIS', 'THAT', 'THESE', 'THOSE', 'AM', 'IS', 'ARE', 'WAS', 'WERE', 'BE',
                       'BEEN', 'BEING', 'HAVE', 'HAS', 'HAD', 'HAVING', 'DO', 'DOES', 'DID', 'DOING', 'AN',
                       'THE', 'AND', 'BUT', 'IF', 'OR', 'BECAUSE', 'AS', 'UNTIL', 'WHILE', 'OF', 'AT', 'BY',
                       'FOR', 'WITH', 'ABOUT', 'BETWEEN', 'INTO', 'THROUGH', 'DURING', 'BEFORE',
                       'AFTER', 'ABOVE', 'BELOW', 'TO', 'FROM', 'UP', 'DOWN', 'IN', 'OUT', 'ON', 'OFF', 'OVER',
                       'UNDER', 'AGAIN', 'FURTHER', 'THEN', 'ONCE', 'HERE', 'THERE', 'WHEN', 'WHERE', 'WHY',
                       'HOW', 'ALL', 'ANY', 'BOTH', 'EACH', 'FEW', 'MORE', 'MOST', 'OTHER', 'SOME', 'SUCH',
                       'NO', 'NOR', 'NOT', 'ONLY', 'OWN', 'SAME', 'SO', 'THAN', 'TOO', 'VERY', 'CAN',
                       'JUST', 'SHOULD', 'NOW']
    ## Read file
    with open(file_path) as f:
        _total_documents = 0 ##update total no of documents
        _md_header = f.readline()
        for line in f:
            cols = line.split(',') ##split by comma
            _master_dictionary[cols[0]] = MasterDictionary(cols, _stopwords)
            _total_documents += _master_dictionary[cols[0]].doc_count
            if len(_master_dictionary) % 5000 == 0 and print_flag:
                print('\r ...Loading Master Dictionary' + ' {}'.format(len(_master_dictionary)), end='', flush=True)

    if print_flag:
        print('\r', end='')  # clear line
        print('\nMaster Dictionary loaded from file: \n  ' + file_path)
        print('  {0:,} words loaded in master_dictionary.'.format(len(_master_dictionary)) + '\n')

    if f_log:
        try:
            f_log.write('\n\n  load_masterdictionary log:')
            f_log.write('\n    Master Dictionary loaded from file: \n       ' + file_path)
            f_log.write('\n    {0:,} words loaded in master_dictionary.\n'.format(len(_master_dictionary)))
        except Exception as e:
            print('Log file in load_masterdictionary is not available for writing')
            print('Error = {0}'.format(e))

    if get_other:
        return _master_dictionary, _md_header, _sentiment_categories, _stopwords, _total_documents
    else:
        return _master_dictionary


def create_sentimentdictionaries(_master_dictionary, _sentiment_categories): ##Create sentiment categories

    _sentiment_dictionary = {} ## dictionart for sentiments
    for category in _sentiment_categories:
        _sentiment_dictionary[category] = {}
    # Create dictionary of sentiment dictionaries with count set = 0
    for word in _master_dictionary.keys():
        for category in _sentiment_categories:
            if _master_dictionary[word].sentiment[category]:
                _sentiment_dictionary[category][word] = 0

    return _sentiment_dictionary

### Master Dicitonary class
class MasterDictionary:
    def __init__(self, cols, _stopwords):
        self.word = cols[0].upper()
        self.sequence_number = int(cols[1])
        self.word_count = int(cols[2])
        self.word_proportion = float(cols[3])
        self.average_proportion = float(cols[4])
        self.std_dev_prop = float(cols[5])
        self.doc_count = int(cols[6])
        self.negative = int(cols[7])
        self.positive = int(cols[8])
        self.uncertainty = int(cols[9])
        self.litigious = int(cols[10])
        self.constraining = int(cols[11])
        self.superfluous = int(cols[12])
        self.interesting = int(cols[13])
        self.modal_number = int(cols[14])
        self.strong_modal = False
        if int(cols[14]) == 1:
            self.strong_modal = True
        self.moderate_modal = False
        if int(cols[14]) == 2:
            self.moderate_modal = True
        self.weak_modal = False
        if int(cols[14]) == 3:
            self.weak_modal = True
        self.sentiment = {}
        self.sentiment['negative'] = bool(self.negative)
        self.sentiment['positive'] = bool(self.positive)
        self.sentiment['uncertainty'] = bool(self.uncertainty)
        self.sentiment['litigious'] = bool(self.litigious)
        self.sentiment['constraining'] = bool(self.constraining)
        self.sentiment['strong_modal'] = bool(self.strong_modal)
        self.sentiment['weak_modal'] = bool(self.weak_modal)
        self.irregular_verb = int(cols[15])
        self.harvard_iv = int(cols[16])
        self.syllables = int(cols[17])
        self.source = cols[18]

        if self.word in _stopwords:
            self.stopword = True
        else:
            self.stopword = False
        return

In [70]:
import csv
import glob
import re
import string
import sys
import time
import pandas as pd
import math

import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os

# Load the tokenizer and model for FOMC-RoBERTa
tokenizer = AutoTokenizer.from_pretrained("gtfintechlab/FOMC-RoBERTa", do_lower_case=True, do_basic_tokenize=True)
# tokenizer = AutoTokenizer.from_pretrained("gtfintechlab/FOMC-RoBERTa", do_lower_case=True)
model = AutoModel.from_pretrained("gtfintechlab/FOMC-RoBERTa")

def get_embeddings(text, max_token_length=512):
    # Tokenize the document with truncation to the specified max_length
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=max_token_length)
    
    # Pass the tokenized input through the model
    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state  # This gives the embeddings for each token
    
    # Pooling: Take the mean of the token embeddings to get a single vector representation
    pooled_embedding = embeddings.mean(dim=1).squeeze().numpy()
    
    return pooled_embedding

# Function to compute cosine similarity between two vectors
def compute_cosine_similarity(embedding1, embedding2):
    # Reshape the embeddings if necessary (sklearn expects 2D arrays)
    embedding1 = embedding1.reshape(1, -1)
    embedding2 = embedding2.reshape(1, -1)
    
    return cosine_similarity(embedding1, embedding2)[0][0] ## Return cosine similarity between the two embeddings

# Extremely Hawkish Texts
hawkish_texts = [
    "Given the persistent inflationary pressures, the Federal Reserve will need to raise interest rates aggressively to curb rising prices and prevent the economy from overheating.",
    "We are committed to a sustained period of tightening, including several consecutive rate hikes, until inflation falls decisively back to our 2% target.",
    "Inflation is far too high, and we must take all necessary measures, including aggressive interest rate increases, to restore price stability.",
    "The current labor market is excessively tight, driving wage inflation, and we will continue to raise rates to ensure that inflationary pressures are contained.",
    "We are prepared to maintain a restrictive stance on monetary policy for an extended period, with further rate hikes expected in the coming months.",
    "Monetary tightening is necessary to reduce the risks of an overheating economy, and we will not hesitate to act decisively with higher rates to prevent runaway inflation.",
    "The risk of high inflation outweighs concerns about slowing economic growth. Rate hikes will continue until we are confident inflation is firmly under control.",
    "Given the strength of consumer demand and rising input costs, inflation remains a primary concern, and we will aggressively use rate hikes to cool down price pressures.",
    "We are unlikely to pivot toward accommodative policies anytime soon. Sustained interest rate hikes are necessary to restore price stability and anchor inflation expectations.",
    "With inflationary pressures proving more persistent than anticipated, the Federal Reserve will take preemptive actions to raise rates even higher, ensuring inflation does not become entrenched."
]

# Extremely Dovish Texts
dovish_texts = [
    "Given the risks of slowing economic growth, the Federal Reserve is prepared to maintain its accommodative stance and will keep interest rates low to support recovery and job creation.",
    "We believe that the current inflationary pressures are transitory, and therefore, it is crucial to avoid premature tightening that could derail economic growth.",
    "Our primary focus is on maximizing employment, and we are committed to keeping interest rates low until the labor market has fully recovered.",
    "In light of ongoing economic challenges, the Federal Reserve will continue to provide liquidity and maintain low rates to support the economy's sustained recovery.",
    "With unemployment still elevated in some sectors, we are committed to keeping monetary policy accommodative, ensuring that the recovery benefits all Americans.",
    "The current economic environment requires patience. The Federal Reserve will keep interest rates near zero for the foreseeable future to ensure continued growth and job creation.",
    "We see no need to raise interest rates in the near term. Our priority is supporting a full and inclusive recovery, especially for those still struggling in the labor market.",
    "To support economic activity, the Federal Reserve is committed to maintaining its low interest rate policy and using all available tools to stimulate demand.",
    "Given the uncertain global economic outlook, we are prepared to lower interest rates further or maintain them at current levels to prevent any slowdown in growth.",
    "We believe that accommodative monetary policy will be essential in supporting long-term economic growth, and therefore, rate hikes are not on the table in the near future."
] 

# Generate vector embeddings for the hawkish and dovish texts to compute cosine similarity
hawkish_embedding = get_embeddings(" ".join(hawkish_texts))
dovish_embedding = get_embeddings(" ".join(dovish_texts))


MASTER_DICTIONARY_FILE = '/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/LoughranMcDonald_MasterDictionary_2018.csv'

lm_dictionary = load_masterdictionary(MASTER_DICTIONARY_FILE, True)
n_words=[] ## list of negative words

for word in lm_dictionary.keys(): ##if word in loughran mcd list is negative then append to list of negative words 
    if(lm_dictionary[word].negative):
        n_words.append(word)

df_neg = pd.DataFrame(columns=n_words) ## stores dataframe that has all neg words as columns and no of docs as rows
tot_words=[] ##stores total words in doc i
neg_words=[] ##stores neg words in doc i
dov_score=[] ##stores dovish cosine similarity of  doc i
hawk_score=[] ##stores hawkish cosine similarity of  doc i
files=[] ##stores name of file i

import os

common='/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/'


folders = [f"{common}{'FOMC_Minutes'}", f"{common}{'FOMC_Press_Conference'}", f"{common}{'FOMC_Speech'}"]
### to iterrate over three data folders

# Initialize a dictionary to store the filenames for each folder
txt_files = []

for folder in folders:
    folder_path = os.path.join(common, folder)
    file_names = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.txt')]
    txt_files.append(file_names) ##append .txt files in all folder to a common list
    
txt_files=[item for sublist in txt_files for item in sublist] ##append .txt files in all folder to a common list

def main():
    global tot_words
    global neg_words
    global txt_files
    global files
    global dov_score
    global hawk_score
    for file in txt_files: ## read file one by one from txt_files
        print(file)
        with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
            doc = f_in.read()
            
        file_embedding = get_embeddings(doc) ## get vector embedding for file i
        
        ## compute cosine similairty to hawkish anf dovish target 
        similarity_to_hawkish = compute_cosine_similarity(file_embedding, hawkish_embedding)
        similarity_to_dovish = compute_cosine_similarity(file_embedding, dovish_embedding)
        
        doc_len = len(doc)
        doc = re.sub('(May|MAY)', ' ', doc)  # drop all May month references
        doc = doc.upper()  # for this parse caps aren't informative so shift
        output_data = get_data(doc)
        output_data[0] = file
        output_data[1] = doc_len
        tot_words.append(output_data[2]) ##append total wordds
        neg_words.append(output_data[4]) ##append eng wordds
        dov_score.append(similarity_to_dovish) ##append dovish score wordds
        hawk_score.append(similarity_to_hawkish) ##append hawkish score
        file_name = file.split('/')[-1]
        files.append(file_name) ##append file name


### routine to iterate over document token by token
def get_data(doc):
    global df_neg
    global df_neg_h
    vdictionary = {}
    _odata = [0] * 17
    total_syllables = 0
    word_length = 0
    
    tokens = re.findall('\w+', doc)  # Note that \w+ splits hyphenated words
    
    word_counts = {word:0 for word in n_words}
    
    for token in tokens:
        if not token.isdigit() and len(token) > 1 and token in lm_dictionary:
            _odata[2] += 1  # word count
            word_length += len(token)
            if token not in vdictionary:
                vdictionary[token] = 1
            if lm_dictionary[token].negative: ## add to neg word if word is negative
                _odata[4] += 1
                word_counts[token]+=1 
                
    df_neg = pd.concat([df_neg, pd.DataFrame([word_counts])], ignore_index=True) ## append a row to dataframe

        
    return _odata


if __name__ == '__main__':
    print('\n' + time.strftime('%c') + '\nGeneric_Parser.py\n')
    main()
    print('\n' + time.strftime('%c') + '\nNormal termination.')



df_tf_idf=df_neg.copy() ##df to. calculate tf-idf score
df_i=[0]*df_neg.shape[1]

###COMPUTING TF-IDF using fromaula provided in the loughran mcdonald paper
for i in range(df_tf_idf.shape[0]):#docs
    for j in range(df_tf_idf.shape[1]):## words
        if(df_tf_idf.iloc[i,j]):
            df_tf_idf.iloc[i,j]=(1+math.log(df_tf_idf.iloc[i,j]))/(1+math.log(tot_words[i]))
            df_i[j]+=1

df_i=[math.log(df_tf_idf.shape[0]/i) if i else i for i in df_i]
df_tf_idf = df_tf_idf.mul(df_i, axis=1)

df_=df_tf_idf.copy()
df_['tfidf']=[sum(df_.iloc[i,:]) for i in range(df_.shape[0])]
df_['file_name']=files
df_['hawkish']=hawk_score
df_['dovish']=dov_score
df_=df_[['file_name','tfidf','hawkish','dovish']]
df_

Some weights of RobertaModel were not initialized from the model checkpoint at gtfintechlab/FOMC-RoBERTa and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 ...Loading Master Dictionary 85000
Master Dictionary loaded from file: 
  /Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/LoughranMcDonald_MasterDictionary_2018.csv
  86,486 words loaded in master_dictionary.


Sat Oct 12 17:44:33 2024
Generic_Parser.py

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20131218.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20121023.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20181108.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20231101.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20161214.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20201216.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20190731.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20230503.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20170503.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20200129.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20180801.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20210728.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20130130.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20230201.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Minutes/FOMCminutes20240731.txt
/Users/dev

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20220921.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20200429.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20210616.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20170614.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20230920.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20210428.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20190130.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Press_Conference/FOMCpresconf20200610.txt
/Users/devanshjoshi/tensorflow-test/NLP-

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20201015a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/cook20231018a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20221020a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/fischer20160103a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20151201a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20161118a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20181004a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/stein20131004a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/fischer20150624a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodrigu

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20210623a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20240528a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20201001a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20170117a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20171213a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20230807a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20181207a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20231017a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20180213a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Ro

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/duke20120113a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20211117a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20170328a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20180326a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20210225a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20151202a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20180515a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20220826a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20231108a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodrigue

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20210711a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20220817b.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/cook20240625a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20210526b.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20191108a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20170303a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/duke20121109a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20150204a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/stein20131107a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/A

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/cook20221006a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20170926a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20180719a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/cook20230928a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20140305a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20171130a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20180406a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/tarullo20141107a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20240823a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/A

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20230926a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20161130a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20201001a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20240116a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/stein20121217a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20211119a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/jefferson20230531a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/clarida20190517a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/tarullo20140327a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodrig

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20231019a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20240625b.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20210322a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20181203a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/duke20120106a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20210511a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20201117a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20151112a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20240202a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodri

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20220525a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/fischer20170731a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20210909a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20170728a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/fischer20140811a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/yellen20150303a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20190820a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/quarles20200923a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20210503a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Ro

/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bowman20190325a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20230622a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/cook20231113a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20240520a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/duke20130205a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/brainard20190711a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/bernanke20131216a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/waller20211217a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/Assignment_2/Data/FOMC_Speech/powell20170706a.txt
/Users/devanshjoshi/tensorflow-test/NLP- Dan Rodriguez/

Unnamed: 0,file_name,tfidf,hawkish,dovish
0,FOMCminutes20131218.txt,20.529626,0.130615,0.537401
1,FOMCminutes20121023.txt,16.736768,0.137413,0.547877
2,FOMCminutes20181108.txt,13.417258,0.137908,0.538247
3,FOMCminutes20231101.txt,14.167732,0.135363,0.535311
4,FOMCminutes20161214.txt,12.337477,0.137458,0.539062
...,...,...,...,...
912,brainard20141202a.txt,4.514435,0.159401,0.535447
913,powell20180620a.txt,8.119419,0.319314,0.644812
914,waller20230511a.txt,17.852031,0.136195,0.521566
915,powell20190509a.txt,2.168036,0.158238,0.548112


In [71]:
df_.to_csv('df_hawk-dov-tfidf')