In [1]:
from finbert_embedding.embedding import FinbertEmbedding

# Sample financial-news sentence used to demonstrate the embedding API.
text = "Another PSU bank, Punjab National Bank which also reported numbers managed to see a slight improvement in asset quality."

# Class initialization. To use your own fine-tuned BERT model, pass its path
# as `model_path=...`; the default is `model_path=None`.
finbert = FinbertEmbedding()

# One embedding vector per token, and a single vector for the whole sentence
# (see the printed shapes below).
word_embeddings = finbert.word_vector(text)
sentence_embedding = finbert.sentence_vector(text)

# NOTE(review): `finbert.tokens` is only read after the calls above --
# presumably they populate it; confirm before reordering these statements.
print("Text Tokens: ", finbert.tokens)
# Text Tokens:  ['another', 'psu', 'bank', ',', 'punjab', 'national', 'bank', 'which', 'also', 'reported', 'numbers', 'managed', 'to', 'see', 'a', 'slight', 'improvement', 'in', 'asset', 'quality', '.']

# Rows = number of token vectors, columns = length of each token vector.
print ('Shape of Word Embeddings: %d x %d' % (len(word_embeddings), len(word_embeddings[0])))
# Shape of Word Embeddings: 21 x 768

# Length of the single sentence-level vector.
print("Shape of Sentence Embedding = ",len(sentence_embedding))
# Shape of Sentence Embedding =  768

2023-12-15 10:17:52.520650: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Text Tokens:  ['another', 'psu', 'bank', ',', 'punjab', 'national', 'bank', 'which', 'also', 'reported', 'numbers', 'managed', 'to', 'see', 'a', 'slight', 'improvement', 'in', 'asset', 'quality', '.']
Shape of Word Embeddings: 21 x 768
Shape of Sentence Embedding =  768


In [2]:
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd

# Example headlines to score. The wording (including the "predics" typo) is
# kept exactly as written: it is the model input, so editing it would change
# the scores.
df_list = ["Punjab National Bank which also reported numbers managed to see a slight improvement in asset quality.",
          "Citi bank predics housing market crash in 2024",
          "Apple stock sink due to EU proposal",
          "Fund obligations created pursuant to derivative instruments may give rise to leverage, which may subject the fund to heightened avoid risk of loss."]

# Tokenizer and sequence-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

# Tokenize the whole batch at once; padding/truncation give a rectangular
# tensor batch that can be passed straight to the model.
inputs = tokenizer(df_list, padding = True, truncation = True, return_tensors='pt')

# Inference only: disable autograd so no graph is built and the resulting
# probabilities do not carry gradient state.
with torch.no_grad():
    outputs = model(**inputs)

# Convert logits to per-class probabilities (each row sums to 1).
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Resolve each class column from the model's own label map instead of
# hard-coding positions, so the table stays correct if the checkpoint's
# label order differs.
label_index = {str(label).lower(): int(idx) for idx, label in model.config.id2label.items()}

positive = predictions[:, label_index["positive"]].tolist()
negative = predictions[:, label_index["negative"]].tolist()
neutral = predictions[:, label_index["neutral"]].tolist()

# One row per headline with its three class probabilities.
table = {'Headline':df_list, "Positive":positive, "Negative":negative, "Neutral":neutral}

df2 = pd.DataFrame(table, columns = ["Headline", "Positive", "Negative", "Neutral"])

df2.head()

Unnamed: 0,Headline,Positive,Negative,Neutral
0,Punjab National Bank which also reported numbe...,0.958428,0.023605,0.017967
1,Citi bank predics housing market crash in 2024,0.023949,0.801739,0.174312
2,Apple stock sink due to EU proposal,0.016743,0.89126,0.091997
3,Fund obligations created pursuant to derivativ...,0.022261,0.56716,0.410579
