The following code generates a sample output for our business use case. We use our model to extract sentiment labels for two tickers, NVIDIA and Qualcomm. The input file is 'demo_data.xlsx' and the output file is 'final_output.xlsx'.

In [8]:
import pandas as pd
import spacy
import yfinance
import re

from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline



# Load the demo articles from the Excel workbook into a DataFrame. The path is
# relative, so the workbook must sit in the current working directory.
data = pd.read_excel('demo_data.xlsx')


# Preview the first rows (shown as the cell output when run in a notebook).
data.head()

Unnamed: 0,title,url,time_published,authors,summary,banner_image,source,category_within_source,source_domain,topics,...,ticker_sentiment_label,news_text,keywords,company_name,relevant_sentences,predicted_sentiment,top_5,Unnamed: 19,Unnamed: 20,predictions
0,Nike's warnings on China should scare other co...,https://www.businessinsider.com/nike-stock-chi...,12/22/2023 0:00,Phil Rosen,Nike's warning on China should scare other com...,,Business Insider,GoogleRSS,www.businessinsider.com,"[{'topic': 'Finance', 'relevance_score': '0.25...",...,Somewhat-Bearish,"By clicking ‚ÄúSign Up‚Äù, you accept our Term...","['qcom', 'qualcomm']",QUALCOMM Incorporated,"[""Bank of America strategists warned in Septem...",negative,"[""Bank of America strategists warned in Septem...",,,
1,Arm laying off and relocating over 70 engineer...,https://www.scmp.com/tech/big-tech/article/324...,2023-12-18 15:39:12+08:00,,The British firm will offer new China-related ...,https://cdn.i-scmp.com/sites/default/files/sty...,South China Morning Post,Companies,www.scmp.com,"[{'topic': 'IPO', 'relevance_score': '0.158519...",...,Somewhat-Bearish,The jobs being terminated are currently filled...,"['qcom', 'qualcomm']",QUALCOMM Incorporated,['The British firm‚Äôs actions mirrored those ...,negative,['The British firm‚Äôs actions mirrored those ...,,,
2,Josh Brown Sold Some Nvidia Stock Today: Why H...,https://www.benzinga.com/news/24/02/36968318/j...,,Adam Eckert,Nvidia Corp NVDA shares skyrocketed more than ...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",...,Bullish,Nvidia Corp NVDA shares skyrocketed more than ...,"['nvda', 'nvidia']",NVIDIA Corporation,['Nvidia Corp NVDA shares skyrocketed more tha...,positive,['Nvidia Corp NVDA shares skyrocketed more tha...,nvidia corp nvda shares skyrocketed more than...,NVIDIA corp NVDA shares skyrocketed more than ...,
3,Surging Against The Odds - Nvidia's Astounding...,https://www.benzinga.com/tech/24/02/37013534/s...,,Zaheer Anwari,Nvidia faces significant challenges due to res...,https://cdn.benzinga.com/files/boliviaintelige...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",...,Bullish,Nvidia faces significant challenges due to res...,"['nvda', 'nvidia']",NVIDIA Corporation,['Nvidia faces significant challenges due to r...,positive,['Nvidia faces significant challenges due to r...,morgan stanley raised nvidia ' s price target...,Morgan Stanley raised NVIDIA' s price target t...,


In [9]:
#####################################################################################################
## DEFINE FUNCTIONS


## Feature extraction functions----------------------------------

def get_company_name(ticker):
    """
    Get the full company name for a given stock ticker.

    The lookup is best-effort: any exception raised while querying yfinance
    (for example a missing 'longName' entry) results in an empty string rather
    than an error.

    Args:
    - ticker (str): The stock ticker to look up.

    Returns:
    - str: The full company name for the stock ticker, or '' if it could not
      be determined.
    """
    try:
        company = yfinance.Ticker(ticker).info['longName']
    except Exception:
        # Deliberately broad (best-effort lookup), but unlike a bare `except:`
        # this lets KeyboardInterrupt and SystemExit propagate.
        return ''
    # Always hand back a string, even if the field is present but empty/None.
    return company or ''


def compiled_list_of_keywords(ticker, company_name):
    """
    Generate a list of keywords to search for in news articles based on the stock ticker and company name.

    The result contains, in order: the ticker, the full company name, and each
    individual word of the company name after non-letter characters have been
    stripped. Empty entries are left out.

    Args:
    - ticker (str): The stock ticker to generate keywords for.
    - company_name (str): The full company name to generate keywords for.
      May be '' when the name is unknown.

    Returns:
    - list: A list of non-empty keywords to search for in news articles.
    """
    # Keep only letters and whitespace so the name splits into clean words
    # (e.g. "AT&T Inc." -> "ATT Inc").
    cleaned_name = re.sub(r'[^a-zA-Z\s]', '', company_name)
    candidates = [ticker, company_name, *cleaned_name.split()]
    # An empty keyword is a substring of every sentence, so a substring search
    # with it would match everything. Never emit one.
    return [keyword for keyword in candidates if keyword]


def find_relevant_sentences(text, keywords):
    """
    Extract sentences from the provided text that contain any of the specified keywords.

    Matching is a case-insensitive substring test. Sentence splitting is
    delegated to the module-level `nlp` callable, whose result must expose a
    `.sents` iterable of objects with a `.text` attribute.

    Args:
    - text (str): The text to search within.
    - keywords (list): A list of keywords to search for. Empty keywords are ignored.

    Returns:
    - list: A list of sentences from the text that contain any of the keywords
      (empty when there are no usable keywords).
    """
    # Lowercase once for case-insensitive matching. Empty keywords are dropped
    # because '' is a substring of every string and would mark every sentence
    # as relevant.
    keywords_lower = {keyword.lower() for keyword in keywords if keyword}
    if not keywords_lower:
        # Nothing can match, so skip the sentence segmentation entirely.
        return []

    doc = nlp(text)

    relevant_sentences = []
    for sent in doc.sents:
        # Lowercase the sentence once instead of once per keyword.
        sentence_lower = sent.text.lower()
        if any(keyword in sentence_lower for keyword in keywords_lower):
            relevant_sentences.append(sent.text)

    return relevant_sentences

def process_keywords(keywords):
    """
    Process the list of keywords to remove any duplicates and convert to lowercase.

    A keyword is kept only if, once lowercased, it
    - consists solely of alphabetic characters (so multi-word names and
      anything containing digits, spaces or punctuation are dropped), and
    - does not contain "inc" or "corporation" (generic company suffixes).

    The first occurrence of each keyword is kept and the original order is
    preserved. The input list is not modified.

    Args:
    - keywords (list): A list of keywords to process.

    Returns:
    - list: The processed list of keywords.
    """
    processed = []
    seen = set()
    # Build a new list rather than removing from the list being iterated:
    # removing during iteration shifts the remaining items and skips the
    # element right after each removal.
    for keyword in keywords:
        word = keyword.lower()
        if not word.isalpha():
            continue
        # NOTE(review): this is a substring test, so it also drops unrelated
        # words that merely contain "inc" (e.g. "zinc"); kept as in the
        # original filter.
        if "inc" in word or "corporation" in word:
            continue
        if word in seen:
            continue
        seen.add(word)
        processed.append(word)

    return processed


## Model Processing functions----------------------------------

def aggregate_article_sentiment(sentiments, index):
    """
    Aggregates sentiment scores from individual sentences to determine the overall article sentiment.

    Scores are summed per label and the label with the largest total wins.
    Entries whose label is not 'positive', 'neutral' or 'negative' are ignored.
    When there is nothing to aggregate (no sentiments, or no recognised labels
    with a non-zero score) the result is 'neutral'. The chosen label is also
    printed together with the row index.

    Parameters:
    - sentiments (list of dicts): Each dict contains 'label' and 'score' for a sentence.
    - index: Identifier of the row being processed; only used in the printed message.

    Returns:
    - str: The overall sentiment of the article ('positive', 'neutral', 'negative').
    """
    # Running total of the scores for each sentiment
    total_scores = {'positive': 0, 'neutral': 0, 'negative': 0}

    for sentiment in sentiments:
        label = sentiment['label']
        if label in total_scores:
            total_scores[label] += sentiment['score']

    if any(total_scores.values()):
        # Dividing every total by the same sum would not change which one is
        # largest, so the argmax is taken on the raw totals. Ties resolve to
        # the first key in dict order (positive, neutral, negative).
        overall_sentiment = max(total_scores, key=total_scores.get)
    else:
        # No evidence at all: without this branch max() would fall back to the
        # first key and report 'positive' for an empty article.
        overall_sentiment = 'neutral'

    print(f"Overall Sentiment for row {index}: {overall_sentiment}")

    return overall_sentiment


def chunk_sentence(sentences, chunk_size):
    """
    Split a sequence into consecutive slices of at most `chunk_size` items.

    Parameters:
    - sentences (list): The sequence to split; anything supporting len() and
      slicing works (a list yields sub-lists, a string yields sub-strings).
    - chunk_size (int): The maximum number of items in each chunk.

    Returns:
    - list: The slices in their original order; the last one may be shorter.
    """
    # Start offset of every chunk: 0, chunk_size, 2 * chunk_size, ...
    starts = range(0, len(sentences), chunk_size)
    return [sentences[start:start + chunk_size] for start in starts]


def processed_relevant_sentences(relevant_sentences):
    """
    Break every relevant sentence into pieces of at most 512 items and return
    all pieces as one flat list.

    Parameters:
    - relevant_sentences (list): The relevant sentences to process.

    Returns:
    - list: All pieces produced by chunk_sentence(sentence, 512), in order.
    """
    pieces = []
    for sentence in relevant_sentences:
        # chunk_sentence slices its argument, so for a string the 512 limit
        # counts characters.
        pieces.extend(chunk_sentence(sentence, 512))
    return pieces





In [10]:
## Model Processing - GETTING SENTIMENT LABELS
import ast

# NOTE(review): `nlp` is bound to the spaCy model here and rebound to the
# transformers pipeline a few lines below, so the spaCy model is discarded.
# find_relevant_sentences() reads the global `nlp` and iterates `doc.sents`,
# which the sentiment pipeline's output does not provide, so that function
# cannot be used once this cell has run. The names are left as they are because
# other cells may rely on `nlp` being the pipeline; consider giving the two
# objects distinct names.
nlp = spacy.load('en_core_web_md')

model = BertForSequenceClassification.from_pretrained("ahmedrachid/FinancialBERT-Sentiment-Analysis",num_labels=3)
tokenizer = BertTokenizer.from_pretrained("ahmedrachid/FinancialBERT-Sentiment-Analysis")

nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


def _sentences_from_cell(cell):
    """
    Turn one 'top_5' cell into a list of sentence strings.

    Cells read back from the workbook appear to hold the *string* form of a
    Python list (e.g. "['first sentence', 'second sentence']"), so that form
    is decoded. Text that does not decode to a list is treated as a single
    sentence, and missing values (None / NaN) give an empty list.
    """
    if isinstance(cell, (list, tuple)):
        items = cell
    elif isinstance(cell, str):
        try:
            decoded = ast.literal_eval(cell)
        except (ValueError, SyntaxError):
            # Plain text rather than a serialized list
            decoded = cell
        items = decoded if isinstance(decoded, (list, tuple)) else [cell]
    else:
        items = []
    return [str(item) for item in items if str(item).strip()]


for index, row in data.iterrows():
    # Score each sentence separately. Passing the raw cell would make the model
    # classify the whole serialized list (brackets and quotes included) as one
    # text, leaving a single score to aggregate.
    sentences = _sentences_from_cell(row['top_5'])
    if not sentences:
        # Nothing to classify for this row; keep whatever value it already has.
        print(f"Row {index}: no sentences to classify, skipping")
        continue
    # Given a list of texts, the pipeline returns one {'label', 'score'} dict per text
    sentence_sentiments = nlp(sentences)
    # Combine the per-sentence results into a single label for the article
    overall_sentiment = aggregate_article_sentiment(sentence_sentiments, index)
    # Assign the calculated overall sentiment back to the DataFrame
    data.at[index, 'predicted_sentiment'] = overall_sentiment

data.head()

Overall Sentiment for row 0: negative
Overall Sentiment for row 1: negative
Overall Sentiment for row 2: positive
Overall Sentiment for row 3: positive


Unnamed: 0,title,url,time_published,authors,summary,banner_image,source,category_within_source,source_domain,topics,...,ticker_sentiment_label,news_text,keywords,company_name,relevant_sentences,predicted_sentiment,top_5,Unnamed: 19,Unnamed: 20,predictions
0,Nike's warnings on China should scare other co...,https://www.businessinsider.com/nike-stock-chi...,12/22/2023 0:00,Phil Rosen,Nike's warning on China should scare other com...,,Business Insider,GoogleRSS,www.businessinsider.com,"[{'topic': 'Finance', 'relevance_score': '0.25...",...,Somewhat-Bearish,"By clicking ‚ÄúSign Up‚Äù, you accept our Term...","['qcom', 'qualcomm']",QUALCOMM Incorporated,"[""Bank of America strategists warned in Septem...",negative,"[""Bank of America strategists warned in Septem...",,,
1,Arm laying off and relocating over 70 engineer...,https://www.scmp.com/tech/big-tech/article/324...,2023-12-18 15:39:12+08:00,,The British firm will offer new China-related ...,https://cdn.i-scmp.com/sites/default/files/sty...,South China Morning Post,Companies,www.scmp.com,"[{'topic': 'IPO', 'relevance_score': '0.158519...",...,Somewhat-Bearish,The jobs being terminated are currently filled...,"['qcom', 'qualcomm']",QUALCOMM Incorporated,['The British firm‚Äôs actions mirrored those ...,negative,['The British firm‚Äôs actions mirrored those ...,,,
2,Josh Brown Sold Some Nvidia Stock Today: Why H...,https://www.benzinga.com/news/24/02/36968318/j...,,Adam Eckert,Nvidia Corp NVDA shares skyrocketed more than ...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",...,Bullish,Nvidia Corp NVDA shares skyrocketed more than ...,"['nvda', 'nvidia']",NVIDIA Corporation,['Nvidia Corp NVDA shares skyrocketed more tha...,positive,['Nvidia Corp NVDA shares skyrocketed more tha...,nvidia corp nvda shares skyrocketed more than...,NVIDIA corp NVDA shares skyrocketed more than ...,
3,Surging Against The Odds - Nvidia's Astounding...,https://www.benzinga.com/tech/24/02/37013534/s...,,Zaheer Anwari,Nvidia faces significant challenges due to res...,https://cdn.benzinga.com/files/boliviaintelige...,Benzinga,Trading,www.benzinga.com,"[{'topic': 'Financial Markets', 'relevance_sco...",...,Bullish,Nvidia faces significant challenges due to res...,"['nvda', 'nvidia']",NVIDIA Corporation,['Nvidia faces significant challenges due to r...,positive,['Nvidia faces significant challenges due to r...,morgan stanley raised nvidia ' s price target...,Morgan Stanley raised NVIDIA' s price target t...,
