### Financial Text Sentiment Analysis


In [None]:
import os
from dotenv import load_dotenv
from newsapi import NewsApiClient
from transformers import pipeline
from datetime import datetime, timedelta
import pandas as pd

# Load variables from a local .env file into the process environment, then
# read the NewsAPI credential from it.
load_dotenv()
key = os.environ.get("NEWSAPI_KEY")

# Client used to fetch articles, and the text classifier ("ProsusAI/finbert")
# used to label each article's content.
newsapi = NewsApiClient(api_key=key)
pipe = pipeline("text-classification", model="ProsusAI/finbert")

# Search term, and the numeric value assigned to each classifier label.
keyword = 'palantir'
label_map = {
    "positive": 1,
    "neutral": 0,
    "negative": -1,
}

# Date window to scan: a fixed start day up to the moment the cell is run.
start_date = datetime(2025, 5, 7)
end_date = datetime.now()

# One {'date', 'sentiment_score'} record is appended here per day processed.
daily_scores = list()

# Cursor for the day-by-day walk over the window.
current_date = start_date

# Walk the date window one day at a time. For each day, fetch articles for the
# keyword, classify each article's content, and record a confidence-weighted
# average of the label values (+1 / 0 / -1) as that day's sentiment score.
keyword_lc = keyword.lower()  # loop-invariant; computed once

while current_date < end_date:
    next_date = current_date + timedelta(days=1)

    news = newsapi.get_everything(
        q=keyword,
        from_param=current_date.strftime('%Y-%m-%d'),
        to=next_date.strftime('%Y-%m-%d'),
        language='en',
        sort_by='relevancy',
    )

    # Keep only articles that mention the keyword in the title or description.
    # Either field can come back as None (JSON null), so substitute '' rather
    # than calling .lower() on None, which would abort the whole run.
    articles = [
        article for article in news['articles']
        if keyword_lc in (article.get('title') or '').lower()
        or keyword_lc in (article.get('description') or '').lower()
    ]

    total_weight = 0
    weighted_sum = 0

    for article in articles:
        content = article.get('content')
        if not content:
            # Nothing to classify for this article.
            continue

        # The pipeline returns a list of results; take the first one.
        sentiment = pipe(content)[0]
        # Unknown labels count as neutral (0).
        score = label_map.get(sentiment['label'].lower(), 0)
        # The classifier's confidence is used as the article's weight.
        weight = sentiment['score']

        weighted_sum += score * weight
        total_weight += weight

    # Days with no classifiable articles get a score of 0.
    final_score = weighted_sum / total_weight if total_weight > 0 else 0
    daily_scores.append({
        'date': current_date.strftime('%Y-%m-%d'),
        'sentiment_score': final_score,
    })

    current_date = next_date

# Tabulate the per-day records, echo the table, and save it as a CSV file
# (without the index column).
df = pd.DataFrame(data=daily_scores)
print(df)

df.to_csv(path_or_buf='daily_sentiment_scores.csv', index=False)



Device set to use cpu


          date  sentiment_score
0   2025-05-07         0.036310
1   2025-05-08         0.116714
2   2025-05-09         0.066564
3   2025-05-10         0.194385
4   2025-05-11        -0.036524
5   2025-05-12         0.039492
6   2025-05-13         0.095310
7   2025-05-14         0.084889
8   2025-05-15         0.020153
9   2025-05-16        -0.067974
10  2025-05-17         0.006837
11  2025-05-18        -0.321212
12  2025-05-19         0.054354
13  2025-05-20         0.193016
14  2025-05-21         0.022187
15  2025-05-22         0.092537
16  2025-05-23         0.191797
17  2025-05-24         0.097353
18  2025-05-25         0.134601
19  2025-05-26         0.312057
20  2025-05-27         0.301800
21  2025-05-28         0.353887
22  2025-05-29         0.198328
23  2025-05-30         0.040010
24  2025-05-31         0.170645
25  2025-06-01        -0.040374
26  2025-06-02        -0.099764
27  2025-06-03        -0.001489
28  2025-06-04        -0.098330
29  2025-06-05        -0.106249
30  2025