<a href="https://colab.research.google.com/github/m4rk-lewis/GPT-3_breaking_news_parse_with_sentiment_classify/blob/main/GPT_3_breaking_financial_news_summary_and_sentiment_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# credit to: @m4rk-lewis https://github.com/m4rk-lewis/GPT-3_breaking_news_parse_with_sentiment_classify

# Step 1: Install required packages
!pip install feedparser beautifulsoup4 openai




[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Step 2: Download and parse the RSS feed
import feedparser
def get_feed(url):
    """Parse the feed located at ``url`` with feedparser and return the parsed result."""
    parsed_feed = feedparser.parse(url)
    return parsed_feed

In [3]:
# Step 3: Extract HTML from the description
from bs4 import BeautifulSoup

def extract_text(html):
    """Return the text of ``html`` as extracted by BeautifulSoup's "html.parser" backend."""
    return BeautifulSoup(html, "html.parser").get_text()

In [9]:
from dotenv import load_dotenv
load_dotenv()
import os
import openai

# The key is read from the environment variable "api_key" instead of being hard-coded
# like the placeholder below. load_dotenv() was called earlier in this cell, presumably
# so the variable can come from a local .env file -- confirm the file defines "api_key".
# os.environ.get returns None when the variable is unset; no error is raised here.
# openai.api_key = "insert-openAI-API-here"
openai.api_key = os.environ.get("api_key")

# Step 4: Summarize using GPT-3
def summarize_text(text):
    """Summarize a news article with the OpenAI completion endpoint.

    The article is fenced with triple quotes and the prompt ends with an explicit
    ``Summary:`` cue. A completion model continues whatever text comes last, so when
    the article was appended directly after the instruction and was cut off
    mid-sentence, the model finished the article instead of summarizing it.

    Args:
        text: Plain-text article body.

    Returns:
        The generated summary with surrounding whitespace removed, or ``""`` when
        ``text`` is empty or whitespace-only (no API request is made in that case).
    """
    article = (text or "").strip()
    if not article:
        # Nothing to summarize, so skip the API round-trip entirely.
        return ""

    prompt = (
        "Summarize the following news article in one or two sentences.\n\n"
        f'Article:\n"""\n{article}\n"""\n\n'
        "Summary:"
    )
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.3,
        max_tokens=50,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0,
    )
    return response.choices[0].text.strip()

# Step 5: Sentiment classification
def sentiment_classification(text):
    """Score the equity sentiment of a financial news article on a -1 to 1 scale.

    The article is fenced with triple quotes and the prompt ends with a ``Sentiment:``
    cue, so the completion model answers the instruction instead of continuing a
    truncated article. The reply is accepted only when it is a bare number (optionally
    prefixed with a ``Sentiment:`` label) inside the requested range.

    Args:
        text: Plain-text article body.

    Returns:
        A float in ``[-1.0, 1.0]`` (-1 most bearish, 1 most bullish), or ``None`` when
        ``text`` is empty/whitespace-only or the reply is not a usable number.
    """
    import re  # local import: keeps this cell independent of other cells' imports

    article = (text or "").strip()
    if not article:
        # No article means no API request and no score.
        return None

    prompt = (
        "Classify the sentiment of this breaking financial news article as a numerical "
        "float, with a range of -1 to 1 with 0.1 granularity, where -1 is maximum "
        "bearishness and 1 is maximum bullishness in relation to equities. Reply only "
        "with the float number as requested, do not include any text in the response.\n\n"
        f'Article:\n"""\n{article}\n"""\n\n'
        "Sentiment:"
    )
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.3,
        max_tokens=10,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0,
    )
    # Normalize a typographic minus sign so it parses like an ASCII "-".
    sentiment_str = response.choices[0].text.strip().replace("\u2212", "-")

    # The whole reply must be a number; prose that merely contains a number
    # (for example a share price) is rejected rather than mistaken for a score.
    match = re.fullmatch(
        r"(?:sentiment\s*[:=]?\s*)?([-+]?(?:\d+(?:\.\d+)?|\.\d+))[\s.]*",
        sentiment_str,
        flags=re.IGNORECASE,
    )
    if match is None:
        return None

    sentiment = float(match.group(1))
    if not -1.0 <= sentiment <= 1.0:
        # Outside the scale the prompt asked for: treat as unusable.
        return None
    return sentiment

In [10]:
# Step 6: Store the summarized information in a SQLite database
import sqlite3

def create_db(db_path='nvda_yahoo_rss.db'):
    """Open the SQLite database and make sure the ``news`` table exists.

    Args:
        db_path: Database location handed to ``sqlite3.connect``. Defaults to the
            file name this notebook has always used; pass ``":memory:"`` for a
            throwaway in-memory database.

    Returns:
        A ``(conn, cursor)`` tuple: the open connection and a cursor created from it.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # All four columns, including sentiment, are declared TEXT.
    cursor.execute(
        '''CREATE TABLE IF NOT EXISTS news (published TEXT, title TEXT, summary TEXT, sentiment TEXT)'''
    )
    conn.commit()
    return conn, cursor

def insert_news(cursor, published, title, summary, sentiment):
    """Insert one row into the ``news`table`` and commit it immediately.

    Args:
        cursor: sqlite3 cursor on the database that holds the ``news`` table.
        published: Publication timestamp text.
        title: Article headline.
        summary: Generated summary text.
        sentiment: Sentiment score, or ``None`` when no score is available.
    """
    cursor.execute(
        "INSERT INTO news (published, title, summary, sentiment) VALUES (?, ?, ?, ?)",
        (published, title, summary, sentiment),
    )
    # Commit through the cursor's own connection rather than a global named `conn`,
    # so the function works for whatever connection the caller's cursor belongs to.
    cursor.connection.commit()

In [12]:
# Step 7: Continuously monitor the RSS feed for updates
import time

def monitor_feed(url, conn, cursor, interval=60):
    """Poll an RSS feed forever, summarizing and storing each entry not seen before.

    Entries are de-duplicated by title for the lifetime of this call. Each new entry
    is summarized, sentiment-scored, written to the database and printed.

    Args:
        url: Address of the RSS feed to poll.
        conn: Open SQLite connection; it is closed once, when the loop exits.
        cursor: Cursor used for the inserts.
        interval: Seconds to wait between polls.

    The loop has no normal exit: it ends only when an exception (for example a
    KeyboardInterrupt from interrupting the kernel) propagates out of it. The
    exception is not swallowed; the connection is closed on the way out.
    """
    seen_titles = set()

    try:
        while True:
            feed = get_feed(url)
            for entry in feed.entries:
                title = entry.title
                if title in seen_titles:
                    continue
                seen_titles.add(title)

                published = extract_text(entry.published)
                description = extract_text(entry.description)
                # Cap the text sent to the model at 4000 characters.
                article = description[:4000]
                summary = summarize_text(article)
                sentiment = sentiment_classification(article)
                insert_news(cursor, published, title, summary, sentiment)
                print(f"{published} >>> {title} >>> {description} >>> Summary: {summary} >>> Sentiment: {sentiment}")

            # Wait before the next poll instead of re-fetching the feed in a tight loop.
            time.sleep(interval)
    finally:
        # Close only after the loop has ended; closing inside the loop would leave
        # every later poll writing to a closed connection.
        conn.close()

In [13]:
'''
# This code will download the RSS feed and process new entries every 60 seconds. 
Adjust the interval parameter in monitor_feed() to control how often the feed is checked.
'''
# Entry point for the pipeline: open the database, then start monitoring the feed.
if __name__ == "__main__":
    # Yahoo Finance headline RSS feed, queried with s=NVDA.
    url = "http://finance.yahoo.com/rss/headline?s=NVDA"
    # `conn` and `cursor` are module-level names here, so other cells can
    # reference them -- keep these names unchanged.
    conn, cursor = create_db()
    # monitor_feed runs a `while True` loop, so this call does not return on its
    # own; the recorded cell output ends with a KeyboardInterrupt.
    monitor_feed(url, conn, cursor)


Tue, 09 May 2023 21:45:09 +0000 >>> Nvidia (NVDA) Dips More Than Broader Markets: What You Should Know >>> In the latest trading session, Nvidia (NVDA) closed at $285.71, marking a -1.99% move from the previous day. >>> Summary: Nvidia (NVDA) closed at $285.71 in the latest trading session, representing a decrease of 1.99% from the previous day. >>> Sentiment: -0.9
Tue, 09 May 2023 20:57:01 +0000 >>> What's Going On With NVIDIA Stock Tuesday >>> Nvidia Corp (NASDAQ: NVDA) shares are trading lower Tuesday as Advanced Micro Devices, Inc (NASDAQ: AMD) shared plans to showcase its growth strategy and expanding product portfolio and capabilities for data center and AI at a June 13 livestreaming event. AMD Chair and CEO Dr. Lisa Su will detail new products and momentum across data center, AI, adaptive, and high-performance computing solutions. Also, Monday reports suggested that U.S. sanctions compelled the Chinese firms, including Alibaba Gr >>> Summary: p Hldg Ltd (NYSE: BABA) and Tencent 

KeyboardInterrupt: 

In [14]:
import sqlite3
import pandas as pd
from datetime import datetime

# Open the SQLite file with the same name that create_db() uses.
con = sqlite3.connect('nvda_yahoo_rss.db')
# Load every row of the `news` table into a DataFrame.
data = pd.read_sql_query('Select * from news;', con)

# Optional clean-up, currently disabled: cast `sentiment` to float and reformat
# `published` from the RSS timestamp layout to a short "month-day hour:minute" form.
# NOTE(review): the rows displayed below include non-numeric sentiment strings, so
# the astype(float) line would likely raise if re-enabled as-is -- confirm first.
# data['sentiment'] = data['sentiment'].astype(float)
# parse_format = "%a, %d %b %Y %H:%M:%S %z"
# format_output = "%m-%d %H:%M" # %Y-%m-%d %H:%M:%S
# data['published'] = data['published'].apply(lambda x: datetime.strptime(x, parse_format).strftime(format_output))

# Last expression of the cell: shows the first rows of the frame.
data.head()

Unnamed: 0,published,title,summary,sentiment
0,"Tue, 09 May 2023 21:45:09 +0000",Nvidia (NVDA) Dips More Than Broader Markets: ...,Nvidia (NVDA) closed at $285.71 in the latest ...,-0.9
1,"Tue, 09 May 2023 20:57:01 +0000",What's Going On With NVIDIA Stock Tuesday,p Hldg Ltd (NYSE: BABA) and Tencent Holdings L...,p Hldg Ltd (NYSE: BABA
2,"Tue, 09 May 2023 20:46:22 +0000","Despite An Another Bad Update, Intel Still Bel...",it can turn things around.\n\nIntel Corporatio...,that its long-term prospects remain positive.
3,"Tue, 09 May 2023 16:46:30 +0000","PayPal, IBM turn to A.I. to cut costs",PayPal is introducing an artificial intelligen...,0.5
4,"Tue, 09 May 2023 15:08:48 +0000",AI Stocks To Watch As Big Tech Braces For Arti...,This article discusses the best stocks to buy ...,0.7
