In [1]:
import nltk
nltk.download("punkt")
from newspaper import Article
from bs4 import BeautifulSoup
import nltk
from gtts import gTTS
import os
from TTS.api import TTS
import requests
import pandas as pd
import speech_recognition as sr
import time
import pyaudio
import wave

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/yaswanthganapathi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
from transformers import pipeline
summarizer = pipeline("summarization")

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import speech_recognition as sr
from TTS.api import TTS
import pyaudio
import wave
import time

def play_audio(file_path):
    """Play audio using pyaudio"""
    try:
        wf = wave.open(file_path, 'rb')
        p = pyaudio.PyAudio()

        # Open a stream
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)

        # Read data and play audio
        data = wf.readframes(1024)
        while data:
            stream.write(data)
            data = wf.readframes(1024)

        # Stop stream and close
        stream.stop_stream()
        stream.close()
        p.terminate()
        wf.close()
    except Exception as e:
        print(f"Error playing audio: {e}")

def speak(text, output_path="Voice_Output.wav"):
    """Speak text using preloaded TTS model and voice cloning"""
    if not text:
        return

    try:
        # Generate the cloned voice output
        TTS_MODEL.tts_to_file(text=text, file_path=output_path, speaker_wav=SPEAKER_WAV, language="en")

        # Play the audio file
        play_audio(output_path)
    except Exception as e:
        print(f"Error in speech synthesis: {e}")

def get_voice_input():
    """Get voice input from user and convert to text"""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Please speak the company name...")
        speak("Please speak the company name")
        recognizer.adjust_for_ambient_noise(source)
        try:
            audio = recognizer.listen(source, timeout=5)
            text = recognizer.recognize_google(audio)
            print(f"You said: {text}")
            speak(f"You said {text}")
            return text.upper()
        except Exception as e:
            print(f"Error in voice recognition: {e}")
            speak("I couldn't understand that. Please try again")
            return None

def extract_ticker(spoken_text):
    """Extract ticker symbol from spoken text using S&P 500 companies list"""
    if not spoken_text:
        return None

    sp500_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    tables_sp500 = pd.read_html(sp500_url)
    sp500_tickers = tables_sp500[0]['Symbol'].tolist()

    # Get NASDAQ tickers
    nasdaq_url = "https://en.wikipedia.org/wiki/Nasdaq-100"
    tables_nasdaq = pd.read_html(nasdaq_url)
    nasdaq_tickers = tables_nasdaq[4]['Symbol'].tolist()

    # Combine both ticker lists
    known_tickers = list(set(sp500_tickers + nasdaq_tickers))

    words = spoken_text.split()
    for word in words:
        if word in known_tickers:
            return word
    return None

def get_news(ticker):
    """Fetch news using Alpha Vantage API"""
    api_key = 'ENIPDF3XPHW9IUUE'
    url = f'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers={ticker}&limit=3&apikey={api_key}'

    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    return None

def get_article_text(url):
    """Extract text from article URL"""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = soup.find_all(['h1', 'p'])
        return ' '.join([p.get_text() for p in paragraphs])
    except Exception as e:
        print(f"Error fetching article: {e}")
        return None

def summarize_text(text):
    """Summarize text"""
    if not text:
        return "Unable to summarize article."

    # Example summarization logic (to be replaced with an actual summarizer)
    return text[:200] + "..." if len(text) > 200 else text

def format_news(news_data):
    """Format news data with summaries and read them aloud"""
    if not news_data or 'feed' not in news_data:
        msg = "No news available."
        speak(msg)
        return msg

    formatted_output = []
    for i, article in enumerate(news_data['feed'][:3], 1):
        article_text = get_article_text(article['url'])
        if article_text:
            summary = summarize_text(article_text)
            article_content = (
                f"Article {i}:\n"
                f"Title: {article['title']}\n"
                f"Source: {article['source']}\n"
                f"Summary: {summary}\n"
            )
            formatted_output.append(article_content)

            # Read each article
            speak(f"Article {i}")
            #time.sleep(0.5)
            speak(f"Title: {article['title']}")
            #time.sleep(0.5)
            speak(f"From: {article['source']}")
            #time.sleep(0.5)
            speak(f"Summary: {summary}")
            #time.sleep(1)  # Pause between articles

    return '\n'.join(formatted_output)

def main():
    print("Welcome to the Stock News Assistant!")
    speak("Welcome to the Stock News Assistant!")
    time.sleep(0.5)
    speak("Say exit to quit.")
    time.sleep(1)

    while True:
        # Step 1: Get voice input
        spoken_text = get_voice_input()
        if not spoken_text:
            continue

        # Check for exit command
        if 'EXIT' in spoken_text:
            speak("Thank you for using the Stock News Assistant. Goodbye!")
            break

        # Step 2: Extract ticker
        ticker = extract_ticker(spoken_text)
        if not ticker:
            speak("Could not identify a ticker symbol. Please try again.")
            continue

        msg = f"Processing news for {ticker}..."
        print(msg)
        speak(msg)

        # Step 3: Fetch news
        news_data = get_news(ticker)
        if not news_data:
            speak("Could not fetch news data. Please try again.")
            continue

        # Step 4 & 5: Format and summarize news
        formatted_news = format_news(news_data)
        print("\n" + formatted_news)

        speak("You can say another company name or say exit to quit.")
        time.sleep(1)

if __name__ == "__main__":
    main()


Welcome to the Stock News Assistant!
 > Text splitted to sentences.
['Welcome to the Stock News Assistant!']
 > Processing time: 7.671520948410034
 > Real-time factor: 1.6683469791743064
 > Text splitted to sentences.
['Say exit to quit.']
 > Processing time: 10.586360216140747
 > Real-time factor: 1.6759710135403754
Please speak the company name...
 > Text splitted to sentences.
['Please speak the company name']
 > Processing time: 15.592488050460815
 > Real-time factor: 1.706376367389924
You said: give me the stock news for IBM
 > Text splitted to sentences.
['You said give me the stock news for IBM']
 > Processing time: 10.953891277313232
 > Real-time factor: 1.7090265387237968
Processing news for IBM...
 > Text splitted to sentences.
['Processing news for IBM...']
 > Processing time: 8.978206872940063
 > Real-time factor: 1.6772524531341366
 > Text splitted to sentences.
['Article 2']
 > Processing time: 8.153517961502075
 > Real-time factor: 1.6639370562261286
 > Text splitted to 