In [1]:
import os
import pandas as pd
import numpy as np
from yahoo_fin import news
from finvizfinance.quote import finvizfinance
from openai import OpenAI
from loguru import logger
import argparse
import datetime


def get_latest_news(ticker: str, source: str = 'yf') -> list:
    """
    Fetches the latest news for a given stock ticker using either the yahoo_fin or finviz library.

    The call is best-effort: any failure (network error, unexpected feed
    layout, unknown ``source``) is logged and an empty list is returned, so
    the result is always a list, as the annotation promises.

    :param ticker: The stock ticker symbol.
    :param source: The news source to fetch from. Options are 'yf' for yahoo_fin and 'finviz' for finvizfinance.
                   Default value is 'yf'.
    :return: A list of dicts with the keys "ticker", "title", "summary", "url" and "time".
             "summary" is always None for the 'finviz' source. The list is empty when
             nothing could be fetched.
    """
    news_list = []
    try:
        if source == 'yf':
            # Imported locally under an alias: the notebook rebinds the global
            # name `news` to this function's result, which would otherwise hide
            # the yahoo_fin module on every call after the first one.
            from yahoo_fin import news as yf_news

            logger.info(f'Fetching latest news for {ticker} from Yahoo! Finance')
            for article in yf_news.get_yf_rss(ticker):
                news_list.append({
                    "ticker": ticker,
                    "title": article["title"],
                    "summary": article["summary"],
                    "url": article["link"],
                    "time": article["published"]
                })
        elif source == 'finviz':
            logger.info(f'Fetching latest news for {ticker} from Finviz')
            news_data = finvizfinance(ticker).ticker_news()
            for _, row in news_data.iterrows():
                news_list.append({
                    "ticker": ticker,
                    "title": row["Title"],
                    "summary": None,
                    "url": row["Link"],
                    "time": row["Date"]
                })
        else:
            raise ValueError("Invalid news source. Please choose either 'yf' or 'finviz'.")

        return news_list

    except Exception as e:
        # Keep the best-effort contract (log, do not raise), but hand back an
        # empty list instead of an implicit None so callers can iterate safely.
        logger.error(f"An error occurred: {e}")
        return []


def get_gpt_embeddings(client, text: str, model: str = "text-embedding-ada-002", mock: bool = False):
    """
    Request an embedding vector for a piece of text from the OpenAI API.

    Args:
        client (object): OpenAI client object; only ``client.embeddings.create`` is used.
        text (str): The text to embed. Newlines are replaced by spaces before sending.
        model (str): The embedding model to use (default is "text-embedding-ada-002").
        mock (bool): If True, skip the API entirely and return a list of 100 zeros.

    Returns:
        list | None: The embedding of the text, or None when ``text`` is empty/None
        or when the API call raises (the error is logged).
    """
    # Mock mode never touches the client or the text.
    if mock:
        return [0] * 100

    # Nothing to embed.
    if not text:
        return None

    single_line = text.replace("\n", " ")
    try:
        result = client.embeddings.create(input=[single_line], model=model)
        return result.data[0].embedding
    except Exception as err:
        logger.error(f"An error occurred: {err}")
        return None


In [2]:

# The API key must come from the environment the kernel was started in.
# Never hardcode it here, and never overwrite an exported key with a
# placeholder: an empty key still builds a client, but every request then fails.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before running this notebook."
    )
# OpenAI() reads OPENAI_API_KEY from the environment.
client = OpenAI()
logger.success("OpenAI client connected")
logger.info('Generating embeddings for last news using ada from OpenAI')



[32m2024-08-13 18:35:15.953[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [32m[1mOpenAI client connected[0m
[32m2024-08-13 18:35:15.954[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mGenerating embeddings for last news using ada from OpenAI[0m


In [3]:
# Ticker whose latest headlines are fetched from Yahoo! Finance.
ticker = 'SYRS'
# NOTE(review): this assignment rebinds the global name `news`, which the first
# cell bound to the module imported from yahoo_fin; from here on `news` is
# whatever get_latest_news returned.
news = get_latest_news(ticker, source='yf')

[32m2024-08-13 18:35:16.608[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_latest_news[0m:[36m24[0m - [1mFetching latest news for SYRS from Yahoo! Finance[0m


In [4]:
# Tabulate the fetched articles: one row per article, dict keys become columns.
news_df = pd.DataFrame(news)
# Show the second article (position 1) to inspect its fields.
news_df.iloc[1]

ticker                                                  SYRS
title      Cancer-Focused Syros Pharmaceuticals Stops Mid...
summary    Syros Pharmaceuticals Inc. (NASDAQ:SYRS) stock...
url        https://finance.yahoo.com/news/cancer-focused-...
time                         Tue, 13 Aug 2024 13:41:17 +0000
Name: 1, dtype: object

In [5]:
# Text to embed: the summary of the second fetched article.
# NOTE(review): this needs at least two articles in `news`, and articles
# fetched with source='finviz' carry summary=None.
news_ready = news[1]['summary']
news_ready

'Syros Pharmaceuticals Inc. (NASDAQ:SYRS) stock is trading lower on Tuesday after the company issued an update on the SELECT-AML-1 Phase 2 trial. The company announced that it will discontinue enrollment in the SELECT-AML-1 Phase 2 trial evaluating the triplet regimen of tamibarotene in combination with venetoclax and azacitidine compared to the doublet regimen of venetoclax and azacitidine in newly diagnosed, unfit patients with acute myeloid leukemia (AML) and RARA gene overexpression. AbbVie I'

In [6]:
# Embed the selected summary. get_gpt_embeddings returns None when the text is
# empty or the API call fails, so the result may not be a vector.
embedding = get_gpt_embeddings(client, news_ready, model='text-embedding-ada-002')

In [7]:
# Turn the embedding into a single-row 2-D array (1, n_values) before it is
# passed to the model's predict().
embedding = np.array(embedding).reshape(1, -1)
print(embedding.shape)

(1, 1536)


In [8]:
from tensorflow.keras.models import load_model

# Load the saved Keras model; the path is relative to the kernel's working directory.
inference_model = load_model('data/inference/in/input/feedforward.keras')

2024-08-13 18:36:24.870182: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Input shape the loaded model expects, for comparison with embedding.shape.
inference_model.input_shape

(None, 1536)

In [10]:

# Run the loaded model on the single-row embedding.
prediction = inference_model.predict(embedding)

# Display the raw model output.
prediction

I0000 00:00:1723588609.904492  599913 service.cc:145] XLA service 0x7fbe05ece520 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723588609.904548  599913 service.cc:153]   StreamExecutor device (0): Host, Default Version
2024-08-13 18:36:49.905059: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-08-13 18:36:49.918665: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 925ms/step


I0000 00:00:1723588610.449455  599913 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


array([[0.52878493, 0.5355394 , 0.5704622 , 0.5545112 , 0.5535886 ,
        0.55041087, 0.5709983 ]], dtype=float32)

In [11]:
# Re-check the shape of the array that was passed to predict().
embedding.shape

(1, 1536)

In [12]:
import tensorflow as tf
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.16.2
