# Import Libraries

In [80]:
import polars as pl
import requests
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import pickle

# Fetch Current News

In [50]:
def build_endpoint(stock_ticker, api_key, start_date, end_date=None):
    """
    Constructs the API endpoint for fetching news data.
    """
    base_url = "https://www.alphavantage.co/query?function=NEWS_SENTIMENT"
    date_from = f"&time_from={start_date}T0130"
    date_to = f"&time_to={end_date}T0130" if end_date else ""
    limit = "&limit=1000"
    return f"{base_url}&tickers={stock_ticker}{date_from}{date_to}{limit}&apikey={api_key}"

def fetch_news_data(stock_ticker, api_key, start_date, end_date=None):
    """
    Makes an API request and returns the response data.
    """
    endpoint = build_endpoint(stock_ticker, api_key, start_date, end_date)
    response = requests.get(endpoint)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch data: {response.status_code}")
        return None

def convert_to_dataframe(news_items):
    """
    Converts a list of news items into a Polars DataFrame and extracts the date.
    """
    news_items = news_items.get("feed", [])
    if news_items:
        df = pl.DataFrame(news_items)
        df = df.with_columns(pl.col("time_published").str.slice(0, 10).alias("date"))
        return df
    return pl.DataFrame()

def aggregate_news_data(stock_ticker_list, api_key, start_date, end_date=None):
    """
    Fetches and aggregates news data for multiple stock tickers.
    """
    df_list = []
    for stock_ticker in stock_ticker_list:
        news_data = fetch_news_data(stock_ticker, api_key, start_date, end_date)
        if news_data:
            df = convert_to_dataframe(news_data)
            df_list.append(df)

    return pl.concat(df_list, how='vertical') if df_list else pl.DataFrame()

def get_news(start_date, end_date):
    # Top stocks in S & P
    stock_ticker_list = ['MSFT', 'AAPL', 'NVDA', 'GOOG', 'AMZN', 'META', 'TSLA', 'LLY', 'JPM', 'WMT']
    
    stock_ticker_list = ['MSFT']
    
    
    api_key = input ("Please provide your Alpha Vantage API key :")
    news = aggregate_news_data(stock_ticker_list, api_key, start_date, end_date)
    
    return news

In [None]:
start_date = '20240424'
end_date = '20240425'
news_data = get_news(start_date, end_date)

In [52]:
news_data = news_data.to_pandas()

# Group the News Data - Based On Date

In [53]:
news_data.sort_values(by = 'time_published', inplace=True)

In [54]:
news_data = news_data[['date', 'title', 'summary']]

In [55]:
news_data.head()

Unnamed: 0,date,title,summary
83,20240424T0,Congress tells China: sell TikTok or we'll ban it,"J joined TikTok only two months ago, with a vi..."
82,20240424T0,Biden Set To Sign TikTok Ban Into Law After Se...,The U.S. Senate has passed a bill that could l...
81,20240424T0,PERION NETWORK SHAREHOLDER ALERT BY FORMER LOU...,"NEW ORLEANS, April 23, 2024 ( GLOBE NEWSWIRE )..."
80,20240424T0,US Market surges on robust earnings,Investors also awaited a slew of key U.S. econ...
79,20240424T0,6 Rules for a 'Show Me' Stock Market.,"Show me, don't tell me-it's the new mantra for..."


In [56]:
news_data['title'] = news_data['title']
news_data['summary'] = news_data['summary'] + ' \n\n'

In [57]:
news_data['cumulative_title'] = news_data.groupby('date')['title'].transform(lambda x: x.cumsum())
news_data['cumulative_summary'] = news_data.groupby('date')['summary'].transform(lambda x: x.cumsum())

In [58]:
news_data

Unnamed: 0,date,title,summary,cumulative_title,cumulative_summary
83,20240424T0,Congress tells China: sell TikTok or we'll ban it,"J joined TikTok only two months ago, with a vi...",Congress tells China: sell TikTok or we'll ban it,"J joined TikTok only two months ago, with a vi..."
82,20240424T0,Biden Set To Sign TikTok Ban Into Law After Se...,The U.S. Senate has passed a bill that could l...,Congress tells China: sell TikTok or we'll ban...,"J joined TikTok only two months ago, with a vi..."
81,20240424T0,PERION NETWORK SHAREHOLDER ALERT BY FORMER LOU...,"NEW ORLEANS, April 23, 2024 ( GLOBE NEWSWIRE )...",Congress tells China: sell TikTok or we'll ban...,"J joined TikTok only two months ago, with a vi..."
80,20240424T0,US Market surges on robust earnings,Investors also awaited a slew of key U.S. econ...,Congress tells China: sell TikTok or we'll ban...,"J joined TikTok only two months ago, with a vi..."
79,20240424T0,6 Rules for a 'Show Me' Stock Market.,"Show me, don't tell me-it's the new mantra for...",Congress tells China: sell TikTok or we'll ban...,"J joined TikTok only two months ago, with a vi..."
...,...,...,...,...,...
4,20240424T2,Melinda French Gates and her daughters love Ta...,Melinda French Gates and daughters sing along ...,Stock market today: US stocks mixed as traders...,Stock Market Today: US Stocks Mixed Ahead of G...
3,20240425T0,International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...,International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...
2,20240425T0,ServiceNow ( NOW ) Q1 2024 Earnings Call Tra...,NOW earnings call for the period ending March ...,International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...
1,20240425T0,"Bragar Eagel & Squire, P.C. Reminds Investors ...","NEW YORK, April 24, 2024 ( GLOBE NEWSWIRE ) --...",International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...


In [59]:
news_data['Date'] = news_data['date'].apply(lambda x : x[:4] + '-' + x[4:6] + '-' + x[6:8])
# news_data.drop(columns = ['date'], inplace =True)

In [62]:
required_date = (lambda x : x[:4] + '-' + x[4:6] + '-' + x[6:])(end_date)
print(required_date)
required_news_data = news_data[news_data['Date']==required_date]

2024-04-25


In [67]:
all_news_of_required_date = required_news_data.tail(1)

# Load Text Embedding Model

In [68]:
from sentence_transformers import SentenceTransformer

checkpoint = 'sentence-transformers/paraphrase-TinyBERT-L6-v2' # 'sentence-transformers/all-MiniLM-L6-v2'

embedding_model = SentenceTransformer(checkpoint)

def generate_embedding(x):
    return embedding_model.encode(x)



In [69]:
tqdm.pandas()

all_news_of_required_date['cumulative_title_emb'] = all_news_of_required_date['cumulative_title'].progress_apply(generate_embedding)
all_news_of_required_date['cumulative_summary_emb'] = all_news_of_required_date['cumulative_summary'].progress_apply(generate_embedding)

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 31.11it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_news_of_required_date['cumulative_title_emb'] = all_news_of_required_date['cumulative_title'].progress_apply(generate_embedding)
100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 39.81it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_news_of_required_date['cumulative_summary_emb'] = all_news_of_required_date['cumulative_summary'].progress_apply(generate_embedding)


In [70]:
all_news_of_required_date

Unnamed: 0,date,title,summary,cumulative_title,cumulative_summary,Date,cumulative_title_emb,cumulative_summary_emb
0,20240425T0,JEE Mains Session 2 Final Result 2024 Out HIGH...,JEE Main 2024 Session 2 Result HIGHLIGHTS: The...,International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...,2024-04-25,"[0.26864007, 0.055694774, -0.082368486, 0.0653...","[0.15465383, 0.022456273, 0.09477475, -0.05835..."


In [None]:
# Concatenate Features
series1 = all_news_of_required_date['cumulative_title_emb'].apply(lambda x: x.reshape(1, -1))
series2 = all_news_of_required_date['cumulative_summary_emb'].apply(lambda x: x.reshape(1, -1))

concatenated_features = np.concatenate([np.concatenate(list(series1.values), axis = 0), np.concatenate(list(series2.values), axis = 0)], axis = 1)

all_news_of_required_date['features'] = list(concatenated_features)

In [74]:
all_news_of_required_date

Unnamed: 0,date,title,summary,cumulative_title,cumulative_summary,Date,cumulative_title_emb,cumulative_summary_emb,features
0,20240425T0,JEE Mains Session 2 Final Result 2024 Out HIGH...,JEE Main 2024 Session 2 Result HIGHLIGHTS: The...,International Business Machines ( IBM ) Q1 2...,IBM earnings call for the period ending March ...,2024-04-25,"[0.26864007, 0.055694774, -0.082368486, 0.0653...","[0.15465383, 0.022456273, 0.09477475, -0.05835...","[0.26864007, 0.055694774, -0.082368486, 0.0653..."


In [77]:
final_df = all_news_of_required_date[['features', 'Date']]

# Load Model

In [81]:
model_base_dir = "../model"

# Load the model from the pickle file
with open(f"{model_base_dir}/random_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)

In [82]:
X_test = list(final_df['features'])

In [85]:
y_pred = loaded_model.predict(X_test)

# 0 means closing price will fall, 1 means closing price will rise in near future
print(y_pred)

[0]
