In [None]:
!pip install 24_finance-0.0.0-py3-none-any.whl

In [None]:
from lib.llm.model import (
    model_api_client,
    make_impact_from_news,
    make_reasons_from_news,
    make_description_of_instrument,
    make_operational_countries,
    make_summary_from_news,
    make_title_from_news,
)
from lib.content_matching.matching import match_portfolio_and_news
from lib.scraping.scrap import extract_text_to_dataframe
import pandas as pd
from pathlib import Path

In [None]:
# Build the model API client once; this same object is passed as the second argument
# to every make_* helper called in the cells below.
client = model_api_client()

In [None]:
def _load_complete_rows(query):
    """Run `query` through Spark and return the result as a pandas DataFrame.

    Rows containing a null in any column are dropped, and the index is
    renumbered from 0 afterwards.
    """
    fetched = spark.sql(query).toPandas()
    complete = fetched.dropna()
    return complete.reset_index(drop=True)


# News rows dated from 7 days before the latest `Date` in the table up to that latest `Date`.
news_window_query = """
                    SELECT * 
                    FROM hive_metastore.default.news_data 
                    WHERE Date >= DATE_SUB((SELECT MAX(Date) FROM hive_metastore.default.news_data), 7) 
                    AND Date <= (SELECT MAX(Date) FROM hive_metastore.default.news_data)
                    """
news_data = _load_complete_rows(news_window_query)

# Instruments that have a stockID.
stock_df = _load_complete_rows("select * from hive_metastore.default.stock_data WHERE stockID is not NULL")

# NOTE(review): event_df is not referenced by any later cell visible here — confirm it is still needed.
event_df = _load_complete_rows("select * from hive_metastore.default.cameo_event")

In [None]:
# Add two LLM-generated columns to every instrument row, then expand
# `operational_country` so each of its elements gets a row of its own.
# `assign` evaluates the callables in order, so the description helper receives rows
# that already contain `operational_country`, exactly as with sequential assignment.
# NOTE(review): presumably make_operational_countries returns a list of countries — confirm.
stock_df = (
    stock_df
    .assign(
        operational_country=lambda frame: frame.apply(
            lambda instrument: make_operational_countries(instrument, client), axis=1
        ),
        company_description=lambda frame: frame.apply(
            lambda instrument: make_description_of_instrument(instrument, client), axis=1
        ),
    )
    .explode('operational_country')
)

In [None]:
# Hand the recent news and the exploded stock frame to the portfolio/news matcher.
# NOTE(review): the matching criteria live in lib.content_matching.matching and are not
# visible from this notebook — presumably only news relevant to the instruments is kept.
filtered_data = match_portfolio_and_news(news_data, stock_df)

In [None]:
# Run the scraping helper over the matched news: it is told to read URLs from
# 'ArticleUrl' and to place its output in 'news_content'.
# NOTE(review): presumably this downloads each article's text — confirm in lib.scraping.scrap.
news_df_processed = extract_text_to_dataframe(
    filtered_data,
    url_column='ArticleUrl',
    output_column='news_content',
)

In [None]:
# LLM-generated columns, in the order they must be produced. Each helper receives the
# full row, which already includes the columns generated by the entries before it.
llm_enrichments = {
    'news_summary': make_summary_from_news,
    'news_title': make_title_from_news,
    'impact': make_impact_from_news,
    'reasons': make_reasons_from_news,
}

for column_name, generate in llm_enrichments.items():
    if len(news_df_processed) == 0:
        # No article matched. On a frame with no rows, DataFrame.apply(axis=1) still calls
        # the function once on a placeholder all-NaN row to infer the result type (a needless
        # LLM request) and can return a DataFrame instead of a Series, which cannot be
        # assigned to a single column. Create the empty column directly instead.
        news_df_processed[column_name] = pd.Series(index=news_df_processed.index, dtype=object)
    else:
        news_df_processed[column_name] = news_df_processed.apply(
            lambda row: generate(row, client), axis=1
        )

In [None]:
# Write a CSV snapshot of the enriched news to `data/news_data_processed.csv`
# under the parent of the current working directory.
output_file = Path().resolve().parent / 'data' / 'news_data_processed.csv'
# to_csv does not create missing directories, so make sure the target folder exists.
output_file.parent.mkdir(parents=True, exist_ok=True)
news_df_processed.to_csv(output_file, index=False)

In [None]:
# Convert the enriched pandas frame to a Spark DataFrame and replace the contents
# of the `default.dashboard` table with it.
# NOTE(review): inputs are read from `hive_metastore.default.*`, but this target has no
# catalog prefix, so it resolves against the session's current catalog — confirm intended.
dashboard_sdf = spark.createDataFrame(news_df_processed)
dashboard_writer = dashboard_sdf.write.mode("overwrite")
dashboard_writer.saveAsTable("default.dashboard")