# Forecaster and Portfolio Manager

In [None]:
import os
import re
import time
import json
import random
import finnhub
import torch
import gradio as gr
import pandas as pd
import yfinance as yf
from pynvml import *
from peft import PeftModel
from collections import defaultdict
from datetime import date, datetime, timedelta
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer


# --- Credentials -----------------------------------------------------------
# NOTE(review): both assignments overwrite whatever value the environment
# already holds with an empty string; fill these in (or remove the lines and
# export the variables) before running.
os.environ["HF_TOKEN"] = ""

os.environ["FINNHUB_API_KEY"] = ""

# Hugging Face token passed to the from_pretrained calls below.
access_token = os.environ["HF_TOKEN"]


# Finnhub REST client used by the news / profile / financials helpers.
finnhub_client = finnhub.Client(api_key=os.environ["FINNHUB_API_KEY"])

# --- Models ----------------------------------------------------------------
# Base Llama-2 chat checkpoint, loaded in fp16 with automatic device placement.
# NOTE(review): `base_model` is not referenced again in the visible part of
# this file — confirm whether it is still needed, since it occupies memory.
base_model = AutoModelForCausalLM.from_pretrained(
    '/checkpoints/Llama-2-7b-chat-hf',
    token=access_token,
    trust_remote_code=False, 
    device_map="auto",
    torch_dtype=torch.float16, 
)
# Fine-tuned portfolio-strategy checkpoint; `offload_folder` gives the loader
# a place on disk for weights that do not fit on the available devices.
model_strategy = AutoModelForCausalLM.from_pretrained(
    '/FinGPT_Strategy/finetuned_models/strategy-train_202411251624/checkpoint-104',
    trust_remote_code=False,
    device_map="auto",
    torch_dtype=torch.float16,  
    token = "",
    offload_folder="/FinGPT_Strategy/offload/" 
)

# Switch to inference mode (disables dropout etc.).
model_strategy = model_strategy.eval()

# Fine-tuned weekly-forecast checkpoint.
model_forecast = AutoModelForCausalLM.from_pretrained(
    '/FinGPT_Forecaster/finetuned_models/local-test_202411211516/checkpoint-88',
    trust_remote_code=False,
    device_map="auto",
    torch_dtype=torch.float16,  
    token = "",
)

model_forecast = model_forecast.eval()

# --- Tokenizers and streamers ----------------------------------------------
# Each tokenizer is loaded from the same checkpoint directory as its model.
tokenizer_strategy = AutoTokenizer.from_pretrained(
    '/FinGPT_Strategy/finetuned_models/strategy-train_202411251624/checkpoint-104',
    token=access_token
)

tokenizer_forecast = AutoTokenizer.from_pretrained(
    '/FinGPT_Forecaster/finetuned_models/local-test_202411211516/checkpoint-88',
    token=access_token
)

# TextStreamer prints tokens to stdout as they are generated.
streamer_strategy = TextStreamer(tokenizer_strategy)
streamer_forecast = TextStreamer(tokenizer_forecast)

# --- Prompt scaffolding ----------------------------------------------------
# Llama-2 chat delimiters: instruction markers and system-prompt markers.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

# System prompt for the single-stock weekly forecast task.
SYSTEM_PROMPT_FORECAST = "You are a seasoned stock market analyst. Your task is to list the positive developments and potential concerns for companies based on relevant news and basic financials from the past weeks, then provide an analysis and prediction for the companies' stock price movement for the upcoming week. " \
    "Your answer format should be as follows:\n\n[Positive Developments]:\n1. ...\n\n[Potential Concerns]:\n1. ...\n\n[Prediction & Analysis]\nPrediction: ...\nAnalysis: ..."

# System prompt for the multi-stock portfolio-weighting task.
SYSTEM_PROMPT_STRATEGY = "You are a seasoned stock market analyst. Your task is to construct a portfolio including weights of these stocks based  on relevant news and basic financials from the past weeks, then provide an explanation of why did you choose these weights. " \
    "Your answer format should be as follows:\n\n[Portfolio Weights]:\n1. ... (stock name) - ...% (weight)\n\n[Explanation]\n1. ...\n\n[Risk Management]\n1. ..."


2024-12-03 16:21:08.056289: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-03 16:21:08.130713: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733214068.180823  564351 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733214068.195600  564351 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-03 16:21:08.244755: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
def print_gpu_utilization():
    """Print how much memory is currently in use on GPU index 0, in MB."""
    nvmlInit()
    gpu0 = nvmlDeviceGetHandleByIndex(0)
    used_bytes = nvmlDeviceGetMemoryInfo(gpu0).used
    used_mb = used_bytes // 1024**2
    print(f"GPU memory occupied: {used_mb} MB.")


def get_curday():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    today = date.today()
    return format(today, "%Y-%m-%d")


def n_weeks_before(date_string, n):
    """Shift a ``YYYY-MM-DD`` date back by ``n`` weeks.

    A negative ``n`` moves the date forward. The result is returned in the
    same ``YYYY-MM-DD`` format; an unparsable ``date_string`` raises
    ``ValueError`` from ``datetime.strptime``.
    """
    anchor = datetime.strptime(date_string, "%Y-%m-%d")
    shifted = anchor - timedelta(weeks=n)
    return shifted.strftime("%Y-%m-%d")


def get_stock_data(stock_symbol, steps):
    """Download closing prices and pair them into consecutive periods.

    ``steps`` is an ascending list of ``YYYY-MM-DD`` strings; ``steps[0]`` and
    ``steps[-1]`` are passed to yfinance as the start and end of the download.
    For every step except the last, the first available trading day on or
    after that step is looked up; steps with no such day are skipped. The
    last available trading day is appended as the final boundary.

    Returns a DataFrame with ``Start Date``, ``End Date``, ``Start Price`` and
    ``End Price`` columns, one row per pair of neighbouring boundaries.
    Raises ``gr.Error`` when yfinance returns no rows.
    """
    stock_data = yf.download(stock_symbol, steps[0], steps[-1])
    if len(stock_data) == 0:
        raise gr.Error(f"Failed to download stock price data for symbol {stock_symbol} from yfinance!")

    # String form of the index so it can be compared against the step strings.
    available_dates = stock_data.index.format()

    def _to_datetime(label):
        # Only the leading YYYY-MM-DD part of the formatted index is parsed.
        return datetime.strptime(label[:10], "%Y-%m-%d")

    def _close_at(position):
        # NOTE(review): `.values[0]` takes the first entry of the 'Close' row,
        # presumably because yfinance returns per-ticker columns — confirm.
        return stock_data['Close'].iloc[position].values[0]

    dates, prices = [], []
    for step in steps[:-1]:
        hit = next(
            (pos for pos, day in enumerate(available_dates) if day >= step),
            None,
        )
        if hit is None:
            continue
        prices.append(_close_at(hit))
        dates.append(_to_datetime(available_dates[hit]))

    # Close the final period with the last trading day that was downloaded.
    dates.append(_to_datetime(available_dates[-1]))
    prices.append(_close_at(-1))

    frame = {
        "Start Date": dates[:-1],
        "End Date": dates[1:],
        "Start Price": prices[:-1],
        "End Price": prices[1:],
    }
    return pd.DataFrame(frame)


def get_news(symbol, data):
    """Attach a JSON-encoded, date-sorted news list to every period row.

    For each row of ``data`` the Finnhub company-news endpoint is queried for
    the row's ``Start Date``..``End Date`` window (one request per second).
    The result is stored in a new ``News`` column and ``data`` is returned.
    Raises ``gr.Error`` as soon as any window yields no news.
    """
    news_column = []

    for _, period in data.iterrows():
        window_start = period['Start Date'].strftime('%Y-%m-%d')
        window_end = period['End Date'].strftime('%Y-%m-%d')

        time.sleep(1)  # throttle: keep the request rate under the API quota
        articles = finnhub_client.company_news(symbol, _from=window_start, to=window_end)
        if len(articles) == 0:
            raise gr.Error(f"No company news found for symbol {symbol} from finnhub!")

        records = []
        for article in articles:
            stamp = datetime.fromtimestamp(article['datetime'])
            records.append({
                "date": stamp.strftime('%Y%m%d%H%M%S'),
                "headline": article['headline'],
                "summary": article['summary'],
            })
        records = sorted(records, key=lambda rec: rec['date'])
        news_column.append(json.dumps(records))

    data['News'] = news_column
    return data


def get_company_prompt(symbol):
    """Build the "[Company Introduction]" paragraph for ``symbol``.

    The text is filled from the Finnhub ``company_profile2`` response.
    Raises ``gr.Error`` when the profile is empty; a profile that lacks one
    of the template fields raises ``KeyError`` from ``str.format``.
    """
    profile = finnhub_client.company_profile2(symbol=symbol)
    if not profile:
        raise gr.Error(f"Failed to find company profile for symbol {symbol} from finnhub!")

    overview = "[Company Introduction]:\n\n{name} is a leading entity in the {finnhubIndustry} sector. Incorporated and publicly traded since {ipo}, the company has established its reputation as one of the key players in the market. As of today, {name} has a market capitalization of {marketCapitalization:.2f} in {currency}, with {shareOutstanding:.2f} shares outstanding."
    listing = "\n\n{name} operates primarily in the {country}, trading under the ticker {ticker} on the {exchange}. As a dominant force in the {finnhubIndustry} space, the company continues to innovate and drive progress within the industry."

    return (overview + listing).format(**profile)


def get_prompt_by_row(symbol, row):
    """Render one period row into prompt fragments.

    ``row`` must provide ``Start Date``/``End Date`` (strings or objects with
    ``strftime``), ``Start Price``/``End Price`` (numbers) and JSON strings
    under ``News`` and ``Basics``.

    Returns ``(head, news, basics)``: a sentence describing the price move, a
    list of formatted news entries dated no later than the end date (entries
    whose summary starts with a known promotional sentence are dropped), and
    a basic-financials paragraph.
    """
    def _as_day(value):
        return value if isinstance(value, str) else value.strftime('%Y-%m-%d')

    start_date = _as_day(row['Start Date'])
    end_date = _as_day(row['End Date'])
    start_price, end_price = row['Start Price'], row['End Price']

    if end_price > start_price:
        term = 'increased'
    else:
        term = 'decreased'
    head = (
        f"From {start_date} to {end_date}, {symbol}'s stock price {term} "
        f"from {start_price:.2f} to {end_price:.2f}. "
        "Company news during this period are listed below:\n\n"
    )

    cutoff = end_date.replace('-', '')  # YYYYMMDD, comparable to the news stamps
    promo_prefix = "Looking for stock market analysis and research with proves results?"
    news = []
    for item in json.loads(row["News"]):
        if item['date'][:8] > cutoff:
            continue
        if item['summary'].startswith(promo_prefix):
            continue
        news.append("[Headline]: {}\n[Summary]: {}\n".format(item['headline'], item['summary']))

    basics = json.loads(row['Basics'])
    if not basics:
        basics = "[Basic Financials]:\n\nNo basic financial reported."
    else:
        intro = "Some recent basic financials of {}, reported at {}, are presented below:\n\n[Basic Financials]:\n\n".format(
            symbol, basics['period'])
        metric_lines = [f"{key}: {val}" for key, val in basics.items() if key != 'period']
        basics = intro + "\n".join(metric_lines)

    return head, news, basics


def sample_news(news, k=5):
    """Return up to ``k`` randomly chosen items of ``news`` in original order.

    ``k`` is clamped to ``[0, len(news)]`` so that asking for more items than
    exist (or passing an empty list) returns what is available instead of
    raising ``ValueError`` from ``random.sample``.
    """
    k = max(0, min(k, len(news)))
    # Sample positions rather than items so the original ordering can be kept.
    chosen = sorted(random.sample(range(len(news)), k))
    return [news[i] for i in chosen]


def get_current_basics(symbol, curday): # get the most recent basic financials
    """Return the latest quarterly basic financials reported on or before ``curday``.

    Quarterly series from Finnhub's basic-financials endpoint are regrouped
    into one dict per reporting period (metric name -> value, plus a
    ``'period'`` key). The newest period not later than ``curday`` is
    returned; if every period is later than ``curday`` the oldest one is
    returned instead.

    Raises ``gr.Error`` when the response contains no series or no quarterly
    data points (previously the latter surfaced as an unbound local variable
    or a ``KeyError``).
    """
    basic_financials = finnhub_client.company_basic_financials(symbol, 'all')
    series = basic_financials.get('series')
    if not series:
        raise gr.Error(f"Failed to find basic financials for symbol {symbol} from finnhub!")

    by_period = defaultdict(dict)
    for metric, value_list in (series.get('quarterly') or {}).items():
        for value in value_list:
            by_period[value['period']][metric] = value['v']

    if not by_period:
        raise gr.Error(f"Failed to find basic financials for symbol {symbol} from finnhub!")

    # Period strings are compared lexicographically against curday.
    periods = sorted(by_period)
    chosen = next((p for p in reversed(periods) if p <= curday), periods[0])

    basic = by_period[chosen]
    basic['period'] = chosen
    return basic
    

def get_all_prompts_online(symbol, data, curday, with_basics=True):
    """Assemble the forecast prompt body for one symbol.

    Combines the company introduction, one section per row of ``data`` (price
    move plus at most five randomly sampled news items) and, when
    ``with_basics`` is true, the latest basic financials.

    Returns ``(info, prompt)`` where ``info`` is the gathered context and
    ``prompt`` is ``info`` followed by the prediction request for the week
    starting at ``curday``.
    """
    company_prompt = get_company_prompt(symbol)

    weekly_sections = []
    for _, row in data.iterrows():
        head, news, _unused_basics = get_prompt_by_row(symbol, row)
        weekly_sections.append((head, news))

    prompt = ""
    for head, news in weekly_sections:
        prompt += "\n" + head
        picked = sample_news(news, min(5, len(news)))
        prompt += "\n".join(picked) if picked else "No relative news reported."

    # The forecast window is the week that starts on curday.
    period = "{} to {}".format(curday, n_weeks_before(curday, -1))

    if not with_basics:
        basics = "[Basic Financials]:\n\nNo basic financial reported."
    else:
        latest = get_current_basics(symbol, curday)
        metric_lines = [f"{key}: {val}" for key, val in latest.items() if key != 'period']
        basics = "Some recent basic financials of {}, reported at {}, are presented below:\n\n[Basic Financials]:\n\n".format(
            symbol, latest['period']) + "\n".join(metric_lines)

    info = '\n'.join([company_prompt, prompt, basics])
    request = f"\n\nLet's guess your prediction for next week of {symbol} ({period}). " \
        "The prediction result need to be inferred from your analysis at the end, and thus not appearing as a foundational factor of your analysis."
    prompt = info + request

    return info, prompt


def construct_prompt_forecast(ticker, curday, n_weeks, use_basics):
    """Build the full Llama-2 chat prompt for a single-stock forecast.

    Fetches prices and news for the ``n_weeks`` weeks ending at ``curday``,
    builds the prompt body and wraps it in the instruction/system delimiters
    together with ``SYSTEM_PROMPT_FORECAST``.

    Returns ``(info, prompt)``. Raises ``gr.Error`` when the weekly boundaries
    cannot be computed from ``curday``.
    """
    try:
        # Oldest boundary first: n_weeks ago, ..., one week ago, curday.
        steps = [n_weeks_before(curday, back) for back in reversed(range(n_weeks + 1))]
    except Exception:
        raise gr.Error(f"Invalid date {curday}!")

    data = get_news(ticker, get_stock_data(ticker, steps))
    # Per-row basics are left empty; current basics are added later on demand.
    data['Basics'] = [json.dumps({})] * len(data)

    info, body = get_all_prompts_online(ticker, data, curday, use_basics)
    prompt = "".join([B_INST, B_SYS, SYSTEM_PROMPT_FORECAST, E_SYS, body, E_INST])

    return info, prompt
# def get_all_prompts_online_strategy(portfolio, curday, n_weeks, with_basics=True):
#     all_prompt = ""
#     for stock in portfolio:
#         all_prompt += construct_prompt(stock, curday, n_weeks, with_basics)[1]
#     all_prompt = B_INST + B_SYS + SYSTEM_PROMPT_STRATEGY + E_SYS + all_prompt + E_INST
#     return all_prompt




def get_all_prompts_online_strategy(symbol_list, date, n_weeks, with_basics=True):
    """Assemble the portfolio-strategy prompt body for several symbols.

    For every symbol the company introduction, weekly price/news sections (at
    most five randomly sampled news items per week) and, when ``with_basics``
    is true, the latest basic financials are collected.

    The weekly boundaries depend only on ``date`` and ``n_weeks``, so they are
    computed and validated once, before any per-symbol network request.

    Returns ``(info, prompt)`` where ``info`` is the concatenated context of
    all symbols and ``prompt`` is ``info`` plus the portfolio request.
    Raises ``gr.Error`` when ``date`` cannot be parsed.
    """
    try:
        steps = [n_weeks_before(date, n) for n in range(n_weeks + 1)][::-1]
    except Exception:
        raise gr.Error(f"Invalid date {date}!")

    sections = []
    for symbol in symbol_list:
        company_prompt = get_company_prompt(symbol)

        data = get_stock_data(symbol, steps)
        data = get_news(symbol, data)
        # Per-row basics are left empty; current basics are added below.
        data['Basics'] = [json.dumps({})] * len(data)

        weekly_parts = []
        for _, row in data.iterrows():
            head, news, _ = get_prompt_by_row(symbol, row)
            sampled_news = sample_news(news, min(5, len(news)))
            weekly_parts.append("\n" + head)
            weekly_parts.append(
                "\n".join(sampled_news) if sampled_news else "No relative news reported."
            )
        prompt = "".join(weekly_parts)

        if with_basics:
            basics = get_current_basics(symbol, date)
            basics = "Some recent basic financials of {}, reported at {}, are presented below:\n\n[Basic Financials]:\n\n".format(
                symbol, basics['period']) + "\n".join(f"{k}: {v}" for k, v in basics.items() if k != 'period')
        else:
            basics = "[Basic Financials]:\n\nNo basic financial reported."

        sections.append('\n' + symbol + '\n' + company_prompt + '\n' + prompt + '\n' + basics)

    info = "".join(sections)
    prompt = info + f"\n\nBased on all the information before {date}, let's first make a portfolio with specific weights. "  \
                    f"Provide a summary analysis to support your prediction."

    return info, prompt

def process_prompt(prompt):
    """Remove duplicate paragraphs from ``prompt`` while keeping their order.

    Paragraphs are the pieces separated by a blank line (``"\\n\\n"``). Only
    the first occurrence of each paragraph is kept. De-duplicating through a
    plain ``set`` would reassemble the paragraphs in arbitrary order, so an
    insertion-ordered dict is used instead.
    """
    paragraphs = prompt.split("\n\n")
    unique_paragraphs = dict.fromkeys(paragraphs)
    return "\n\n".join(unique_paragraphs)

def construct_prompt_strategy(symbol_list, curday, n_weeks, use_basics):
    """Build the full Llama-2 chat prompt for the portfolio-strategy model.

    The multi-symbol prompt body is de-duplicated with ``process_prompt``,
    its character length is printed, and it is wrapped in the
    instruction/system delimiters with ``SYSTEM_PROMPT_STRATEGY``.

    Returns ``(info, prompt)``.
    """
    info, body = get_all_prompts_online_strategy(symbol_list, curday, n_weeks, use_basics)
    body = process_prompt(body)

    print(len(body))
    prompt = "".join([B_INST, B_SYS, SYSTEM_PROMPT_STRATEGY, E_SYS, body, E_INST])

    return info, prompt

def predict_strategy(tickers, date, n_weeks, use_basics):
    """Generate a portfolio recommendation for a comma-separated ticker list.

    Returns ``(info, answer, None, None)``: the gathered context, the model's
    text after the closing ``[/INST]`` marker, and two unused placeholders.
    """
    print_gpu_utilization()

    symbols = [piece.strip() for piece in tickers.split(',')]
    info, prompt = construct_prompt_strategy(symbols, date, n_weeks, use_basics)

    encoded = tokenizer_strategy(prompt, return_tensors='pt', padding=False)
    inputs = {name: tensor.to(model_strategy.device) for name, tensor in encoded.items()}
    print("Inputs loaded onto devices.")

    # No explicit length limit is passed here; generation length falls back to
    # the model's own generation settings.
    generated = model_strategy.generate(
        **inputs,
        do_sample=True,
        eos_token_id=tokenizer_strategy.eos_token_id,
        use_cache=True,
        streamer=streamer_strategy,
    )
    decoded = tokenizer_strategy.decode(generated[0], skip_special_tokens=True)
    # Drop everything up to and including the last "[/INST]" marker.
    answer = re.sub(r'.*\[/INST\]\s*', '', decoded, flags=re.DOTALL)

    torch.cuda.empty_cache()

    return info, answer, None, None

def predict_forecast(ticker, date, n_weeks, use_basics):
    """Generate a one-week price-movement forecast for ``ticker``.

    Returns ``(info, answer, None, None)``: the gathered context, the model's
    text after the closing ``[/INST]`` marker, and two unused placeholders.
    """
    print_gpu_utilization()

    info, prompt = construct_prompt_forecast(ticker, date, n_weeks, use_basics)

    encoded = tokenizer_forecast(prompt, return_tensors='pt', padding=False)
    inputs = {name: tensor.to(model_forecast.device) for name, tensor in encoded.items()}
    print("Inputs loaded onto devices.")

    generated = model_forecast.generate(
        **inputs,
        max_length=2048*8,
        do_sample=True,
        eos_token_id=tokenizer_forecast.eos_token_id,
        use_cache=True,
        streamer=streamer_forecast,
    )
    decoded = tokenizer_forecast.decode(generated[0], skip_special_tokens=True)
    # Drop everything up to and including the last "[/INST]" marker.
    answer = re.sub(r'.*\[/INST\]\s*', '', decoded, flags=re.DOTALL)

    torch.cuda.empty_cache()

    return info, answer, None, None

# Sentiment Analysis

In [None]:
# import accelerate
# import bitsandbytes
import os
import re
import time
import json
import random
import finnhub
import torch
import gradio as gr
import pandas as pd
import yfinance as yf
from pynvml import *
from peft import PeftModel
from collections import defaultdict
from datetime import date, datetime, timedelta
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizerFast   # 4.30.2
from peft import PeftModel  # 0.4.0
import torch

# `sys` is used below but is not imported anywhere in this file; import it
# explicitly instead of relying on a star-import happening to expose it.
import sys

# Finnhub REST client for the sentiment section (API key left blank here).
finnhub_client = finnhub.Client(api_key="")


# Make the local FinNLP checkout importable.
sys.path.append('/project3_LLM/FinGPT/fingpt/FinNLP')

# Base Llama-2 13B weights plus the FinGPT sentiment LoRA adapter.
base_model_senti = "NousResearch/Llama-2-13b-hf" 
peft_model_senti = "FinGPT/fingpt-sentiment_llama2-13b_lora"
# The base model is loaded in 8-bit; `offload_folder` gives the loader a disk
# location for weights that do not fit on the available devices.
model_senti = LlamaForCausalLM.from_pretrained(base_model_senti, trust_remote_code=True, offload_folder="offload/",
                                               load_in_8bit = True,
                                            
                                               )

tokenizer_senti = AutoTokenizer.from_pretrained(base_model_senti, trust_remote_code=True)

# Wrap the base model with the LoRA adapter and switch to inference mode.
model_senti = PeftModel.from_pretrained(model_senti, peft_model_senti,
                                        offload_folder="offload/")
model_senti = model_senti.eval()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def n_weeks_before(date_string, n):
    """Shift a ``YYYY-MM-DD`` date back by ``n`` weeks (forward if negative)."""
    parsed = datetime.strptime(date_string, "%Y-%m-%d")
    return (parsed - timedelta(weeks=n)).strftime("%Y-%m-%d")

def analyze_sentiment(news_list, tokenizer_senti, model_senti):
    """Classify each news item as positive, neutral, negative or unknown.

    ``news_list`` is an iterable of dicts with ``'headline'`` and
    ``'summary'`` keys. Each item is turned into an instruction prompt, run
    through ``model_senti`` and the text after the last ``"Answer: "`` is
    parsed for a label.

    Compared with a strict equality check, the label is accepted when the
    completion merely *starts* with it (e.g. ``"positive."`` or
    ``"negative\\n..."``), and the encoded inputs are moved to the model's
    device before generation, matching the other predict helpers in this file.

    Returns a list of ``{"headline", "summary", "sentiment"}`` dicts, in input
    order.
    """
    import re

    label_pattern = re.compile(r"\s*(positive|neutral|negative)\b")

    results = []
    for news in news_list:
        # Build the instruction prompt.
        prompt = f"""Instruction: What is the sentiment of this news? Please choose an answer from {{negative/neutral/positive}}
Input: {news['headline']} {news['summary']}
Answer: """

        # Tokenize and place the tensors on the same device as the model.
        # NOTE(review): max_length is passed without truncation=True, so long
        # inputs are presumably not truncated — confirm intended behaviour.
        tokens_senti = tokenizer_senti(prompt, return_tensors="pt", padding=True, max_length=512)
        tokens_senti = {
            name: tensor.to(model_senti.device) for name, tensor in tokens_senti.items()
        }

        # Run the sentiment model.
        outputs = model_senti.generate(**tokens_senti, max_length=512)
        decoded = tokenizer_senti.decode(outputs[0], skip_special_tokens=True)
        completion = decoded.split("Answer: ")[-1].lower()

        # Accept the first label word; anything else is reported as unknown.
        found = label_pattern.match(completion)
        sentiment = found.group(1) if found else "unknown"

        # Record the result.
        results.append({"headline": news['headline'], "summary": news['summary'], "sentiment": sentiment})
    return results

def fetch_and_analyze_news(ticker, tokenizer_senti, model_senti,date, n_weeks):
    """Fetch recent company news and tally sentiment labels.

    News for ``ticker`` is requested from Finnhub for the ``n_weeks`` weeks
    ending at ``date``; only the first 20 returned items are analysed.

    Returns ``(sentiment_stats, analyzed_news)`` where ``sentiment_stats``
    counts the ``positive``/``neutral``/``negative`` labels (other labels are
    not counted) and ``analyzed_news`` is the per-item analysis.
    """
    window_start = n_weeks_before(date, n_weeks)
    articles = finnhub_client.company_news(ticker, _from=window_start, to=date)
    # Cap the workload at the first 20 items of the response.
    articles = articles[:20]

    analyzed_news = analyze_sentiment(articles, tokenizer_senti, model_senti)

    sentiment_stats = dict.fromkeys(("positive", "neutral", "negative"), 0)
    for entry in analyzed_news:
        label = entry["sentiment"]
        if label in sentiment_stats:  # skip labels such as "unknown"
            sentiment_stats[label] += 1

    return sentiment_stats, analyzed_news


def predict_sentiment(ticker, date, n_weeks):
    """Produce a text report and a bar chart of news sentiment for ``ticker``.

    Returns ``(sentiment_report, None, None, fig)``: the report string, two
    unused placeholders and the matplotlib figure.
    """
    sentiment_stats, analyzed_news = fetch_and_analyze_news(
        ticker, tokenizer_senti, model_senti, date, n_weeks
    )
    positive = sentiment_stats['positive']
    neutral = sentiment_stats['neutral']
    negative = sentiment_stats['negative']

    # Text report: summary counts followed by one entry per analysed headline.
    report_parts = [
        f"Sentiment Analysis for {ticker}:\n",
        f"Positive: {positive} | Neutral: {neutral} | Negative: {negative}\n\n",
        "Detailed News Sentiment:\n",
    ]
    report_parts.extend(
        f"- [Headline]: {item['headline']}\n  [Sentiment]: {item['sentiment']}\n\n"
        for item in analyzed_news
    )
    sentiment_report = "".join(report_parts)

    # Bar chart of the three label counts.
    df = pd.DataFrame({
        'Sentiment': ['Positive', 'Neutral', 'Negative'],
        'Count': [positive, neutral, negative],
    })
    colors = ['#FCB44A', '#4DBB82', '#403292']

    sns.set(style="whitegrid")
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(x='Sentiment', y='Count', data=df, palette=colors, ax=ax)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_title(f"Sentiment Analysis for {ticker}", fontsize=16)
    ax.set_xlabel('Sentiment', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)

    return sentiment_report, None,None, fig


# Peer Comparison

In [None]:
import os
import re
import csv
import math
import time
import json
import random
import finnhub
import datasets
import pandas as pd
import numpy as np
import yfinance as yf
import torch
from datetime import date, datetime, timedelta
from collections import defaultdict
from datasets import Dataset
from openai import OpenAI
from finnhub.client import FinnhubAPIException  # 导入异常类
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from pynvml import *

# Finnhub REST client for the peer-comparison section (API key left blank).
finnhub_client = finnhub.Client(api_key="")
# Fine-tuned peer-comparison checkpoint, loaded in fp16 with automatic device
# placement.
model = AutoModelForCausalLM.from_pretrained(
    '/finetuned_models/local-test_202412010011/checkpoint-24',
    #token=access_token,
    trust_remote_code=False, 
    device_map="auto",
    torch_dtype=torch.float16,
    #offload_folder="offload/"
)

# Switch to inference mode.
model = model.eval()

# Tokenizer loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(
    '/finetuned_models/local-test_202412010011/checkpoint-24',
    #token=access_token
)

# Prints tokens to stdout as they are generated.
# NOTE(review): not passed to generate() in the visible predict_compare code.
streamer = TextStreamer(tokenizer)

# Llama-2 chat delimiters: instruction markers and system-prompt markers.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.17s/it]


In [2]:
# System prompt for the peer-comparison task: describes the analyst role and
# the three-part answer layout the model is asked to follow.
SYSTEM_PROMPT = "You are a professional stock financial analyst. Your task is to analyze a target company and its industry peers based on relevant news \nand basic financials for the last week, then provide company analysis, compare companies and determine the best-performing company. " \
   "Your answer format should be as follows:\n\n[Comparison Result (Best Company)]:...\n\n[Reasons]:1. ...\n\n[Peer Companies Analysis]:1. ...\n"
def print_gpu_utilization():
    """Print how much memory is currently in use on GPU index 0, in MB."""
    nvmlInit()
    device_handle = nvmlDeviceGetHandleByIndex(0)
    memory = nvmlDeviceGetMemoryInfo(device_handle)
    occupied_mb = memory.used // 1024**2
    print(f"GPU memory occupied: {occupied_mb} MB.")


def get_curday():
    """Return today's local date as a ``YYYY-MM-DD`` string."""
    now = date.today()
    return format(now, "%Y-%m-%d")

def n_weeks_before(date_string, n):
    """Return the ``YYYY-MM-DD`` date that lies ``n`` weeks before ``date_string``.

    A negative ``n`` yields a later date.
    """
    offset = timedelta(weeks=n)
    moved = datetime.strptime(date_string, "%Y-%m-%d") - offset
    return moved.strftime("%Y-%m-%d")


def get_stock_data(stock_symbol, steps):
    """Download closing prices and pair them into consecutive periods.

    ``steps`` is an ascending list of ``YYYY-MM-DD`` strings. For every step
    except the last, the first available trading day on or after it is used;
    a step with no such day is kept with a ``None`` price. The last available
    trading day closes the final period.

    Returns a DataFrame with ``Start Date``, ``End Date``, ``Start Price`` and
    ``End Price`` columns, or ``None`` when no data is available or any
    exception occurs (the error is printed, not raised).
    """
    try:
        # Download the stock data
        stock_data = yf.download(stock_symbol, start=steps[0], end=steps[-1], progress=False)
        if stock_data.empty:
            print(f"Warning: No data available for {stock_symbol}")
            return None  # No data: return None

        dates, prices = [], []
        available_dates = stock_data.index.strftime("%Y-%m-%d").tolist()  # Format the dates as strings

        # Extract the price for each step
        for date in steps[:-1]:
            matched = False
            for i, available_date in enumerate(available_dates):
                if available_date >= date:  # First trading day on or after the step
                    # NOTE(review): the appended value is whatever
                    # `stock_data['Close'].iloc[i]` yields; the peer
                    # get_prompt_by_row later calls `.iloc[0]` on it, so it is
                    # presumably a one-element Series — confirm.
                    prices.append(stock_data['Close'].iloc[i])  # Append the closing price
                    dates.append(datetime.strptime(available_date, "%Y-%m-%d"))  # Convert to datetime
                    matched = True
                    break
            if not matched:  # Nothing matched this step
                print(f"Warning: No price data available for {stock_symbol} at step {date}")
                prices.append(None)
                dates.append(datetime.strptime(date, "%Y-%m-%d"))

        # Add the last day's data
        dates.append(datetime.strptime(available_dates[-1], "%Y-%m-%d"))
        prices.append(stock_data['Close'].iloc[-1])

        return pd.DataFrame({
            "Start Date": dates[:-1],
            "End Date": dates[1:],
            "Start Price": prices[:-1],
            "End Price": prices[1:]
        })
    except Exception as e:
        print(f"Error downloading data for {stock_symbol}: {e}")
        return None  # Swallow the error and return None

def get_news(symbol, data):
    """Attach a JSON-encoded news list to every period row of ``data``.

    For each row the Finnhub company-news endpoint is queried for the row's
    ``Start Date``..``End Date`` window. Only the leading
    ``ceil(1% of the returned items)`` are kept, converted to
    ``{"date", "headline", "summary"}`` records and sorted by date string.

    When the API call fails with ``FinnhubAPIException`` an empty list is
    recorded for that row only, so the ``News`` column always has exactly one
    entry per row. (Replacing the whole accumulator on failure would drop the
    earlier rows and make the column length mismatch ``data``.)

    Returns ``data`` with the new ``News`` column.
    """
    news_list = []

    for _, row in data.iterrows():
        start_date = row['Start Date'].strftime('%Y-%m-%d')
        end_date = row['End Date'].strftime('%Y-%m-%d')
        print(symbol, ': ', start_date, ' - ', end_date)
        #time.sleep(1) # control qpm
        try:
            weekly_news = finnhub_client.company_news(symbol, _from=start_date, to=end_date)
        except FinnhubAPIException:
            # Best effort: keep going with no news for this window.
            news_list.append(json.dumps([]))
            continue

        # Keep only the first 1% (rounded up) of the returned articles.
        weekly_news_delet = weekly_news[:math.ceil(len(weekly_news)*0.01)]
        weekly_news_delet = [
            {
                # Non-positive timestamps cannot be formatted meaningfully.
                "date": datetime.fromtimestamp(n['datetime']).strftime('%Y%m%d%H%M%S') if n['datetime'] > 0 else 'Invalid Date',
                "headline": n['headline'],
                "summary": n['summary'],
            } for n in weekly_news_delet
        ]
        weekly_news_delet.sort(key=lambda x: x['date'])
        news_list.append(json.dumps(weekly_news_delet))

    data['News'] = news_list

    return data


def get_company_prompt(symbol):
    """Build the "[Company Introduction]" paragraph for ``symbol``.

    Returns the formatted text, or ``None`` when the Finnhub request raises
    ``FinnhubAPIException``. Raises ``gr.Error`` when the profile is empty.
    """
    overview = "[Company Introduction]:\n\n{name} is a leading entity in the {finnhubIndustry} sector. Incorporated and publicly traded since {ipo}, the company has established its reputation as one of the key players in the market. As of today, {name} has a market capitalization of {marketCapitalization:.2f} in {currency}, with {shareOutstanding:.2f} shares outstanding."
    listing = "\n\n{name} operates primarily in the {country}, trading under the ticker {ticker} on the {exchange}. As a dominant force in the {finnhubIndustry} space, the company continues to innovate and drive progress within the industry."

    try:
        profile = finnhub_client.company_profile2(symbol=symbol)
        if not profile:
            raise gr.Error(f"Failed to find company profile for symbol {symbol} from finnhub!")
        return (overview + listing).format(**profile)
    except FinnhubAPIException:
        # API failure: signal "no prompt" to the caller instead of raising.
        return None



def get_prompt_by_row(symbol, row):
    """Render one period row into prompt fragments.

    ``row`` must provide ``Start Date``/``End Date`` (strings or objects with
    ``strftime``), ``Start Price``/``End Price`` (objects supporting
    ``.iloc[0]``, whose first element is the price) and JSON strings under
    ``News`` and ``Basics``.

    Returns ``(head, news, basics)``: a sentence describing the price move, a
    list of formatted news entries dated no later than the end date (entries
    whose summary starts with a known promotional sentence are dropped), and
    a basic-financials paragraph.
    """
    def _as_day(value):
        return value if isinstance(value, str) else value.strftime('%Y-%m-%d')

    start_date = _as_day(row['Start Date'])
    end_date = _as_day(row['End Date'])

    # Prices arrive wrapped; the first element holds the actual number.
    rose = row['End Price'].iloc[0] > row['Start Price'].iloc[0]
    term = 'increased' if rose else 'decreased'
    head = "From {} to {}, {}'s stock price {} from {:.2f} to {:.2f}. Company news during this period are listed below:\n\n".format(
        start_date, end_date, symbol, term, row['Start Price'].iloc[0], row['End Price'].iloc[0])

    cutoff = end_date.replace('-', '')  # YYYYMMDD, comparable to the news stamps
    promo_prefix = "Looking for stock market analysis and research with proves results?"
    news = []
    for item in json.loads(row["News"]):
        if item['date'][:8] > cutoff:
            continue
        if item['summary'].startswith(promo_prefix):
            continue
        news.append("[Headline]: {}\n[Summary]: {}\n".format(item['headline'], item['summary']))

    basics = json.loads(row['Basics'])
    if not basics:
        basics = "[Basic Financials]:\n\nNo basic financial reported."
    else:
        intro = "Some recent basic financials of {}, reported at {}, are presented below:\n\n[Basic Financials]:\n\n".format(
            symbol, basics['period'])
        metric_lines = [f"{key}: {val}" for key, val in basics.items() if key != 'period']
        basics = intro + "\n".join(metric_lines)

    return head, news, basics


def sample_news(news, k=5):
    """Return up to ``k`` randomly chosen items of ``news`` in original order.

    ``k`` is clamped to ``[0, len(news)]`` so that requesting more items than
    exist (or sampling from an empty list) returns what is available instead
    of raising ``ValueError`` from ``random.sample``.
    """
    k = max(0, min(k, len(news)))
    # Sample positions rather than items so the original ordering can be kept.
    positions = sorted(random.sample(range(len(news)), k))
    return [news[pos] for pos in positions]


def get_current_basics(symbol, curday):
    """Return the latest quarterly basic financials reported on or before ``curday``.

    Quarterly series from Finnhub's basic-financials endpoint are regrouped
    into one dict per reporting period (metric name -> value, plus a
    ``'period'`` key). The newest period not later than ``curday`` is
    returned; if every period is later than ``curday`` the oldest one is
    returned instead.

    Raises ``gr.Error`` when the response contains no series or no quarterly
    data points (previously the latter surfaced as an unbound local variable
    or a ``KeyError``).
    """
    basic_financials = finnhub_client.company_basic_financials(symbol, 'all')
    series = basic_financials.get('series')
    if not series:
        raise gr.Error(f"Failed to find basic financials for symbol {symbol} from finnhub!")

    by_period = defaultdict(dict)
    for metric, value_list in (series.get('quarterly') or {}).items():
        for value in value_list:
            by_period[value['period']][metric] = value['v']

    if not by_period:
        raise gr.Error(f"Failed to find basic financials for symbol {symbol} from finnhub!")

    # Period strings are compared lexicographically against curday.
    periods = sorted(by_period)
    chosen = next((p for p in reversed(periods) if p <= curday), periods[0])

    basic = by_period[chosen]
    basic['period'] = chosen
    return basic
    

def get_all_prompts_online(symbol, data, curday, with_basics=True):
    """Assemble the analysis prompt body for one symbol.

    Combines the company introduction, one section per row of ``data`` (price
    move plus at most five randomly sampled news items) and, when
    ``with_basics`` is true, the latest basic financials.

    Returns ``(info, prompt)``, or ``(None, None)`` when no company
    introduction could be obtained.
    """
    company_prompt = get_company_prompt(symbol)
    if company_prompt is None:
        return None, None

    weekly_sections = []
    for _, row in data.iterrows():
        head, news, _unused_basics = get_prompt_by_row(symbol, row)
        weekly_sections.append((head, news))

    prompt = ""
    for head, news in weekly_sections:
        prompt += "\n" + head
        picked = sample_news(news, min(5, len(news)))
        prompt += "\n".join(picked) if picked else "No relative news reported."

    # The prediction window is the week that starts on curday.
    period = "{} to {}".format(curday, n_weeks_before(curday, -1))

    if not with_basics:
        basics = "[Basic Financials]:\n\nNo basic financial reported."
    else:
        latest = get_current_basics(symbol, curday)
        metric_lines = [f"{key}: {val}" for key, val in latest.items() if key != 'period']
        basics = "Some recent basic financials of {}, reported at {}, are presented below:\n\n[Basic Financials]:\n\n".format(
            symbol, latest['period']) + "\n".join(metric_lines)

    info = '\n'.join([company_prompt, prompt, basics])
    request = f"\n\nBased on all the information before {curday}, let's first analyze the positive developments and potential concerns for {symbol}. Come up with 2-4 most important factors respectively and keep them concise. Most factors should be inferred from company related news. " \
        f"Then make your prediction of the {symbol} stock price movement for next week ({period}). Provide a summary analysis to support your prediction."
    prompt = info + request

    return info, prompt


def construct_prompt(ticker, curday, n_weeks, use_basics):
    """Gather prices, news and basics for ``ticker`` and build its prompt body.

    The returned prompt is *not* wrapped in the instruction/system delimiters;
    the caller is expected to do that. An unparsable ``curday`` propagates the
    underlying exception.

    Returns ``(info, prompt)``, or ``(None, None)`` when price data or the
    company introduction is unavailable.
    """
    # Oldest boundary first: n_weeks ago, ..., one week ago, curday.
    steps = [n_weeks_before(curday, back) for back in reversed(range(n_weeks + 1))]

    data = get_stock_data(ticker, steps)
    if data is None:
        return None, None

    data = get_news(ticker, data)
    # Per-row basics are left empty; current basics are added later on demand.
    data['Basics'] = [json.dumps({})] * len(data)

    info, prompt = get_all_prompts_online(ticker, data, curday, use_basics)
    if info is None:
        return None, None
    return info, prompt

def predict_compare(ticker, curday, use_basics=False):
    """Compare ``ticker`` with up to three of its Finnhub peers via the LLM.

    The target company's prompt and the prompts of its peers (each built with
    ``construct_prompt`` over one week) are concatenated into a single
    Llama-2 style ``[INST]`` prompt, which is then passed to
    ``model.generate``.

    Args:
        ticker: Symbol of the company to analyse.
        curday: Reference date forwarded to ``construct_prompt``.
        use_basics: Forwarded to ``construct_prompt`` as ``use_basics``.

    Returns:
        ``(company_info, answer, None, None)`` where ``company_info`` is the
        collected raw information text and ``answer`` is the generated text
        following the last ``[/INST]`` marker.

    Raises:
        gr.Error: If Finnhub returns no peers for ``ticker`` or no prompt can
            be built for ``ticker`` itself.
    """
    # Peer companies of the requested ticker.
    peers_lst = finnhub_client.company_peers(ticker)
    if not peers_lst:
        raise gr.Error(f"Failed to find peer companies for symbol {ticker} from finnhub!")

    # Analysis material for the target company itself (e.g. TSLA).
    company_info, company_prompt = construct_prompt(ticker, curday, n_weeks=1, use_basics=use_basics)
    if company_info is None:
        # construct_prompt reports missing data as (None, None); without this
        # guard the string concatenation below fails with a TypeError.
        raise gr.Error(f"Failed to build the analysis prompt for symbol {ticker}!")

    # Start the information text and the comparison prompt.
    company_info += "\n\nThe following are the information of peer companis:"
    comparison_prompt = f"Below is the analysis of {ticker}. Please compare this company with its peers and give your analysis on which one is the best based on the following dimensions:\n1. Financial performance\n2. Market trends and news\n3. Stock price movement prediction\n\n{company_prompt}\n\n"

    # Add at most three peers, skipping any peer without usable data.
    # NOTE(review): the first list entry is skipped, presumably because
    # Finnhub lists the queried symbol itself first - confirm.
    max_peers = 3
    added = 0
    for peer in peers_lst[1:]:
        if added >= max_peers:
            break
        peer_info, peer_prompt = construct_prompt(peer, curday, n_weeks=1, use_basics=use_basics)
        if peer_info is None:
            continue
        company_info += f"\n\n{peer_info}"
        comparison_prompt += f"\n\n[Analysis of {peer}]:\n{peer_prompt}"
        added += 1

    # Build the model input.
    # NOTE(review): SYSTEM_PROMPT, tokenizer and model are module-level names
    # not defined in this function - confirm which prompt/model pair is meant.
    comparison_prompt = B_INST + B_SYS + SYSTEM_PROMPT + E_SYS + comparison_prompt + E_INST

    inputs = tokenizer(
        comparison_prompt, return_tensors='pt', padding=False
    )
    inputs = {key: value.to(model.device) for key, value in inputs.items()}

    print("Inputs loaded onto devices.")

    # NOTE(review): max_length is 4096*30 tokens, which looks far larger than
    # the context window usually associated with Llama-2 - confirm.
    res = model.generate(
        **inputs, max_length=4096*30, do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True
    )
    output = tokenizer.decode(res[0], skip_special_tokens=True)
    # Keep only the text after the last [/INST] marker.
    answer = re.sub(r'.*\[/INST\]\s*', '', output, flags=re.DOTALL)

    torch.cuda.empty_cache()

    return company_info, answer, None, None

# Analyst Rating

In [None]:
import os
import re
import time
import json
import random
import finnhub
import torch
import gradio as gr
import pandas as pd
import yfinance as yf
from pynvml import *
from peft import PeftModel
from collections import defaultdict
from datetime import date, datetime, timedelta
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime



# The Finnhub API key is set to an empty string here.
# NOTE(review): presumably a real key has to be filled in before the client
# can make successful requests - confirm.
os.environ["FINNHUB_API_KEY"] = ""
# Module-level client shared by the functions in this cell.
finnhub_client = finnhub.Client(api_key=os.environ["FINNHUB_API_KEY"])


def print_gpu_utilization():
    """Print the memory in use on GPU index 0, as reported by NVML."""
    nvmlInit()
    first_gpu = nvmlDeviceGetHandleByIndex(0)
    # Integer division of the reported byte count by 1024**2.
    used_mb = nvmlDeviceGetMemoryInfo(first_gpu).used // 1024**2
    print(f"GPU memory occupied: {used_mb} MB.")

# Imports used by plot_recommendation_trends (repeated from the top of this cell)
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import finnhub


# Define the plot_recommendation_trends function
def plot_recommendation_trends(symbol, n_months=4):
    """Plot analyst recommendation trends for ``symbol`` as a stacked bar chart.

    The recommendation trends are fetched from Finnhub and the first
    ``n_months`` entries are kept in the order Finnhub returned them.  One
    stacked bar per period is drawn (Strong Sell at the bottom, Strong Buy on
    top), every segment is labelled with its count, and the chart is saved as
    ``<symbol>_recommendation_trends.png`` in the current working directory.

    Args:
        symbol: Ticker symbol passed to Finnhub.
        n_months: Maximum number of periods to plot.  Converted with ``int``
            so that a float (e.g. from a UI slider) is accepted.

    Returns:
        ``(output, image_path)``: a text summary with one line per period and
        the path of the saved PNG.

    Raises:
        ValueError: If there is no recommendation data to plot.
    """
    entries = (finnhub_client.recommendation_trends(symbol) or [])[:int(n_months)]
    if not entries:
        # Previously this surfaced as an opaque "max() arg is an empty
        # sequence" error further down.
        raise ValueError(f"No recommendation trends available for symbol {symbol}!")

    # Convert dates to month + year format
    periods_formatted = [
        datetime.strptime(entry['period'], '%Y-%m-%d').strftime('%B %Y')
        for entry in entries
    ]

    # (Finnhub key, legend label, colour), listed bottom-to-top in the stack.
    categories = [
        ('strongSell', 'Strong Sell', '#8B0000'),
        ('sell', 'Sell', '#FF6347'),
        ('hold', 'Hold', '#FFA500'),
        ('buy', 'Buy', '#008000'),
        ('strongBuy', 'Strong Buy', '#006400'),
    ]

    image_path = f"{symbol}_recommendation_trends.png"
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.grid(True, which='both', linestyle='--', linewidth=0.5)

        bar_width = 0.35
        index = np.arange(len(entries))

        # Running height of each stack; doubles as the base of the next layer.
        bottoms = [0] * len(entries)
        for key, label, color in categories:
            heights = [entry[key] for entry in entries]
            bars = plt.bar(index, heights, bar_width, bottom=bottoms, label=label, color=color)

            # Write each segment's count at the segment's vertical midpoint.
            for bar, base in zip(bars, bottoms):
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width() / 2.0,
                         base + height / 2.0,
                         '%d' % int(height), ha='center', va='bottom', color='white')

            bottoms = [base + height for base, height in zip(bottoms, heights)]

        # Set axis labels and title
        plt.xlabel('Period', fontsize=12)
        plt.ylabel('#Analysts', fontsize=12)
        plt.title(f'{symbol} Stock Recommendations Trends', fontsize=14)
        plt.xticks(index, periods_formatted)

        # Leave 10% headroom above the tallest stack; fall back to 1 when every
        # count is zero so the axis limits are not identical.
        max_value = max(bottoms)
        plt.ylim(0, max_value * 1.1 if max_value else 1)

        plt.legend()

        # Save the chart as an image file
        plt.savefig(image_path)
    finally:
        # Release the figure; otherwise every call keeps one more figure alive
        # for the lifetime of the process.
        plt.close(fig)

    # One summary line per period with the number of analysts per rating.
    output = "".join(
        f"{period}: {entry['strongBuy']} Strong Buy, {entry['buy']} Buy, {entry['hold']} Hold, {entry['sell']} Sell, {entry['strongSell']} Strong Sell\n"
        for period, entry in zip(periods_formatted, entries)
    )

    return output, image_path

# predict
def predict_rating(ticker, n_months):
    """Return the analyst-rating summary and chart path for ``ticker``.

    Prints the current GPU memory usage, then delegates to
    ``plot_recommendation_trends``.

    Returns:
        ``(summary_text, None, chart_path, None)``.
    """
    print_gpu_utilization()

    summary_text, chart_path = plot_recommendation_trends(ticker, n_months)
    return summary_text, None, chart_path, None

2024-12-03 17:37:20.911233: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-03 17:37:20.926188: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733218640.943403  567104 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733218640.948483  567104 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-03 17:37:20.967020: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

# predict_final and demo

In [2]:


def predict_final(model_choice, ticker, date, n_weeks, use_latest_financials):
    """Dispatch a UI request to the predictor selected by ``model_choice``.

    Args:
        model_choice: One of "Forcaster", "Portfolio Manager",
            "Sentiment Analysis", "Peer Comparison" or "Rating Analysis".
        ticker: Ticker symbol forwarded to the selected predictor.
        date: Date value forwarded to the predictors that take one.
        n_weeks: Forwarded as the week count, or as the month count for
            "Rating Analysis".
        use_latest_financials: Forwarded to the predictors that accept it.

    Returns:
        Whatever the selected predictor returns.  For an unrecognised
        ``model_choice`` a 4-tuple ``("Please choose a model", None, None,
        None)`` is returned so that the result still provides one value for
        each of the four output components this function is wired to.
    """
    if model_choice == "Forcaster":
        return predict_forecast(ticker, date, n_weeks, use_latest_financials)
    if model_choice == "Portfolio Manager":
        return predict_strategy(ticker, date, n_weeks, use_latest_financials)
    if model_choice == "Sentiment Analysis":
        return predict_sentiment(ticker, date, n_weeks)
    if model_choice == "Peer Comparison":
        return predict_compare(ticker, date, use_latest_financials)
    if model_choice == "Rating Analysis":
        return predict_rating(ticker, n_weeks)

    # Unknown choice: put the hint in the first slot and leave the other
    # three outputs empty instead of returning a bare string.
    return "Please choose a model", None, None, None


In [None]:
import gradio as gr

def update_inputs(selected_model):
    """Return visibility/label updates for the input widgets.

    Args:
        selected_model: Current value of the model dropdown.

    Returns:
        A 5-tuple of ``gr.update`` objects, in this order: model dropdown,
        ticker, date, n_weeks slider (visibility and label) and
        "latest financials" checkbox.  An unrecognised value (including
        ``None``) falls back to showing every input with the "n_weeks" label
        instead of failing on unbound locals.
    """
    # (show_date, show_n_weeks, show_latest_financials, n_weeks_label)
    show_everything = (True, True, True, "n_weeks")
    settings = {
        "Forcaster": show_everything,
        "Portfolio Manager": show_everything,
        "Peer Comparison": (True, False, True, "n_weeks"),
        "Sentiment Analysis": (True, True, False, "n_weeks"),
        "Rating Analysis": (False, True, False, "n_months"),
    }
    show_date, show_n_weeks, show_latest_financials, n_weeks_label = settings.get(
        selected_model, show_everything
    )

    # The dropdown and the ticker box are visible for every model.
    return (gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=show_date),
            gr.update(visible=show_n_weeks, label=n_weeks_label),
            gr.update(visible=show_latest_financials))

def update_outputs(selected_model):
    """Return visibility updates for the output widgets.

    Args:
        selected_model: Current value of the model dropdown.

    Returns:
        A 4-tuple of ``gr.update`` objects, in this order: information,
        response, image output, plot output.  Any value other than
        "Sentiment Analysis" or "Rating Analysis" shows the two text boxes
        and hides both graphical outputs.
    """
    wants_plot = selected_model == "Sentiment Analysis"
    wants_image = selected_model == "Rating Analysis"
    # The response box is hidden only for the two graphical modes.
    wants_response = selected_model not in ("Sentiment Analysis", "Rating Analysis")

    return (gr.update(visible=True),
            gr.update(visible=wants_response),
            gr.update(visible=wants_image),
            gr.update(visible=wants_plot))

# Build the Gradio UI: a description header, then a row holding two columns -
# the first with the input controls, the second with the output widgets.
with gr.Blocks() as demo:
    # Add the title inside the layout and add the description as Markdown
    gr.Markdown("""
    <h1 style="text-align: center;">FinGPT</h1>
                
    Our FinGPT is a kind of financial analyst assistant having five functions: 
                **Forcastor, Portfolio Manager, Peer Comparison, Sentiment Analysis, and Rating Analysis.**
    All the analysis are based on company profiles, market news and optional basic financials retrieved from **yfinance & finnhub**.
    The models are finetuned on Llama2-7b-chat-hf with LoRA on the past year's DOW30 market data. Inference in this demo uses fp16 and **welcomes any ticker symbol**.
    
    **Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Please use common sense and always first consult a professional before trading or investing.**
    """)

    with gr.Row():
        with gr.Column():
            # NOTE(review): these choice strings (including the spelling
            # "Forcaster") are compared literally in predict_final,
            # update_inputs and update_outputs, so they must stay in sync.
            prediction_model = gr.Dropdown(
                label="Model Choice",
                choices=["Forcaster", "Portfolio Manager", "Peer Comparison", "Sentiment Analysis", "Rating Analysis"],
                value="Forcaster",
                info="Choose the model you want to use",
            )

            ticker = gr.Textbox(label="Ticker", value="TSLA", info="Companies that you can get information from Finnhub")
            date_text = gr.Textbox(label="Date", value='2024-05-31', info="Date from which the prediction is made, use format yyyy-mm-dd")

            # update_inputs relabels this slider to "n_months" when
            # "Rating Analysis" is selected.
            n_weeks_slider = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="n_weeks",
                info="Information of the past n weeks will be utilized, choose between 1 and 4",
            )

            use_latest_financials = gr.Checkbox(label="Use Latest Basic Financials", value=False, info="If checked, the latest quarterly reported basic financials of the company are taken into account.")

            # Add callback for model choice
            # update_inputs returns one gr.update per component listed in
            # `outputs`, in the same order.
            prediction_model.change(fn=update_inputs, inputs=prediction_model, outputs=[prediction_model, ticker, date_text, n_weeks_slider, use_latest_financials])
            submit_button = gr.Button("Submit")


        with gr.Column():
            information = gr.Textbox(label="Information", visible=True)  # Default visible
            response = gr.Textbox(label="Response", visible=True)  # Default visible
            image_output = gr.Image(label="Output Image", visible=False)  # Default hidden
            plot_output = gr.Plot(label="Output Image", visible=False)

            # Add callback for model choice
            # update_outputs returns one gr.update per output widget, in the
            # same order as the `outputs` list.
            prediction_model.change(fn=update_outputs, inputs=prediction_model, outputs=[information, response, image_output, plot_output])

        # predict_final is wired to four output components, so it has to
        # return four values in this order.
        submit_button.click(fn=predict_final, inputs=[prediction_model, ticker, date_text, n_weeks_slider, use_latest_financials], 
                            outputs=[information, response, image_output, plot_output])

# share=True additionally publishes the app under a public Gradio share URL
# (see the cell output below).
demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://c1f9e9de28605bd58d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




GPU memory occupied: 4317 MB.
