In [38]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
import pandas as pd

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

def get_sentiment_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).detach().numpy()[0]
    return probs[2] - probs[0]  # Positive - Negative = Polarity score (c in [-1, 1])


In [39]:
import yfinance as yf

def simulate_price_paths(symbol, S0, mu, sigma, T=1, steps=45, n_paths=10000):
    """
    Simulate multiple price paths for a given financial asset using the Geometric Brownian Motion model.

    Parameters:
        symbol (str): The ticker symbol of the asset.
        S0 (float): The initial price of the asset.
        mu (float): The expected return (drift) of the asset.
        sigma (float): The volatility of the asset.
        T (float, optional): The total time period for the simulation. Default is 1.
        steps (int, optional): The number of time steps in the simulation. Default is 45.
        n_paths (int, optional): The number of simulated paths. Default is 10000.

    Returns:
        np.ndarray: A 2D array where each column represents a simulated price path over time.
    """
    dt = T / steps
    paths = np.zeros((steps, n_paths))
    paths[0] = S0
    for t in range(1, steps):
        Z = np.random.standard_normal(n_paths)
        paths[t] = paths[t-1] * np.exp((mu - 0.5 * sigma**2) * dt + sigma * np.sqrt(dt) * Z)
    return paths

def get_ST_from_sentiment(paths, c, S0):
    if c >= 0:
        return S0 + (paths[-1].max() - S0) * c
    else:
        return S0 - (S0 - paths[-1].min()) * abs(c)


In [40]:
import numpy as np
symbol = "AAPL"
S0 = 150  # Initial stock price
mu = 0.05  # Expected return
sigma = 0.2  # Volatility
T = 1  # Time period in years
steps = 45  # Number of time steps
n_paths = 10000  # Number of simulated paths

paths = simulate_price_paths(symbol, S0, mu, sigma, T, steps, n_paths)
future_prices = get_ST_from_sentiment(paths=paths, c=0.008440747, S0=S0)
print(paths)
print(future_prices)

[[150.         150.         150.         ... 150.         150.
  150.        ]
 [158.97232217 146.00195417 152.06671276 ... 154.96344805 143.66116566
  145.33651203]
 [157.68881561 146.0947992  157.98361789 ... 162.3371403  147.92634404
  144.56278117]
 ...
 [149.48048844 235.73604098 180.31713422 ... 127.85185074 145.22726352
  148.11969146]
 [147.15086314 227.32540181 174.40246761 ... 129.19321513 153.03342225
  148.45172816]
 [141.47043401 218.64473497 179.15358751 ... 138.24751324 151.62402925
  150.63916576]]
151.62274766154147


In [41]:
def calculate_yield(ST, S0):
    return np.log(ST / S0)


In [42]:
from pypfopt.black_litterman import BlackLittermanModel, market_implied_prior_returns
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.expected_returns import mean_historical_return

def apply_black_litterman(prices_df, sentiment_views, tau=0.05):
    S = CovarianceShrinkage(prices_df).ledoit_wolf()
    mu = mean_historical_return(prices_df)
    market_weights = np.array([1/len(prices_df.columns)] * len(prices_df.columns))
    delta = market_weights.T @ mu.values
    pi = market_implied_prior_returns(market_weights, S, delta)
    
    P = np.eye(len(sentiment_views))
    Q = np.array([sentiment_views[s] for s in prices_df.columns])
    
    bl = BlackLittermanModel(S, pi=pi, Q=Q, P=P, omega="idzorek", tau=tau)
    ret_bl = bl.bl_returns()
    cov_bl = bl.bl_cov()
    
    ef = EfficientFrontier(ret_bl, cov_bl)
    weights = ef.max_sharpe()
    return ef.clean_weights()


In [43]:
import feedparser
import re

def fetch_articles_from_google_news(ticker, max_articles=5):
    rss_url = f"https://news.google.com/rss/search?q={ticker}+stock"
    feed = feedparser.parse(rss_url)
    articles = []

    for entry in feed.entries[:max_articles]:
        title = entry.title
        link = entry.link
        articles.append({"title": title, "link": link})

    return articles


In [44]:
import yfinance as yf

def fetch_yfinance_news(ticker, max_entries=5):
    stock = yf.Ticker(ticker)
    news = stock.news[:max_entries]
    return [{"title": item['content']['title'], "link": item['content']['canonicalUrl']['url']} for item in news]


In [45]:
ticker = "AAPL"

print("🔎 Google News:")
articles = fetch_articles_from_google_news(ticker)
for a in articles:
    print(f"• {a['title']} → {a['link']}")

print("\n📈 Yahoo Finance News:")
reports = fetch_yfinance_news(ticker)
for r in reports:
    print(f"• {r['title']} → {r['link']}")


🔎 Google News:
• Is Apple Inc. (NASDAQ:AAPL) the Best Tech Stock to Buy For Long-Term Investment? - Yahoo Finance → https://news.google.com/rss/articles/CBMifEFVX3lxTE5INlM5aHRlSWdyd3lxTW5qUW5zUXl4anY5T1V4TURtRXQzTVV0dGphSHZEdTVhcHB5Szk4R0pOcnJRZmpqcU1POEFGcndlTURjank5cXdLVy1XX1M5cmFKdGlwc0R4UmFGSllJMVlaRzAyV1RIWEw2N0o2Tm8?oc=5
• Apple Was The Top Smartphone Seller In Q1. Does That Make AAPL Stock A Buy Here? - Barchart.com → https://news.google.com/rss/articles/CBMiwAFBVV95cUxNeDFlVnFVVXVDVkdFMWJHMU16cEpvRGlQOEtzRlo4ZkdUZ2ZnenZWSWhwd0NXcGpuZi12S1dQVDJuX25FVmVSVmZqeE5WTmVON2lidE9YenJfU2dwS2Q1Qjd2SXE1ZjhSUmFyazVlWERhTmhQUGJlTm1UWW10cFoxRzFwZEY5a1pVNGllRUR4bWRBWndESG9kdl9mRl9JT3hibnJnRXZLeTZ3UGRkc2puUDdhZldZU2pZQ195bGlLNVg?oc=5
• Magnificent Seven Stocks Watch: Nvidia, Tesla Sell Off - Investor's Business Daily → https://news.google.com/rss/articles/CBMie0FVX3lxTFB6UE0zMU96cW1vdmN1cE1RQjd4RzlqbFVycEk2VlVNRFY3aDFmU1E2eGYyQi1XV09zTEF3SnFaZVMtMWJiV3VzVmtrSy1KNXZZYUctTXgxOFNJRkFRaFJiUkZHSjBa

In [46]:
from newspaper import Article

def extract_article_text(url):
    try:
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    except:
        return None


In [47]:
def extract_text(reports):
    for r in reports:
        link = r.get('link')
        if link:
            try:
                r["text"] = extract_article_text(link)
            except Exception as e:
                r["text"] = None
                print(f"Failed to extract text from {link}: {e}")
    return reports


print(extract_text(reports))

[{'title': 'The Best Warren Buffett Stocks to Buy With $1,000 Right Now', 'link': 'https://www.fool.com/investing/2025/04/18/the-best-warren-buffett-stocks-to-buy-with-1000-ri/?source=eptyholnk0000202&utm_source=yahoo-host-full&utm_medium=feed&utm_campaign=article&referring_guid=0fc538b4-b833-4b06-9d67-ad1ef2430468', 'text': 'The Oracle of Omaha has already done the hard work for you. Just follow his lead.\n\nGot an extra $1,000 you\'re ready to put to work for a while but don\'t know what to buy? Don\'t make it complicated. Borrow a pick or two -- or more -- from the market\'s most proven stock picker. That\'s Warren Buffett, of course, chief investment guru of Berkshire Hathaway (BRK.A 0.82%) (BRK.B 0.39%), which regularly outshines the S&P 500\'s long-term performance. Credit Buffett\'s stock-picking prowess, mostly.\n\nWith that as the backdrop, here\'s a closer look at three Buffett/Berkshire holdings that would be good all-around picks for nearly investor.\n\nApple\n\nIt\'s such 

In [48]:
for r in reports:
    text = r.get("text")
    if text is not None:
        print(get_sentiment_score(text=text))

0.053710118
0.11429344
0.6469492
-0.703268


In [49]:
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


In [None]:
from sklearn.preprocessing import MinMaxScaler
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.algorithms.moo.moead import MOEAD
from pymoo.algorithms.moo.nsga3 import NSGA3
from pymoo.core.problem import Problem
from pymoo.optimize import minimize
from pymoo.util.ref_dirs import get_reference_directions

def predict_with_sentiment_ann(models, data, sentiment_scores, duration="1y"):
    logger.info("Running ANN + Sentiment-enhanced predictions")
    future_prices = {}
    duration_map = {"6m": 180, "1y": 365, "5y": 1825, "10y": 3650}
    prediction_days = duration_map.get(duration, 365)

    for ticker, model in models.items():
        last_10_days = np.array(data[ticker][-10:]).reshape(1, -1)
        future_preds = []
        sentiment_factor = sentiment_scores.get(ticker, 0)

        for _ in range(prediction_days):
            predicted_price = model.predict(last_10_days)[0][0]

            # Modify prediction based on sentiment (polarity in [-1, 1])
            adjusted_price = predicted_price * (1 + 0.1 * sentiment_factor)
            future_preds.append(adjusted_price)

            last_10_days = np.roll(last_10_days, -1)
            last_10_days[0, -1] = adjusted_price

        future_prices[ticker] = future_preds
        logger.debug(f"{ticker} - Avg predicted price: {np.mean(future_preds):.2f} with sentiment {sentiment_factor:.2f}")

    return future_prices

def optimize_with_bl_and_moo(future_prices, prices_df, sentiment_scores, return_res):
    logger.info("Running hybrid Black–Litterman + NSGA3 optimization")

    # Step 1: Compute returns and risks
    tickers = list(future_prices.keys())
    ann_returns = np.array([np.mean(future_prices[t]) for t in tickers])
    ann_risks = np.array([np.std(future_prices[t]) for t in tickers])

    # Normalize returns and risks
    scaler = MinMaxScaler()
    norm_returns = scaler.fit_transform(ann_returns.reshape(-1, 1)).flatten()
    norm_risks = scaler.fit_transform(ann_risks.reshape(-1, 1)).flatten()

    # Step 2: Black-Litterman with sentiment views
    S = CovarianceShrinkage(prices_df).ledoit_wolf()
    mu = mean_historical_return(prices_df)
    market_weights = pd.Series(1 / len(tickers), index=tickers)
    delta = 2.5
    pi = market_implied_prior_returns(cov_matrix=S, market_caps=market_weights, risk_aversion=delta)

    P = np.eye(len(tickers))
    Q = np.array([sentiment_scores[t] for t in tickers])
    view_confidences = np.clip(np.abs(Q), 0.1, 1.0)  # or use np.array([0.8] * len(Q))
    bl = BlackLittermanModel(
        S,
        pi=pi,
        Q=Q,
        P=P,
        omega="idzorek",
        view_confidences=view_confidences,
        tau=0.05
    )
    logger.debug("Sentiment Q vector: " + str(Q))
    logger.debug("View confidence: " + str(view_confidences))

    ret_bl = bl.bl_returns()
    cov_bl = bl.bl_cov()

    logger.debug("BL Posterior Returns: " + str(ret_bl.to_dict()))

    # Step 3: Multi-Objective Optimization
    class HybridPortfolioProblem(Problem):
        def __init__(self):
            super().__init__(n_var=len(tickers), n_obj=2, xl=0.0, xu=1.0)
            self.ret_bl = ret_bl.values
            self.risks = norm_risks

        def _evaluate(self, X, out, *args, **kwargs):
            ret = np.sum(X * self.ret_bl, axis=1)
            risk = np.sum(X * self.risks, axis=1)
            out["F"] = np.column_stack([-ret, risk])  # maximize return, minimize risk

    problem = HybridPortfolioProblem()
    ref_dirs = get_reference_directions("das-dennis", 2, n_partitions=12)
    logger.info("Running NSGA3 on hybrid portfolio problem")
    res = minimize(
        problem=problem,
        algorithm=NSGA3(ref_dirs),
        termination=("n_gen", 100),
        verbose=False
    )
    logger.debug("Optimization complete. Objectives shape: " + str(res.F.shape))

    best_solution = res.X[np.argmin(res.F[:, 1])]
    total = np.sum(best_solution)
    allocation = {ticker: weight / total for ticker, weight in zip(tickers, best_solution)}

    logger.info("Final allocation (top 3): " + str(dict(list(allocation.items())[:3])))
    if return_res:
        return allocation, dict(ret_bl), res
    else:
        return allocation, dict(ret_bl)


In [73]:
from pydantic import BaseModel
from typing import Dict, List
import tensorflow as tf
class SentimentRequest(BaseModel):
    tickers: List[str]
    duration: str = "1y"
    
def fetch_historical_data(tickers):
    data = {}
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        history = stock.history(period="5y")
        data[ticker] = history['Close'].values
    return data

def get_sentiment_scores_for_tickers(tickers):
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

    def extract_article_text(url):
        try:
            article = Article(url)
            article.download()
            article.parse()
            return article.text
        except:
            return None

    def score_text(text):
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1).detach().numpy()[0]
        return probs[2] - probs[0]

    scores = {}
    for ticker in tickers:
        articles = fetch_articles_from_google_news(ticker) + fetch_yfinance_news(ticker)
        articles = extract_text(articles)
        texts = [a.get("text") for a in articles if a.get("text")]

        if texts:
            sentiment_scores = [score_text(t) for t in texts]
            scores[ticker] = np.mean(sentiment_scores)  # average polarity
        else:
            scores[ticker] = 0.0  # neutral

    return scores
def train_ann(data):
    models = {}
    for ticker, prices in data.items():
        X, y = [], []
        for i in range(len(prices) - 10):
            X.append(prices[i:i+10])
            y.append(prices[i+10])
        X, y = np.array(X), np.array(y)
        
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation='relu', input_shape=(10,)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X, y, epochs=50, verbose=0)
        models[ticker] = model
    return models
# def sentiment_optimized_allocation(request):
#     logger.info("Sentiment allocation requested")

#     tickers = request["tickers"]
#     print(tickers)
#     data = fetch_historical_data(tickers)
#     prices_df = pd.DataFrame({ticker: pd.Series(prices) for ticker, prices in data.items()})
#     print(f"Prices data \n{prices_df}")

#     sentiment_scores = get_sentiment_scores_for_tickers(tickers)
#     logger.debug(f"sentiment scores: {sentiment_scores}")
#     models = train_ann(data)
#     future_prices = predict_with_sentiment_ann(models, data, sentiment_scores, request["duration"])

#     allocation, ret_bl = optimize_with_bl_and_moo(future_prices, prices_df, sentiment_scores)

#     return {
#         "allocation": allocation,
#         "black_litterman_returns": ret_bl,
#         "sentiment_scores": sentiment_scores
#     }

import matplotlib.pyplot as plt
import io
import base64

def plot_pareto_front(res):
    F = res.F
    plt.figure(figsize=(8, 6))
    plt.scatter(F[:, 1], -F[:, 0], c='blue', label='Pareto Front')  # x = risk, y = return
    plt.xlabel("Risk (Objective 2)")
    plt.ylabel("Return (Objective 1)")
    plt.title("Pareto Front of Portfolio Optimization")
    plt.grid(True)
    plt.legend()
    
    # Save plot to base64
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    buf.seek(0)
    plot_base64 = base64.b64encode(buf.read()).decode("utf-8")
    buf.close()
    plt.close()
    return plot_base64


def compute_portfolio_metrics(historical_prices_df: pd.DataFrame, weights: dict):
    returns_df = historical_prices_df.pct_change().dropna()
    weight_vector = np.array([weights[ticker] for ticker in returns_df.columns])
    portfolio_returns = returns_df @ weight_vector

    annualized_return = np.mean(portfolio_returns) * 252
    annualized_volatility = np.std(portfolio_returns) * np.sqrt(252)
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility != 0 else 0

    downside_returns = portfolio_returns[portfolio_returns < 0]
    sortino_ratio = (np.mean(portfolio_returns) * 252) / (np.std(downside_returns) * np.sqrt(252)) if len(downside_returns) > 0 else 0

    cumulative = (1 + portfolio_returns).cumprod()
    peak = cumulative.cummax()
    drawdown = (cumulative - peak) / peak
    max_drawdown = drawdown.min()

    logger.info("Annualized Return: {:.2%}".format(annualized_return))
    logger.info("Annualized Volatility: {:.2%}".format(annualized_volatility))
    logger.info("Sharpe Ratio: {:.2f}".format(sharpe_ratio))
    logger.info("Sortino Ratio: {:.2f}".format(sortino_ratio))
    logger.info("Max Drawdown: {:.2%}".format(max_drawdown))

    return {
        "Annualized Return": annualized_return,
        "Volatility": annualized_volatility,
        "Sharpe Ratio": sharpe_ratio,
        "Sortino Ratio": sortino_ratio,
        "Max Drawdown": max_drawdown
    }

import requests
import os
def format_interpretation_prompt(allocation, sentiment_scores, bl_returns, future_prices):
    volatility = {t: round(np.std(p), 4) for t, p in future_prices.items()}

    prompt = (
        "Please interpret this portfolio optimization result:\n\n"
        f"Asset Allocation: {allocation}\n"
        f"Sentiment Scores: {sentiment_scores}\n"
        f"Black–Litterman Expected Returns: {bl_returns}\n"
        f"Predicted Volatility (ANN): {volatility}\n\n"
        "Provide a client-friendly interpretation in bullet points."
    )

    return prompt
def generate_interpretation_openrouter(prompt, model="mistralai/mistral-7b-instruct"):
    api_key = "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"

    # api_key = os.getenv("OPENROUTER_API_KEY")  # store securely or hardcode temporarily
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "http://localhost",  # or your frontend domain
        "Content-Type": "application/json"
    }

    body = {
        "model": model,
        "messages": [
            {
                "role": "system",
                "content": "You are a financial advisor creating easy-to-understand interpretations of portfolio optimization results."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    }

    response = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=body)
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    else:
        raise Exception(f"OpenRouter error: {response.text}")

def sentiment_optimized_allocation(request):
    logger.info("Sentiment allocation requested")

    tickers = request["tickers"]
    duration = request.get("duration", "1y")
    
    data = fetch_historical_data(tickers)
    prices_df = pd.DataFrame({ticker: pd.Series(prices) for ticker, prices in data.items()})

    sentiment_scores = get_sentiment_scores_for_tickers(tickers)
    logger.debug(f"Sentiment scores: {sentiment_scores}")

    models = train_ann(data)
    future_prices = predict_with_sentiment_ann(models, data, sentiment_scores, duration)

    # Run optimization and capture res for plotting
    allocation, ret_bl, res = optimize_with_bl_and_moo(future_prices, prices_df, sentiment_scores, return_res=True)

    # Portfolio metrics
    metrics = compute_portfolio_metrics(prices_df, allocation)

    # Pareto front
    pareto_plot_base64 = plot_pareto_front(res)

    # Explainability
    sorted_by_risk = sorted(future_prices.items(), key=lambda x: np.std(x[1]))
    lowest_risk_ticker = sorted_by_risk[0][0]
    logger.info(f"High weight in {lowest_risk_ticker} due to lowest predicted variance from ANN simulation")
    
    prompt = format_interpretation_prompt(allocation, sentiment_scores, ret_bl, future_prices)
    interpretation = generate_interpretation_openrouter(prompt)

    return {
        "allocation": allocation,
        "black_litterman_returns": ret_bl,
        "sentiment_scores": sentiment_scores,
        "metrics": metrics,
        "pareto_plot_base64": pareto_plot_base64,
        "explanation": f"High weight in {lowest_risk_ticker} due to lowest predicted variance from ANN simulation",
        "llm_interpretation": interpretation
    }


# req = SentimentRequest({"tickers": ["AAPL", "MSFT", "GOOGL"]})
sentiment_optimized_allocation({"tickers": ["AAPL", "MSFT", "GOOGL"], "duration": "1y"})

INFO:__main__:Sentiment allocation requested


INFO:__main__:Running ANN + Sentiment-enhanced predictions




INFO:__main__:Running hybrid Black–Litterman + NSGA3 optimization
INFO:__main__:Running NSGA3 on hybrid portfolio problem
INFO:__main__:Final allocation (top 3): {'AAPL': 3.610017474220966e-05, 'MSFT': 0.9999638947664377, 'GOOGL': 5.0588200965224175e-09}
INFO:__main__:Annualized Return: 19.45%
INFO:__main__:Annualized Volatility: 27.09%
INFO:__main__:Sharpe Ratio: 0.72
INFO:__main__:Sortino Ratio: 1.05
INFO:__main__:Max Drawdown: -37.15%
INFO:__main__:High weight in MSFT due to lowest predicted variance from ANN simulation


{'allocation': {'AAPL': 3.610017474220966e-05,
  'MSFT': 0.9999638947664377,
  'GOOGL': 5.0588200965224175e-09},
 'black_litterman_returns': {'AAPL': 0.2749268088263086,
  'MSFT': 0.287397992444769,
  'GOOGL': 0.36082800423115025},
 'sentiment_scores': {'AAPL': 0.23946203,
  'MSFT': 0.36165634,
  'GOOGL': 0.5122429},
 'metrics': {'Annualized Return': 0.1944585293341084,
  'Volatility': 0.27094251863661456,
  'Sharpe Ratio': 0.717711381412654,
  'Sortino Ratio': 1.05325712269274,
  'Max Drawdown': -0.3714771130127735},
 'pareto_plot_base64': 'iVBORw0KGgoAAAANSUhEUgAAAyAAAAJYCAYAAACadoJwAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAW+pJREFUeJzt3Qd8U9X///FPKdCyh6xSqiioCDIUviAgirIcX4aIoqggIrhQxteFAwRUnFhUBEUBF4JgRX+iCCIoCIpfEHDiYJUNKpQlo73/x+f0n3yTNCktbU9uktfz8YglNzf3nuSk9b5zVpzjOI4AAAAAgAXFbJwEAAAAABQBBAAAAIA1BBAAAAAA1hBAAAAAAFhDAAEAAABgDQEEAAAAgDUEEAAAAADWEEAAAAAAWEMAAQAAAGANAQQAAACANQQQAAAAANYQQAAAAABYQwABAAAA