In [53]:
from fastapi import FastAPI, Query
from typing import List
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from fastapi.middleware.cors import CORSMiddleware
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import Problem
from pymoo.optimize import minimize
from pymoo.termination import get_termination
from pymoo.operators.sampling.rnd import FloatRandomSampling
from pymoo.operators.crossover.sbx import SimulatedBinaryCrossover
from pymoo.operators.mutation.pm import PolynomialMutation
from sklearn.model_selection import train_test_split
from datetime import date
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel
from sklearn.preprocessing import MinMaxScaler
from fastapi.responses import JSONResponse

from pymoo.algorithms.moo.moead import MOEAD
from pymoo.algorithms.moo.nsga3 import NSGA3
from pymoo.core.problem import Problem
from pymoo.optimize import minimize
from pymoo.util.ref_dirs import get_reference_directions
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [54]:
class TickerRequest(BaseModel):
    tickers: list[str]

def fetch_historical_data(tickers):
    data = {}
    for ticker in tickers:
        stock = yf.Ticker(ticker)
        history = stock.history(period="5y")
        data[ticker] = history['Close'].values
    return data

def train_ann(data):
    models = {}
    for ticker, prices in data.items():
        X, y = [], []
        for i in range(len(prices) - 10):
            X.append(prices[i:i+10])
            y.append(prices[i+10])
        X, y = np.array(X), np.array(y)
        
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation='relu', input_shape=(10,)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X, y, epochs=50, verbose=0)
        models[ticker] = model
    return models

def predict_future_prices(models, data, duration="1y"):
    future_prices = {}
    future_dates = {}
    duration_map = {"6m": 180, "1y": 365, "5y": 1825, "10y": 3650}
    prediction_days = duration_map.get(duration, 365)  # Default to 1 year if invalid input

    for ticker, model in models.items():
        last_10_days = np.array(data[ticker][-10:]).reshape(1, -1)
        future_preds = []
        future_dates[ticker] = []

        for i in range(prediction_days):
            predicted_price = model.predict(last_10_days)[0][0]
            future_preds.append(predicted_price)
            future_dates[ticker].append(f"Future Day {i+1}")
            last_10_days = np.roll(last_10_days, -1)
            last_10_days[0, -1] = predicted_price

        future_prices[ticker] = future_preds

    return future_prices

class PortfolioOptimization(Problem):
    def __init__(self, future_prices):
        super().__init__(n_var=len(future_prices), n_obj=2, xl=0, xu=1)
        self.tickers = list(future_prices.keys())
        self.returns = np.array([np.mean(future_prices[t]) for t in self.tickers])
        self.risks = np.array([np.std(future_prices[t]) for t in self.tickers])

        # Normalize returns and risks to prevent dominance
        scaler = MinMaxScaler()
        self.returns = scaler.fit_transform(self.returns.reshape(-1, 1)).flatten()
        self.risks = scaler.fit_transform(self.risks.reshape(-1, 1)).flatten()

        print("Normalized Returns:", self.returns, "Normalized Risks:", self.risks)

    def _evaluate(self, X, out, *args, **kwargs):
        returns = np.sum(X * self.returns, axis=1)
        risks = np.sum(X * self.risks, axis=1)
        out["F"] = np.column_stack([-returns, risks])  # Maximizing returns, minimizing risk


In [83]:
class SectorRequest(BaseModel):
    sector: str

def fetch_sector_data(sector):
    try:
        sector_data = yf.Sector(sector)
        industries_df = sector_data.industries
        # print(f"Industies_df : {industries_df.info()}")
        industries = {
            row["name"]: row["symbol"] for _, row in industries_df.iterrows()
        }
        # print(f"Industry data base: {industries}")
        return {
            "ticker": sector_data.ticker,
            "top_companies": sector_data.top_companies,
            "top_etfs": sector_data.top_etfs,
            "top_mutual_funds": sector_data.top_mutual_funds,
            "research_reports": sector_data.research_reports,
            "industries": industries
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error fetching sector data: {str(e)}")

def optimize_industries(industries):
    future_prices = {}
    for industry, symbol in industries.items():
        try:
            ticker = yf.Ticker(symbol)
            stock_prices = ticker.history(period="5y")["Close"].dropna().values
            if stock_prices.size > 0 and not np.isnan(stock_prices).all():
                future_prices[industry] = np.mean(stock_prices)
        except Exception:
            pass
    
    if not future_prices:
        raise HTTPException(status_code=500, detail="No valid stock data available for optimization.")
    
    problem = PortfolioOptimization(future_prices)
    ref_dirs = get_reference_directions("das-dennis", 2, n_partitions=12)
    algorithm = NSGA3(ref_dirs)
    res = minimize(problem, algorithm, termination=("n_gen", 100), verbose=False)
    top_indices = np.argsort(res.F[:, 1])[:5]
    return [list(future_prices.keys())[i] for i in top_indices]

def get_top_companies(industry_symbol):
    try:
        if not industry_symbol or "^" in industry_symbol:
            return []  # Skip invalid symbols
        industry_data = yf.Industry(industry_symbol)
        return industry_data.top_companies["symbol"].tolist()[:5] if industry_data.top_companies is not None else []
    except Exception:
        return []


def get_top_industries(sector):
    try:
        sector_data = fetch_sector_data(sector)
        # print(sector_data)
        # top_industries = optimize_industries(sector_data["industries"])
        return {"industry": sector_data["top_companies"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

In [84]:
get_top_industries("technology")

{'industry':                                                name      rating  market weight
 symbol                                                                        
 AAPL                                     Apple Inc.         Buy       0.199588
 MSFT                          Microsoft Corporation  Strong Buy       0.160057
 NVDA                             NVIDIA Corporation  Strong Buy       0.159307
 AVGO                                  Broadcom Inc.  Strong Buy       0.049895
 ORCL                             Oracle Corporation         Buy       0.024986
 CRM                                Salesforce, Inc.         Buy       0.015620
 CSCO                            Cisco Systems, Inc.         Buy       0.014000
 IBM     International Business Machines Corporation         Buy       0.012790
 ACN                                   Accenture plc         Buy       0.011847
 PLTR                     Palantir Technologies Inc.        Hold       0.010966
 ADBE                       

In [4]:
def fetch_stock_data(ticker: str, start: str, end: str):
    stock = yf.download(ticker, start=start, end=end)
    return stock[['Open', 'High', 'Low', 'Close', 'Volume']]

def prepare_data(data):
    data['Target'] = data['Close'].shift(-1)
    data.dropna(inplace=True)
    X = data[['Open', 'High', 'Low', 'Close', 'Volume']].values
    y = data['Target'].values
    return X, y

def train_ann(X_train, y_train):
    X_train = np.array(X_train, dtype=np.float32)
    y_train = np.array(y_train, dtype=np.float32)
    model = Sequential([
        Dense(40, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(20, activation='relu'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16).prefetch(tf.data.AUTOTUNE)
    steps_per_epoch = len(X_train) // 16
    model.fit(dataset, epochs=50, steps_per_epoch=steps_per_epoch, verbose=0)
    return model

class PortfolioOptimization(Problem):
    def __init__(self, returns, risks):
        super().__init__(n_var=len(returns), n_obj=1, n_constr=1, xl=0.0, xu=1.0)
        self.returns = np.array(returns)
        self.risks = np.array(risks)
    
    def _evaluate(self, X, out, *args, **kwargs):
        portfolio_return = np.dot(X, self.returns)  # Shape: (100,)
        
        # Compute portfolio variance correctly
        portfolio_risk = np.sqrt(np.einsum('ij,jk,ik->i', X, self.risks, X))  # ✅ Fix: Proper matrix multiplication

        out["F"] = -portfolio_return / portfolio_risk  # Maximizing return/risk ratio
        out["G"] = np.sum(X, axis=1) - 1  # Constraint: Sum of weights should be 1


def optimize_portfolio(returns, risks):
    problem = PortfolioOptimization(returns, risks)
    algorithm = NSGA2(
        pop_size=100,
        sampling=FloatRandomSampling(),
        crossover=SimulatedBinaryCrossover(prob_var=0.9, eta=15),
        mutation=PolynomialMutation(prob=0.2, eta=20)
    )
    res = minimize(problem, algorithm, termination=get_termination("n_gen", 50), verbose=False)
    return res.X[np.argmin(res.F)]

In [None]:
def predict_stocks(tickers: List[str] = Query(...), start: str = "2023-01-01", end: str = date.today()):
    all_predictions = {}
    stock_returns = []
    stock_risks = []
    
    for ticker in tickers:
        data = fetch_stock_data(ticker, start, end)
        X, y = prepare_data(data)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        model = train_ann(X_train, y_train)
        predictions = model.predict(X[-10:]).flatten()
        all_predictions[ticker] = predictions.tolist()
        stock_returns.append(np.mean(predictions))
        stock_risks.append(np.std(predictions))
    
    return {"predictions": all_predictions, "returns": stock_returns, "risks": stock_risks}

print(predict_stocks(["AAPL", "MSFT", "BRK-B", "SCHD"]))

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed


{'predictions': {'AAPL': [68294.75, 90665.75, 75591.125, 122707.0, 103324.5, 122463.5, 93376.5, 111501.5, 73497.5, 73755.0], 'MSFT': [-189303.5625, -265678.6875, -241654.28125, -210564.46875, -221958.046875, -277349.09375, -264189.40625, -248682.59375, -279931.90625, -272899.0625], 'BRK-B': [-31012.6953125, -25916.9453125, -23500.1328125, -18753.6953125, -26669.6015625, -26482.2265625, -22836.4296875, -27100.5859375, -19675.8671875, -21417.0234375], 'SCHD': [-8472.7353515625, -9449.5009765625, -7513.18896484375, -7072.87646484375, -7653.98583984375, -6904.28271484375, -6352.35302734375, -9854.4228515625, -7743.01708984375, -7957.00146484375]}, 'returns': [93517.71, -247221.1, -24336.521, -7897.336], 'risks': [19663.547, 29385.63, 3593.3179, 1041.9939]}


In [11]:
def optimize(tickers: List[str] = Query(...), start: str = "2023-01-01", end: str = "2025-01-01"):
    prediction_results = predict_stocks(tickers, start, end)
    returns = prediction_results["returns"]

    # Fetch stock close prices and compute log returns
    historical_data = []
    for ticker in tickers:
        stock_prices = fetch_stock_data(ticker, start, end)["Close"].pct_change().dropna().values
        if len(stock_prices) > 0:  # Ensure non-empty series
            historical_data.append(stock_prices)
    
    # Ensure we have at least two stocks with valid data
    if len(historical_data) < 2:
        raise ValueError("Not enough valid stock data for optimization.")

    # Find the minimum available time points across all stocks
    min_length = min(len(data) for data in historical_data)

    # Truncate all time series to the minimum length
    historical_data = np.array([data[:min_length] for data in historical_data])
    print(historical_data.shape)
    print(historical_data)
    historical_data = historical_data.flatten(2)

    # Ensure data is in (num_assets, num_time_points) shape for np.cov()
    # historical_data = historical_data.T  # ✅ Transpose to match required shape

    # Compute covariance matrix (num_assets x num_assets)
    risks = np.cov(historical_data, rowvar=False)  # ✅ Proper covariance calculation

    # Handle potential NaNs in covariance matrix
    if np.isnan(risks).any():
        raise ValueError("NaN values detected in risk matrix. Check stock data.")

    best_allocation = optimize_portfolio(returns, risks)
    return {"best_portfolio": dict(zip(tickers, best_allocation.tolist()))}




print(optimize(["AAPL", "MSFT", "BRK-B", "SCHD"]))

[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


(4, 501, 1)
[[[ 0.01031438]
  [-0.01060463]
  [ 0.03679383]
  ...
  [-0.01324213]
  [-0.01326343]
  [-0.00705789]]

 [[-0.04374322]
  [-0.02963781]
  [ 0.01178528]
  ...
  [-0.01730156]
  [-0.01323952]
  [-0.0078384 ]]

 [[ 0.01497204]
  [-0.00524557]
  [ 0.01850434]
  ...
  [-0.0055981 ]
  [-0.00952883]
  [ 0.00247699]]

 [[ 0.00994174]
  [-0.00590636]
  [ 0.02495382]
  ...
  [-0.00434943]
  [-0.0087368 ]
  [ 0.00330518]]]


TypeError: order must be str, not int

In [None]:
def optimize(tickers: List[str] = Query(...), start: str = "2023-01-01", end: str = date.today()):
    prediction_results = predict_stocks(tickers, start, end)
    returns = prediction_results["returns"]
    print(returns)
    # Fetch stock close prices and compute returns
    historical_data = []
    for ticker in tickers:
        stock_prices = fetch_stock_data(ticker, start, end)["Close"].pct_change().dropna().values
        historical_data.append(stock_prices)
    print("Historical data", historical_data)

    # Ensure all time series have the same length by truncating to the shortest
    min_length = min(len(data) for data in historical_data)
    historical_data = [data[:min_length] for data in historical_data]

    # Convert to a 2D NumPy array (assets as rows, time points as columns)
    historical_data = np.vstack(historical_data)  # ✅ Fix: Stack into a proper 2D array
    # Compute covariance matrix (num_assets x num_assets)
    risks = np.cov(historical_data)  # ✅ Fix: Now correctly formatted
    print(risks)
    best_allocation = optimize_portfolio(returns, risks)
    return {"best_portfolio": dict(zip(tickers, best_allocation.tolist()))}


print(optimize(["AAPL", "MSFT", "BRK-B", "SCHD"]))

[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed


[-185599.11, -205155.72, -35710.87, -38425.82]


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
  risks = np.cov(historical_data)  # ✅ Fix: Now correctly formatted
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


Historical data [array([[ 1.03142521e-02],
       [-1.06045712e-02],
       [ 3.67940804e-02],
       [ 4.08877162e-03],
       [ 4.45651456e-03],
       [ 2.11119261e-02],
       [-5.99108449e-04],
       [ 1.01190963e-02],
       [ 8.75646110e-03],
       [-5.36996810e-03],
       [ 4.43757985e-04],
       [ 1.92207930e-02],
       [ 2.35003780e-02],
       [ 1.00631220e-02],
       [-4.70082327e-03],
       [ 1.48031291e-02],
       [ 1.36845674e-02],
       [-2.00780643e-02],
       [ 9.02078673e-03],
       [ 7.90082487e-03],
       [ 3.70627037e-02],
       [ 2.43997798e-02],
       [-1.79287753e-02],
       [ 1.92445386e-02],
       [-1.76525709e-02],
       [-6.91164288e-03],
       [ 2.45626665e-03],
       [ 1.88068056e-02],
       [-4.22498469e-03],
       [ 1.39033956e-02],
       [-1.04292766e-02],
       [-7.54680278e-03],
       [-2.66798320e-02],
       [ 2.89620779e-03],
       [ 3.29047161e-03],
       [-1.80053818e-02],
       [ 8.24768521e-03],
       [-3.44784471e-

ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (100,4)->(100,4,newaxis) (2140,2140)->(2140,2140) (100,4)->(100,newaxis,4) 