# Setup

In [None]:
#Installing dependencies 
%pip install -U \
    pypdf \
    sentence-transformers \
    faiss-cpu \
    rank_bm25
%pip install -U langchain langchain-community langchain-core langchain-openai
%pip install -U langchain-huggingface
%pip install tf-keras
%pip install yfinance
%pip install "numpy<2.0" "packaging<24" "tenacity<9" "rich<14"

In [2]:
# Libraries
import yfinance as yf
import pandas as pd
import os
import json
import numpy as np
import openai

from IPython.display import display, Markdown

In [4]:
# Resolve the OpenAI API key: try Colab's `userdata` store first, otherwise
# fall back to the OPENAI_API_KEY environment variable.
from dotenv import load_dotenv
# Presumably loads variables from a local .env file into the environment so the
# os.getenv fallback below can see them -- confirm against the python-dotenv docs.
load_dotenv()
try:
    # Expected to import only where the google.colab package exists (i.e. on Colab).
    from google.colab import userdata
    api_key = userdata.get('OPENAI_API_KEY')
except ImportError:
    # google.colab is not available: read the key from the process environment.
    api_key = os.getenv("OPENAI_API_KEY")

# Stop here with a clear message when no key was found (None or empty string).
if not api_key:
    raise ValueError("OPENAI_API_KEY not found")

In [6]:
# Export the key into the process environment. Later cells construct
# `openai.OpenAI()` and `ChatOpenAI()` without passing a key explicitly.
os.environ['OPENAI_API_KEY'] = api_key
# Module-level client used by `portfolio_analysis`; no key is passed here, so it
# presumably picks up OPENAI_API_KEY from the environment -- TODO confirm.
client = openai.OpenAI()

## 1. User portfolio data

In [9]:
#Creating a function to store user data
def save_user_portfolio(user_name, asset_name, units, avg_cost):
    """Insert or overwrite one holding in the user's JSON portfolio file.

    The portfolio is stored at ``data/<normalized_user>_portfolio.json``
    (relative to the current working directory) as a JSON object that maps each
    upper-cased asset name to a record with ``asset_name``, ``units`` and
    ``avg_cost``.

    Args:
        user_name: Name of the user. It is lower-cased and runs of whitespace
            are collapsed to ``_`` to build the file name.
        asset_name: Asset / ticker identifier. Surrounding whitespace is
            stripped and the result is upper-cased before it is used as key.
        units: Number of units held (stored as given).
        avg_cost: Average cost per unit (stored as given).

    Returns:
        None. The portfolio file is rewritten with the updated content.
    """
    normalized_user = '_'.join(user_name.lower().split())
    # Strip so that "aapl " and "AAPL" address the same holding.
    ticker = asset_name.strip().upper()

    data_folder = 'data'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(data_folder, exist_ok=True)
    file_path = os.path.join(data_folder, f"{normalized_user}_portfolio.json")

    existing_data = {}
    if os.path.exists(file_path):
        with open(file_path, 'r') as file:
            try:
                loaded = json.load(file)
            except json.JSONDecodeError:
                # Unreadable file: start over with an empty portfolio (best effort).
                loaded = None
        # Valid JSON that is not an object (e.g. a list) cannot hold holdings
        # by ticker, so it is treated like an unreadable file.
        if isinstance(loaded, dict):
            existing_data = loaded

    existing_data[ticker] = {
        "asset_name": ticker,  # Store asset name in uppercase for consistency
        "units": units,
        "avg_cost": avg_cost,
    }

    with open(file_path, 'w') as file:
        json.dump(existing_data, file, indent=4)


## 2. Portfolio analysis

In [75]:
#Function to pull the asset data
def get_stock_metrics(portfolio_data):
    """Fetch one year of closing prices and a per-asset metrics snapshot.

    Args:
        portfolio_data: Mapping of ticker symbol -> holding record. Every
            record must provide the keys ``'units'`` and ``'avg_cost'``.

    Returns:
        tuple: ``(price_data, metrics)``. ``price_data`` is the ``'Close'``
        selection of ``yf.download(assets, period='1y')``. ``metrics`` maps
        each ticker to a dict with the holding data and the fields read from
        ``yf.Ticker(symbol).info``; a field that is missing there is reported
        as the string ``'N/A'``.
    """
    assets = list(portfolio_data)

    # Closing prices over the past year (whether they are adjusted depends on
    # the yfinance `auto_adjust` default -- verify for the installed version).
    price_data = yf.download(assets, period='1y')['Close']

    metrics = {}
    for symbol, holding in portfolio_data.items():
        stock_info = yf.Ticker(symbol).info

        # 'currentPrice' is not reported for every instrument; fall back to
        # 'regularMarketPrice' before giving up with the 'N/A' placeholder.
        current_price = stock_info.get('currentPrice')
        if current_price is None:
            current_price = stock_info.get('regularMarketPrice', 'N/A')

        metrics[symbol] = {
            "units_held": holding['units'],  # Number of units held
            "avg_cost": holding['avg_cost'],  # Average cost of the asset
            "current_price": current_price,  # Current market price
            "previous_close": stock_info.get('previousClose', 'N/A'),  # Previous closing price
            "52_week_high": stock_info.get('fiftyTwoWeekHigh', 'N/A'),  # 52-week high price
            "52_week_low": stock_info.get('fiftyTwoWeekLow', 'N/A'),  # 52-week low price
            "dividend_yield": stock_info.get('dividendYield', 'N/A'),  # Dividend yield
            "market_cap": stock_info.get('marketCap', 'N/A'),  # Market capitalization
            "pe_ratio": stock_info.get('trailingPE', 'N/A'),  # Trailing price-to-earnings ratio
            "eps": stock_info.get('trailingEps', 'N/A'),  # Trailing earnings per share
        }

    return price_data, metrics


In [14]:
# Function to calculate expected returns and covariance
def calculate_returns(prices):
    """Compute mean daily returns and their covariance matrix.

    Args:
        prices: Price table with one column per asset (for example the
            ``price_data`` returned by ``get_stock_metrics``).

    Returns:
        tuple: ``(expected_returns, covariance_matrix)`` computed from the
        row-to-row percentage changes of ``prices`` after dropping rows with
        missing values. ``(None, None)`` is returned when ``prices`` is
        ``None``, is one-dimensional (a single asset delivered as a series),
        or when the computation fails.
    """
    # A 1-D series has no cross-asset covariance; keep the (None, None) contract
    # for that case without relying on an exception being raised.
    if prices is None or getattr(prices, "ndim", 2) < 2:
        return None, None

    try:
        returns = prices.pct_change().dropna()
        return returns.mean(), returns.cov()
    except Exception:
        # Best effort: callers treat (None, None) as "statistics unavailable".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return None, None

In [16]:
#Function to analyse portfolio
def portfolio_analysis(metrics, expected_returns, covariance_matrix, model="gpt-4"):
    """Ask the chat model for an analysis of the portfolio.

    Uses the module-level ``client`` to send one system message (the portfolio
    manager instructions) and one user message that embeds the string form of
    ``metrics``, ``expected_returns`` and ``covariance_matrix``.

    Args:
        metrics: Per-asset metrics, e.g. as returned by ``get_stock_metrics``.
        expected_returns: Expected returns per asset, or ``None``.
        covariance_matrix: Covariance matrix of the returns, or ``None``.
        model: Name of the chat model to query. Defaults to ``"gpt-4"``.

    Returns:
        str: The text of the first choice in the response. An empty string is
        returned when the message carries no text content, so callers can
        always apply string methods to the result.
    """
    system_prompt = """
    You are a portfolio manager responsible for analyzing and optimizing investment portfolios.
    Apply Modern Portfolio Theory where relevant, and use Earnings per Share (EPS) to achieve a balanced risk-return profile.
    Provide actionable insights to help users make informed investment decisions, focusing on practical recommendations rather than explaining portfolio metrics.
    """

    user_prompt = (
        f"Portfolio: {metrics}.\n"
        f"Expected returns:{expected_returns}, covariance matrix:{covariance_matrix}"
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},  # Define the system's behavior and goals
            {"role": "user", "content": user_prompt},
        ]
    )

    # `content` may be None; normalise to "" to keep the return type a string.
    return response.choices[0].message.content or ""


In [18]:
#Function to provide portfolio summary
def _usable_price(value):
    """Return ``value`` if it is a real, non-NaN number, otherwise ``None``."""
    import numbers  # local import keeps this cell self-contained

    if isinstance(value, bool) or not isinstance(value, numbers.Real):
        return None
    if value != value:  # NaN is the only number that differs from itself
        return None
    return value


def portfolio_summary(user_name):
    """Load a user's portfolio and compute its current value and gain/loss.

    Args:
        user_name: Name of the user. It is lower-cased and runs of whitespace
            are collapsed to ``_`` to locate ``data/<name>_portfolio.json``.

    Returns:
        tuple: ``(overall_summary, price_data, metrics)``.
        ``overall_summary`` is a dict with ``total_value``, ``total_gain_loss``
        and ``assets`` (one record per priced asset); ``price_data`` and
        ``metrics`` are passed through from ``get_stock_metrics``.
        ``(None, None, None)`` is returned when the portfolio file is missing,
        unreadable, or contains no holdings.

    Notes:
        The metrics may carry the placeholder ``'N/A'`` instead of a price.
        In that case the previous close is used; an asset with neither value
        is reported on stdout and left out of the summary, instead of
        raising ``TypeError`` while the totals are computed.
    """
    user_name = '_'.join(user_name.lower().split())
    file_path = os.path.join('data', f"{user_name}_portfolio.json")

    if not os.path.exists(file_path):
        print(f"No portfolio found for user: {user_name}")
        return None, None, None

    with open(file_path, 'r') as file:
        try:
            portfolio_data = json.load(file)
        except json.JSONDecodeError:
            portfolio_data = None

    # A corrupt file or an empty/non-object document has nothing to analyse.
    if not isinstance(portfolio_data, dict) or not portfolio_data:
        print(f"Portfolio file for user {user_name} is empty or unreadable")
        return None, None, None

    price_data, metrics = get_stock_metrics(portfolio_data)

    total_value = 0
    total_gain_loss = 0
    summary = []

    for symbol, asset_metrics in metrics.items():
        units = asset_metrics["units_held"]
        avg_cost = asset_metrics['avg_cost']

        current_price = _usable_price(asset_metrics["current_price"])
        if current_price is None:
            current_price = _usable_price(asset_metrics.get("previous_close"))
            if current_price is None:
                print(f"No price available for {symbol}; asset left out of the summary")
                continue
            print(f"Current price unavailable for {symbol}; using previous close")

        # Calculate current value and gain/loss
        current_value = units * current_price
        gain_loss = (current_price - avg_cost) * units

        # Update the total portfolio value and gain/loss
        total_value += current_value
        total_gain_loss += gain_loss

        summary.append({
            "asset_name": symbol,
            "units": units,
            "avg_cost": avg_cost,
            "current_price": current_price,
            "current_value": current_value,
            "gain_loss": gain_loss
        })

    overall_summary = {
        "total_value": total_value,
        "total_gain_loss": total_gain_loss,
        "assets": summary
    }

    return overall_summary, price_data, metrics


## 3. Tax advisor

In [22]:
# Libraries
from langchain_classic.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

from langchain_openai import ChatOpenAI
from langchain_classic.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_community.vectorstores.faiss import FAISS

2026-01-17 14:40:27.210281: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [23]:
# Load the tax law pdf
# The path is relative, so the file is expected in the notebook's working directory.
loader = PyPDFLoader("The Fiscal Code of Germany.pdf")
# `pages` is the document list both retrievers in the next cell are built from.
pages = loader.load_and_split()

In [24]:
# Initialize the BM25 retriever
# It is built from the same `pages` list as the FAISS index below.
bm25_retriever = BM25Retriever.from_documents(pages)
bm25_retriever.k =  2  # Retrieve top 2 results
# Use Hugging Face's Sentence Transformers model as the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embed the chunks as vectors and load them into the FAISS database
faiss_db = FAISS.from_documents(pages, embedding_model)
# k=2 mirrors the BM25 setting above, so each retriever is asked for two documents.
faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 2})

# Initialize the ensemble retriever
# Weights are presumably matched to the retrievers by position
# (0.4 -> bm25_retriever, 0.6 -> faiss_retriever) -- confirm against the
# EnsembleRetriever documentation.
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever],
                                      weights=[0.4, 0.6])


In [25]:
def advisor(query, retriever):
    """Answer ``query`` through a retrieval-backed question-answering chain.

    A default ``ChatOpenAI`` model is wrapped in a ``RetrievalQA`` chain of
    type ``"stuff"`` that takes its context from ``retriever``.

    Args:
        query: The question, passed to the chain's ``invoke``.
        retriever: Retriever handed to ``RetrievalQA.from_chain_type``.

    Returns:
        The ``'result'`` entry of the chain's response.
    """
    chat_model = ChatOpenAI()
    qa_chain = RetrievalQA.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        llm=chat_model,
    )
    answer = qa_chain.invoke(query)
    return answer['result']


## 4. App

In [27]:
#The main logic

def _ask_menu_choice():
    """Prompt until the user enters 1, 2 or 3 and return the choice as a string."""
    while True:
        pref = input("[1] update portfolio [2] evaluate portfolio [3] Consult tax law\n").strip()
        if pref in ("1", "2", "3"):
            return pref
        print("Invalid input. Please enter either 1, 2, or 3")


def _ask_asset_name():
    """Prompt until yfinance reports more than one ``info`` entry for the entered name."""
    while True:
        asset_name = input("Enter the asset name: ").strip()
        try:
            if asset_name and len(yf.Ticker(asset_name).info) > 1:
                return asset_name
            print("Invalid input. Please enter a valid asset name.")
        except Exception as e:
            print(f"Error fetching data for {asset_name}: {e}. Please enter a valid asset name.")


def _ask_non_negative(prompt, cast, invalid_message, negative_message):
    """Prompt until ``cast`` accepts the input and the resulting value is not negative."""
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print(invalid_message)
            continue
        if value < 0:
            print(negative_message)
            continue
        return value


def _update_portfolio(user_name):
    """Collect asset name, units and average cost, then store them for ``user_name``."""
    asset_name = _ask_asset_name()
    units = _ask_non_negative(
        "Enter the number of units: ",
        int,
        "Invalid input. Please enter an integer value for the number of units.",
        "Number of units cannot be negative. Please try again.",
    )
    avg_cost = _ask_non_negative(
        "Enter the average cost per unit (USD): ",
        float,
        "Invalid input. Please enter a numeric value for the average cost.",
        "Average cost cannot be negative. Please try again.",
    )
    save_user_portfolio(user_name, asset_name, units, avg_cost)


def _evaluate_portfolio(user_name):
    """Print the portfolio summary and display the model-generated analysis."""
    summary, price_data, metrics = portfolio_summary(user_name)
    if not summary:
        # portfolio_summary found nothing to report on.
        return

    print(f"\nPortfolio summary for {user_name}:")
    print(f"Total Portfolio Value: ${summary['total_value']:.2f}")
    print(f"Total Gain/Loss: ${summary['total_gain_loss']:.2f}")

    for asset in summary['assets']:
        # Both derived amounts are rounded to avoid float noise such as 251.45999999999998.
        print(f"Asset: {asset['asset_name']}, Units: {asset['units']}, "
              f"Avg Cost: {asset['avg_cost']}, Current Price: {asset['current_price']}, "
              f"Current Value: {round(asset['current_value'], 2)}, "
              f"Gain/Loss: {round(asset['gain_loss'], 2)}")

    expected_returns, covariance_matrix = calculate_returns(price_data)
    analysis = portfolio_analysis(metrics, expected_returns, covariance_matrix)

    # Escape dollar signs for proper Markdown rendering; tolerate a missing analysis.
    analysis = (analysis or "").replace('$', '\\$')

    print("\nPortfolio Analysis:")
    display(Markdown(analysis))


def _consult_tax_law():
    """Ask for a legal concern and display the advisor's answer as Markdown."""
    query = input("Specify your legal concern:\n")
    response = advisor(query, ensemble_retriever)
    display(Markdown(response))


def start():
    """Run one interactive session of the assistant.

    Asks for the user's nickname and a menu choice, then either updates the
    stored portfolio (1), prints and analyses it (2), or answers a tax-law
    question through the ensemble retriever (3). All interaction happens via
    ``input``/``print``/``display``; nothing is returned.
    """
    user_name = input("Enter your nickname: ").strip()
    pref = _ask_menu_choice()

    if pref == "1":
        _update_portfolio(user_name)
    elif pref == "2":
        _evaluate_portfolio(user_name)
    elif pref == "3":
        _consult_tax_law()



In [86]:
# Launch the interactive menu; the call blocks on input() prompts.
start()

Enter your nickname:  Lucy
[1] update portfolio [2] evaluate portfolio [3] Consult tax law
 2


[*********************100%***********************]  2 of 2 completed



Portfolio summary for Lucy:
Total Portfolio Value: $1784.64
Total Gain/Loss: $1016.64
Asset: GOLD, Units: 6, Avg Cost: 50.0, Current Price: 41.91, Current Value: 251.45999999999998, Gain/Loss: -48.54
Asset: AAPL, Units: 6, Avg Cost: 78.0, Current Price: 255.53, Current Value: 1533.18, Gain/Loss: 1065.18

Portfolio Analysis:


Based on the analysis of the Modern Portfolio Theory, here are some strategic actions you can consider:

1. Diversify your holdings: Your portfolio is currently concentrated on two assets, and it lacks diversification. A well-diversified portfolio reduces unsystematic risk. Consider adding more equities of different sectors or asset classes such as bonds or Real Estate Investment Trusts to your portfolio.

2. Rebalance your allocation: Looking at your expected returns, AAPL has a lower return compared to GOLD. Yet, AAPL has a substantial market capitalization and a lower PE ratio, which suggests it's less risky. Depending on your risk appetite, you may want to consider rebalancing your allocation towards AAPL for less risky returns.

3. Evaluate based on EPS: AAPL has a much higher Earnings Per Share (EPS) than GOLD, which infers that AAPL is more profitable and potentially provides more value to shareholders. Hence, investing more in AAPL might be a good strategy.

4. Monitor closely: GOLD currently has a high PE ratio, which can signal an overvalued stock. Along with its lower EPS, it might be a good idea to keep a close eye on this. If the PE ratio continues to increase and the EPS does not improve, it could be an indicator for rebalancing away from GOLD.

5. Review Dividend Strategy: AAPL provides a lower dividend yield compared to GOLD. If you are an investor seeking dividend income, you may want to maintain or increase your investment allocation for GOLD.

Remember, each investment decision should be made based on your financial goals and risk tolerance. Consider consulting with a financial advisor before making any significant changes to your investment portfolio.

In [68]:
# Launch the interactive menu again; the call blocks on input() prompts.
start()

Enter your nickname:  Lucy
[1] update portfolio [2] evaluate portfolio [3] Consult tax law
 3
Specify your legal concern:
 If I cell my gold, how much will I owe in taxes?


I don't have enough information to provide a specific calculation for how much you would owe in taxes if you sell your gold. The tax amount would depend on various factors such as where you live, the value of the gold you sell, how long you held the gold before selling it, and the tax laws that apply to the sale of gold in your jurisdiction. It's recommended to consult with a tax professional or accountant to get an accurate estimate of your tax liability after selling gold.