## Portfolio simulation

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from portfolio_simulation_class import PortfolioSimulation

In [2]:
# Read in CIK as string, so that leading zeros are preserved
analyst_ratings = pd.read_csv("../data/sp1500_sell_side_recommendations.csv", dtype={"cik": str})
llm_recommendations = pd.read_csv("../data/ciks1_ratings.csv", dtype={"cik": str})
stock_prices = pd.read_csv("../data/sp1500_monthly_prices.csv", dtype={"cik": str})
rics = pd.read_csv("../data/rics.csv", dtype={"cik": str})

In [3]:
import re

def extract_signal(text):
    # Define possible signals
    signals = ['strong buy', 'buy', 'hold', 'sell', 'strong sell']
    
    # Check if any of the signals appear in the text
    for signal in signals:
        if signal in text.lower():
            return signal
    
    # Return None if no signal is found
    return text

In [4]:
llm_recommendations["extracted_rating"] = llm_recommendations["rating"].apply(extract_signal)

In [5]:
# Subset 1 CIK for testing
cik = "0001996862"
cik_prices = stock_prices[stock_prices["cik"] == cik]
cik_llm_recommendations = llm_recommendations[llm_recommendations["cik"] == cik]
cik_llm_recommendations.rename(columns={"extracted_rating": "action"}, inplace=True)
cik_analyst_ratings = analyst_ratings[analyst_ratings["cik"] == cik]
# Drop nas from analyst ratings
cik_analyst_ratings = cik_analyst_ratings.dropna(subset=["rating"])
# Keep only 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cik_llm_recommendations.rename(columns={"extracted_rating": "action"}, inplace=True)


In [6]:
len(cik_prices), len(cik_llm_recommendations), len(cik_analyst_ratings)

(281, 92, 150)

In [7]:
cik_llm_recommendations["action"].value_counts()

action
sell    72
hold    17
buy      3
Name: count, dtype: int64

In [8]:
test_recommendations = pd.DataFrame({
    "cik": [cik] * 9,
    "action": ["buy", "hold", "hold", "sell", "hold", "buy", "buy", "hold", "sell"],
    "date": ["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04",
    "2023-01-05", "2023-01-06", "2023-01-07", "2023-01-08", "2023-01-09"]})



In [9]:
llm_recommendations.rename(columns={"extracted_rating": "action"}, inplace=True)

---

In [10]:
# Initialize portfolio simulation
sim = PortfolioSimulation(initial_capital=1000)

# Load data
sim.load_stock_prices(cik_prices)
sim.load_recommendations(cik_llm_recommendations)

# Run simulation
sim.simulate_trading()

Simulating Trades: 100%|██████████| 92/92 [00:00<00:00, 1083.65it/s]

Error fetching price for 0001996862 on 2001-03: No price data available.
Error fetching price for 0001996862 on 2001-06: No price data available.
Bought 0001996862 at 39.98 on 2004-09
Sold 0001996862 at 57.01 on 2004-12
Bought 0001996862 at 95.52 on 2023-03
Sold 0001996862 at 94.35 on 2023-06
Bought 0001996862 at 100.95 on 2023-12





In [11]:
sim.get_positions_at_date("2004-10-31")

Unnamed: 0_level_0,cik,position,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-10,cash,960.02,960.02
2004-10,0001996862,1.0,47.73


In [12]:
sim.get_portfolio_value("2004-10-31")

np.float64(1007.75)

In [13]:
sim.get_positions_at_date("2004-10-31")

Unnamed: 0_level_0,cik,position,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-10,cash,960.02,960.02
2004-10,0001996862,1.0,47.73


In [14]:
returns = sim.calculate_monthly_returns()

Calculating Monthly Returns: 100%|██████████| 273/273 [00:00<00:00, 921.28it/s]


In [16]:
returns

Unnamed: 0,month,start_value,end_value,return
0,2001-04,1000.00,1000.00,0.0
1,2001-05,1000.00,1000.00,0.0
2,2001-06,1000.00,1000.00,0.0
3,2001-07,1000.00,1000.00,0.0
4,2001-08,1000.00,1000.00,0.0
...,...,...,...,...
268,2023-08,1015.86,1015.86,0.0
269,2023-09,1015.86,1015.86,0.0
270,2023-10,1015.86,1015.86,0.0
271,2023-11,1015.86,1015.86,0.0


In [17]:
pip install git-filter-repo

Collecting git-filter-repo
  Downloading git_filter_repo-2.47.0-py3-none-any.whl.metadata (31 kB)
Downloading git_filter_repo-2.47.0-py3-none-any.whl (76 kB)
   ---------------------------------------- 0.0/76.3 kB ? eta -:--:--
   ---------------- ----------------------- 30.7/76.3 kB 1.3 MB/s eta 0:00:01
   ---------------------------------------- 76.3/76.3 kB 1.4 MB/s eta 0:00:00
Installing collected packages: git-filter-repo
Successfully installed git-filter-repo-2.47.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
month = "2004-10"
# convert to period
month_period = pd.Period(month, freq='M')
month_period.start_time

Timestamp('2004-10-01 00:00:00')

In [None]:
cik_prices.loc[:, 'date'] = pd.to_datetime(cik_prices['date'])
cik_prices.loc[:, 'date'] = cik_prices['date'].dt.to_period('M')
cik_prices.loc[:, 'date'] = cik_prices['date'].dt.to_timestamp()
