In [1]:
import pandas as pd
from tqdm import tqdm
import ollama
import requests
import json
import time

In [2]:
START_DATE = "2018-01-01"
END_DATE = "2021-12-31"
MODEL = "llama3.2:3b"

In [3]:
class Portfolio:
    def __init__(self, ticker, funds, holdings, risk):
        self.ticker = ticker
        self.funds = funds
        self.holdings = holdings
        self.value = funds
        self.risk = risk
        # self.history = []

    def buy(self, amount, price):
        self.holdings += amount
        self.funds -= amount
        self.value = self.funds + self.holdings * price

    def sell(self, amount, price):
        self.holdings -= amount
        self.funds += amount
        self.value = self.funds + self.holdings * price


p = Portfolio(ticker="AAPL", funds=1000, holdings=100, risk="HIGH")

In [4]:
def query_ollama(prompt):
    response = ollama.chat(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    return response["message"]["content"]

In [5]:
def extract_factors(stock_ticker, news_content, k=5):
    prompt = f"""
    Please extract the top {k} factors that may affect the stock price of {stock_ticker} from the following news.
    
    {news_content}
    """
    return query_ollama(prompt)

In [6]:
def was_market_open(date):
    df = pd.read_csv("research/were-markets-open.csv")

    # Reduce to rows where 'was_open' is True
    df = df[df["was_open"]]

    return date in df["date"].values

In [7]:
# Generate a list of dates between START_DATE and END_DATE as strings
dates = pd.date_range(start=START_DATE, end=END_DATE).strftime("%Y-%m-%d")
dates

Index(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
       '2018-01-06', '2018-01-07', '2018-01-08', '2018-01-09', '2018-01-10',
       ...
       '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25', '2021-12-26',
       '2021-12-27', '2021-12-28', '2021-12-29', '2021-12-30', '2021-12-31'],
      dtype='object', length=1461)

In [8]:
df = pd.DataFrame(
    columns=["date", "price", "action", "volume", "value", "funds", "reason"]
)

In [None]:

for date in tqdm(dates):
    # Skip if the market was not open on that date
    if not was_market_open(date):
        continue

    news_data = ""
    data = requests.get(f"http://localhost:8000/AAPL/{date}")
    data = data.json()

    for news in data["news"]:
        news_data += f"""
            ### {news["title"]}
            
                {news["summary"]}
            """

    factors = extract_factors(p.ticker, news_data)
    # print(factor)

    prompt = f"""
        Today is {date} and you have {p.funds} to invest in {p.ticker}. You currently have {p.holdings} shares of {p.ticker} valued at {p.value}. Your portfolio risk tolerance is {p.risk}.

        The following are the top factors that may affect the stock price of {p.ticker} today:

        {factors}

        Please decide whether to buy or sell your shares of {p.ticker} for tomorrow. Holding is NOT an option. Please make sure not to buy more shares than you can afford or sell more shares than you own.
        
        Please reply in structured JSON, like so:
        {{
            "action": "buy",
            "volume": 10,
            "reason": "I think the stock price will go up based on the factors extracted..."
        }}
        """

    response = query_ollama(prompt)
    # print(response)

    # Update portfolio and dataframe based on response
    # Parse the response from the model
    try:
        response_data = json.loads(response)  # Assuming response is a JSON string

        action = response_data["action"].lower()
        volume = int(response_data["volume"]) if "volume" in response_data else 0

        reason = response_data["reason"] if "reason" in response_data else ""
    except:  # model was naughty!
        action = "hold"
        volume = 0
        reason = ""

    # Get the stock price for the day, assuming you have a way to retrieve this
    # Here I'll use a mock price, but you should replace this with actual data retrieval
    price = data["prices"][0]["open"]  # Mock price, replace with actual retrieval logic

    # Perform action based on the model's decision
    if action == "buy" and volume > 0:
        if p.funds >= volume * price:
            p.buy(volume, price)
            action_desc = "BUY"
        else:
            action_desc = "BUY - Insufficient funds"
    elif action == "sell" and volume > 0:
        if p.holdings >= volume:
            p.sell(volume, price)
            action_desc = f"SELL"
        else:
            action_desc = "SELL - Insufficient holdings"
    else:
        action_desc = "HOLD"

    # Record transaction details in the dataframe
    new_row = pd.DataFrame(
        {
            "date": date,
            "price": round(price, 2),
            "action": action_desc,
            "volume": volume if action in ["buy", "sell"] else 0,
            "value": round(p.value, 2),
            "funds": p.funds,
            "reason": reason,
        },
        index=[0],
    )
    df = pd.concat([df, new_row], ignore_index=True)

  df = pd.concat([df, new_row], ignore_index=True)
  2%|▏         | 24/1461 [09:21<9:44:49, 24.42s/it] 

In [None]:
import time

epoch = int(time.time())

df.to_csv(f"{epoch}.csv", index=False)
df.head()