In [198]:
import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import matplotlib.pyplot as plt

In [130]:
raw = pd.read_csv('tr_eikon_eod_data.csv', index_col = 0, parse_dates = True)
raw

Unnamed: 0_level_0,AAPL.O,MSFT.O,INTC.O,AMZN.O,GS.N,SPY,.SPX,.VIX,EUR=,XAU=,GDX,GLD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2010-01-01,,,,,,,,,1.4323,1096.35,,
2010-01-04,30.572827,30.950,20.88,133.90,173.08,113.33,1132.99,20.04,1.4411,1120.00,47.71,109.80
2010-01-05,30.625684,30.960,20.87,134.69,176.14,113.63,1136.52,19.35,1.4368,1118.65,48.17,109.70
2010-01-06,30.138541,30.770,20.80,132.25,174.26,113.71,1137.14,19.16,1.4412,1138.50,49.34,111.51
2010-01-07,30.082827,30.452,20.60,130.00,177.67,114.19,1141.69,19.06,1.4318,1131.90,49.10,110.82
...,...,...,...,...,...,...,...,...,...,...,...,...
2018-06-25,182.170000,98.390,50.71,1663.15,221.54,271.00,2717.07,17.33,1.1702,1265.00,22.01,119.89
2018-06-26,184.430000,99.080,49.67,1691.09,221.58,271.60,2723.06,15.92,1.1645,1258.64,21.95,119.26
2018-06-27,184.160000,97.540,48.76,1660.51,220.18,269.35,2699.63,17.91,1.1552,1251.62,21.81,118.58
2018-06-28,185.500000,98.630,49.25,1701.45,223.42,270.89,2716.31,16.85,1.1567,1247.88,21.93,118.22


In [220]:
def extract_train_and_test_data(data):
    return np.split(data.dropna().to_numpy(), 2)

def predict_position(train, test):
    # Train Hidden Markov Model
    model = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=1000)
    model.fit(train.reshape(-1, 1))

    # Predict hidden states for test data
    hidden_states = model.predict(test.reshape(-1, 1))

    # Define a function to map hidden states to rise or fall
    def map_to_rise_or_fall(state):
        return "Rise" if state == 0 else "Fall"

    # Map hidden states to rise or fall
    predicted_rise_fall = np.array(list(map(map_to_rise_or_fall, hidden_states)))

    return predicted_rise_fall

def trade(balance, test, prediction):
    position = 0

    for today in range(len(test) - 1):
        tomorrow = today + 1
        if prediction[tomorrow] == "Rise":
            balance -= (1 - position) * test[today]
        else:
            balance += (position + 1) * test[today]
    
    balance += position * test[-1]

    return balance

def backtest(data):
    train_data, test_data = extract_train_and_test_data(data)

    N = 1000
    profit = 0
    for _ in range(N):
        prediction = predict_position(train_data, test_data)
        profit += trade(0, test_data, prediction)

    return profit / N

def run_simulation():
    raw = pd.read_csv('tr_eikon_eod_data.csv', index_col = 0, parse_dates = True)
    stock_names = ['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']
    # stock_names = ['AMZN.O']
    for stock_name in stock_names:
        print(f"{stock_name}: ${backtest(raw[stock_name])}")

In [221]:
run_simulation()

AAPL.O: $25898.205453573253
MSFT.O: $2129.5166480000007
INTC.O: $9980.475258600052
AMZN.O: $17844.423592000006
GS.N: $-36143.16516000001


In [226]:
raw = pd.read_csv('tr_eikon_eod_data.csv', index_col = 0, parse_dates = True)
apple_train_data, _ = extract_train_and_test_data(raw['AAPL.O'])
_, goldmann_test_data = extract_train_and_test_data(raw['GS.N'])

N = 1000
profit = 0
for _ in range(N):
    prediction = predict_position(apple_train_data, goldmann_test_data)
    profit += trade(0, goldmann_test_data, prediction)

print(profit / N)

29677.512099999945


In [229]:
def run_simulation_2():
    stock_names = ['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']
    raw = pd.read_csv('tr_eikon_eod_data.csv', index_col = 0, parse_dates = True)
    raw = raw[stock_names]
    raw = raw.dropna()

    all_predictions = {}
    all_train_data = {}
    all_test_data = {}
    N = 1000

    for stock_name in stock_names:
        train_data, test_data = extract_train_and_test_data(raw[stock_name])
        all_train_data[stock_name] = train_data
        all_test_data[stock_name] = test_data
        all_predictions[stock_name] = []
        for _ in range(N):
            all_predictions[stock_name].append(predict_position(train_data, test_data))
    
    def calculate_profit(test, prediction):
        balance = 0
        position = 0

        for today in range(len(test) - 1):
            tomorrow = today + 1
            if prediction[tomorrow] == "Rise":
                balance -= (1 - position) * test[today]
            else:
                balance += (position + 1) * test[today]
        
        balance += position * test[-1]

        return balance
    
    total_profit = 0
    for stock_name in stock_names:
        print(stock_name)
        best_profit = -float('inf')
        for other_stock_name in stock_names:
            profit = 0
            for prediction in all_predictions[other_stock_name]:
                profit += calculate_profit(all_test_data[stock_name], prediction)
            profit = profit / N
            print(f"{other_stock_name} {profit}")
            best_profit = max(profit, best_profit)
        print(best_profit)
        total_profit += best_profit
    print(f"total: {total_profit}")

In [230]:
run_simulation_2()

AAPL.O
AAPL.O 25985.29352180123
MSFT.O 3414.19760523545
INTC.O 33926.50375862077
AMZN.O 70.2750722065244
GS.N -21917.342845407937
33926.50375862077
MSFT.O
AAPL.O 12294.226189000055
MSFT.O 1617.7773159999997
INTC.O 16069.162077000099
AMZN.O 51.437720000000745
GS.N -10208.978761000004
16069.162077000099
INTC.O
AAPL.O 7386.089202400018
MSFT.O 970.7153455999982
INTC.O 9641.54737020004
AMZN.O 18.747831000000346
GS.N -6272.111987600012
9641.54737020004
AMZN.O
AAPL.O 151746.46054599996
MSFT.O 20037.396482000004
INTC.O 198938.60825999937
AMZN.O 990.8663179999922
GS.N -121335.578
198938.60825999937
GS.N
AAPL.O 41245.020420000044
MSFT.O 5408.518340000005
INTC.O 53786.47126000027
AMZN.O 95.56859999999847
GS.N -35035.120339999994
53786.47126000027
total: 312362.29272582056


In [224]:
import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

# Generate some example data
np.random.seed(42)
num_days = 100
num_stocks = 5
stock_prices = pd.read_csv('tr_eikon_eod_data.csv', parse_dates=True)
#stock_prices = stock_prices[['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']].dropna().to_numpy()
stocks = ['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']
stock_prices = stock_prices[['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']].dropna().to_numpy()
stock_prices = stock_prices.reshape(-1, 1)
stock_prices = pd.DataFrame(stock_prices)



data = pd.read_csv('tr_eikon_eod_data.csv', parse_dates=True)
data = data[['AAPL.O', 'MSFT.O', 'INTC.O', 'AMZN.O', 'GS.N']]

# Initialize variables
current_balance = 0
position = None
value = 0

# Function to calculate profit based on transaction type
def calculate_profit(transaction_type, price, change = False):
    global current_balance, position, value
    price = float(price)
    if transaction_type == "buy":
        if change :
            position = "long"  
            current_balance -= price*value
        current_balance -= price  # Buying for $100
    elif transaction_type == "sell":
        if change : 
            position = "short" 
            current_balance += price*value
        current_balance += price  # Selling short for $100
    
    if change : 
        value = 1
    else :
        value += 1

def calculate(predicted_states, test_data) :
  global position, value, current_balance
  position = None
  value = 0
  current_balance = 0

  temp = np.hstack([predicted_states.reshape(-1,1), test_data.to_numpy()])
  for state, price in temp :
    try :
        price = float(price)
    except :
        continue
    if state == 0:  # Buy
        if position == "long":  # No position, buy
            calculate_profit("buy", price)
        if position == "short" or position is None:  # No position, buy
            calculate_profit("buy", price, True)
    else:  # Sell (short)
        if position == "short":  # Shorting when there's a position
            calculate_profit("sell", price)
        if position == "long" or position is None:  # Shorting when there's a position
            calculate_profit("sell", price, True)
    

for i in stocks :
  stock_prices = pd.read_csv('tr_eikon_eod_data.csv', parse_dates=True)
  stock_prices = stock_prices[i].dropna().to_numpy()
  stock_prices = stock_prices.reshape(-1, 1)
  stock_prices = pd.DataFrame(stock_prices)
  print(i)
  
  train_data = stock_prices
  test_data = stock_prices

  model = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=100)
  model.fit(train_data)

  total = 0

  for j in stocks :
    test_data = data[j].dropna().to_numpy()
    test_data = test_data.reshape(-1, 1)
    test_data = pd.DataFrame(test_data)

    predicted_states = model.predict(test_data)

    calculate(predicted_states, test_data)

    multiplier = 0
    if position == "long" :
        multiplier = 1
    else :
        multiplier = -1
        
    final_balance = current_balance + test_data.iloc[-1, 0] * value * multiplier
    total += final_balance
    print(i, j, final_balance)
  print(total)


AAPL.O
AAPL.O AAPL.O 27853.361191936856
AAPL.O MSFT.O -96435.26540000005
AAPL.O INTC.O -43499.33809999993
AAPL.O AMZN.O 2605379.555300001
AAPL.O GS.N 107605.82999999926
2600904.142991937
MSFT.O
MSFT.O AAPL.O -184803.37943080335
MSFT.O MSFT.O -41220.69559999986
MSFT.O INTC.O 12020.668100000035
MSFT.O AMZN.O -2606946.2453000005
MSFT.O GS.N -107656.37999999925
-2928606.0322308033
INTC.O
INTC.O AAPL.O -195801.72009076882
INTC.O MSFT.O -78245.98539999982
INTC.O INTC.O -8896.086700000007
INTC.O AMZN.O -2606946.2453000005
INTC.O GS.N -107656.37999999925
-2997546.4174907687
AMZN.O
AMZN.O AAPL.O 195956.31012128887
AMZN.O MSFT.O 115556.51539999993
AMZN.O INTC.O 43499.33809999993
AMZN.O AMZN.O -395851.60470000096
AMZN.O GS.N 107656.37999999925
66816.93892128702
GS.N
GS.N AAPL.O -195956.31012128887
GS.N MSFT.O -115556.51539999993
GS.N INTC.O -43499.33809999993
GS.N AMZN.O -580151.1991
GS.N GS.N -84820.80999999981
-1019984.1727212885
