## Trading simulation where LSTMs predict whether buying or selling will lead to a +x% or a -x% return

In [None]:
import pandas as pd
import pandas_ta as ta
from datetime import datetime
import numpy as np

from SimulateTrades import simulate_trades_max_positions, simulate_trades_max_positions_with_shorts
from TradingFunctions import train_and_predict_lstm, create_indicators, prepare_input_for_pred, add_first_hit_labels

# One independent trade-history list per strategy (four strategies are simulated)
results, results2, results3, results4 = [], [], [], []

# Window configuration, expressed in number of rows of the dataset
start_point = 25000  # Start point of dataset (rows counted back from the end of the CSV)
train_size = 6000  # Size of train set
test_size = 500  # Size of test set

def _summarize_trades(trades, max_open):
    """Build the performance-summary dict for one simulated strategy.

    Args:
        trades: DataFrame returned by a simulation function; the columns
            'profit_usd', 'result' and 'pnl_pct' are read.
        max_open: Maximum number of simultaneously open trades reported by
            the same simulation run.

    Returns:
        Dict with the summary metrics stored in the ``results*`` lists.
    """
    cumulative_profit = trades['profit_usd'].cumsum()
    return {
        "Total Profit": trades['profit_usd'].sum(),
        "Win Rate": (trades['result'] == 'win').mean(),
        "Average Return per Trade": trades['pnl_pct'].mean(),
        "Number of Trades": len(trades),
        "Maximum Open Trades": max_open,
        # NOTE(review): this is the lowest cumulative profit, not a
        # peak-to-trough drawdown - confirm that this is the intended metric.
        "Max drawdown": cumulative_profit.min(),
    }


def _empty_summary():
    """Return a summary dict filled with None (used when no buy signal was predicted)."""
    return dict.fromkeys(
        (
            "Total Profit",
            "Win Rate",
            "Average Return per Trade",
            "Number of Trades",
            "Maximum Open Trades",
            "Max drawdown",
        ),
        None,
    )


# Read the CSV once; every window below is a slice of this frame
raw_data = pd.read_csv('btcusd_15-min_data.csv')
n_rows = len(raw_data)

# Parameters for label creation:
# for each price, determine whether future returns first hit the upper
# threshold or the lower threshold
ts = 0.03  # Upper threshold
lts = 0  # Lower threshold

sequence_length = 1000  # Input sequence length for the LSTM

# Selection of features to include in the model
feature_columns = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'MACD', 'BBL', 'BBM', 'BBU', 'BB_BANDWIDTH',
    'SMA', 'EMA', 'RSI',
    'SMA_10', 'SMA_50', 'SMA_200', 'EMA_10', 'EMA_50', 'EMA_200',
    'SMA_10_50_diff', 'SMA_10_200_diff', 'SMA_50_200_diff',
    'EMA_10_50_diff', 'EMA_10_200_diff', 'EMA_50_200_diff',
    'return_1', 'return_2', 'return_3'
]

# Name of the label column that is dropped-on-NaN and passed to the trainer
label_col = f'label_first_hit_{int(ts*100)}'

for i in range(1):  # Each loop we move forward in time by test_size and train the LSTM again

    # Absolute row positions of the train/test windows. Negative offsets from
    # the end of the file are avoided on purpose: an end offset of exactly 0
    # would select an empty slice, and a positive one would silently select
    # rows from the start of the file.
    train_start = n_rows - start_point + i * test_size
    train_end = train_start + train_size
    test_end = train_end + test_size
    if train_start < 0 or test_end > n_rows:
        raise ValueError(
            f"Window {i} (rows {train_start}:{test_end}) does not fit in the "
            f"dataset of {n_rows} rows"
        )

    # Data used to train the LSTM (copied so the shared frame is never modified)
    data = raw_data.iloc[train_start:train_end, :].copy()

    # Make a copy of the train set
    og_data = data.copy()

    # Create technical indicators
    data = create_indicators(data)

    # Apply labels to entire train dataset
    data = add_first_hit_labels(data, threshold=ts, max_lookahead=len(data), lower_threshold=lts)

    # Drop rows where label couldn't be assigned (before splitting)
    data = data.dropna(subset=[label_col])

    # Train an LSTM
    result = train_and_predict_lstm(data, feature_columns, label_col, epochs=1, sequence_length=sequence_length,
                                    threshold=ts, lower_threshold=lts)

    production_model = result['model']  # Trained model
    best_threshold = result['best_threshold']  # Estimated optimal threshold
    scaler = result['scaler']  # Scaler used to scale the input data

    # New unobserved data (test data): the test_size rows right after the train window
    new_data = raw_data.iloc[train_end:test_end, :].copy()

    # Take the last sequence length of the train data (unscaled); the last
    # column is dropped (presumably the label column - TODO confirm)
    train_data = result['last_sequence_test'].iloc[:, :-1]

    # Concatenate train data to test data to create technical indicators (the first data points of the test data are
    # based on the last data points of the train data)
    last_data = pd.concat([og_data, new_data])
    last_data = create_indicators(last_data)

    # From last_data take only the test data and append it to the 'last sequence -1' used to train the LSTM.
    # E.g. if sequence length is 1000, we take the last 999 data points of the train set followed by the first new
    # data point from the test set to form the first test sequence
    last_data = pd.concat([train_data.iloc[-sequence_length+1:, :], last_data.iloc[-test_size:, :]])

    # Transform the test data to make it LSTM-ready
    X_test = prepare_input_for_pred(last_data, sequence_length, feature_columns, scaler=scaler)

    # Predict labels
    probs = production_model.predict(X_test, verbose=0).flatten()
    pred_labels = (probs > best_threshold).astype(int)

    # Strat 1 - Include short trades and use estimated optimal threshold
    sim_results, max_open_trades, signals_ignored, open_trades_history = simulate_trades_max_positions_with_shorts(
        last_data.iloc[-test_size:, :], pred_labels.flatten(), max_positions=10, max_lookahead=len(last_data), threshold=ts, lower_threshold=lts
    )

    # Strat 2 - Do not include short trades and use estimated optimal threshold
    sim_results2, max_open_trades2, signals_ignored2, open_trades_history2 = simulate_trades_max_positions(
        last_data.iloc[-test_size:, :], pred_labels.flatten(), max_positions=10, max_lookahead=len(last_data), threshold=ts, lower_threshold=lts
    )

    # Strat 3 - Do not include short trades and use 0.5 as threshold
    pred_labels2 = (probs > 0.5).astype(int)

    sim_results3, max_open_trades3, signals_ignored3, open_trades_history3 = simulate_trades_max_positions(
        last_data.iloc[-test_size:, :], pred_labels2.flatten(), max_positions=10, max_lookahead=len(last_data), threshold=ts, lower_threshold=lts
    )

    # Strat 4 - Include short trades and use 0.5 as threshold
    pred_labels3 = (probs > 0.5).astype(int)

    sim_results4, max_open_trades4, signals_ignored4, open_trades_history4 = simulate_trades_max_positions_with_shorts(
        last_data.iloc[-test_size:, :], pred_labels3.flatten(), max_positions=10, max_lookahead=len(last_data), threshold=ts, lower_threshold=lts
    )

    # Performance summary for strat 1 (always recorded, no empty-signal guard)
    results.append(_summarize_trades(sim_results, max_open_trades))

    # Performance summaries for strats 2-4: a row of None values is stored when
    # no positive label was predicted; each strategy reports its OWN maximum
    # number of open trades.
    # NOTE(review): strat 4 also trades shorts but is still guarded on
    # "no positive label" - confirm that this is intended.
    if pred_labels.any():
        results2.append(_summarize_trades(sim_results2, max_open_trades2))
    else:
        results2.append(_empty_summary())

    if pred_labels2.any():
        results3.append(_summarize_trades(sim_results3, max_open_trades3))
    else:
        results3.append(_empty_summary())

    if pred_labels3.any():
        results4.append(_summarize_trades(sim_results4, max_open_trades4))
    else:
        results4.append(_empty_summary())


### Store results in an Excel file

In [2]:
# Collect the 'Total Profit' column of every strategy (strat 1 to strat 4, in
# that order) and stack them on top of each other into a single Series
profit_per_strategy = [
    pd.DataFrame(strategy_results)['Total Profit']
    for strategy_results in (results, results2, results3, results4)
]
res = pd.concat(profit_per_strategy)

# Write the stacked profits to Excel without the index.
# NOTE(review): the file name is spelled 'resutls' - kept as is, confirm
# whether anything reads this file before renaming it.
res.to_excel('resutls.xlsx', index=False)