In [None]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29413 sha256=ef70079fb0812b755c98b27ce656906bab83fc2c426f0634327c4816a500417e
  Stored in directory: /root/.cache/pip/wheels/5f/67/4f/8a9f252836e053e532c6587a3230bc72a4deb16b03a829610b
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
import ta

# Tickers the portfolio is built from
symbols = [
    "AAPL",
    "META",
    "AMZN",
    "MSFT",
    "GOOGL",
]

# Proportional cost per trade: 0.1% of the trade amount
transaction_cost = 0.001

# Start from an equal-weight allocation across all tickers
portfolio = dict.fromkeys(symbols, 1 / len(symbols))

# Function to create sequences for LSTM
def create_sequences(features, labels, sequence_length):
    """Build sliding windows of features, each paired with the label that follows it.

    Args:
        features: Per-timestep rows (e.g. a 2-D NumPy array); sliced by position.
        labels: One label per timestep (list, NumPy array or pandas Series).
            Labels are always read by position, never by index label.
        sequence_length: Number of consecutive timesteps in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[k]`` is
        ``features[k:k + sequence_length]`` and ``y[k]`` is the label at
        position ``k + sequence_length``. Both arrays are empty when there are
        no more than ``sequence_length`` timesteps.
    """
    # A pandas Series resolves integer keys against its index labels (or via a
    # deprecated positional fallback), which misaligns or raises once the index
    # is not 0..n-1. A plain array makes the lookup unambiguously positional.
    labels = np.asarray(labels)

    n_windows = len(features) - sequence_length
    X = [features[start:start + sequence_length] for start in range(n_windows)]
    y = [labels[start + sequence_length] for start in range(n_windows)]
    return np.array(X), np.array(y)

# Function to build LSTM model
def build_lstm_model(input_shape):
    """Create and compile a binary classifier: a 50-unit LSTM followed by a sigmoid unit.

    Args:
        input_shape: Forwarded unchanged as the LSTM layer's ``input_shape``
            (presumably ``(timesteps, n_features)`` -- confirm against the caller).

    Returns:
        A compiled ``Sequential`` model using the adam optimizer, binary
        cross-entropy loss and an accuracy metric.
    """
    layer_stack = [
        LSTM(50, input_shape=input_shape),
        Dense(1, activation='sigmoid'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

# Function to build MLP model
def build_mlp_model(input_shape):
    """Create and compile a binary classifier: one 50-unit ReLU layer and a sigmoid unit.

    Args:
        input_shape: Forwarded unchanged as the first Dense layer's ``input_shape``
            (presumably ``(n_features,)`` -- confirm against the caller).

    Returns:
        A compiled ``Sequential`` model using the adam optimizer, binary
        cross-entropy loss and an accuracy metric.
    """
    layer_stack = [
        Dense(50, input_shape=input_shape, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

# Walk-forward cross-validation splitter; the purge/embargo is applied per fold below.
tscv = TimeSeriesSplit(n_splits=5)

# Number of rows in each LSTM input window.
sequence_length = 10

# Calendar days of training data dropped immediately before each test fold.
embargo_days = 5  # Adjust as needed

for symbol in symbols:
    # Download the stock data
    df = yf.download(symbol, start="2010-01-01", end="2022-12-31")

    # Technical indicators, each called with the `ta` library's default parameters.
    # Calculate RSI
    df['RSI'] = ta.momentum.rsi(df['Close'])

    # Calculate MFI
    df['MFI'] = ta.volume.money_flow_index(df['High'], df['Low'], df['Close'], df['Volume'])

    # Calculate EMA
    df['EMA'] = ta.trend.ema_indicator(df['Close'])

    # Calculate Stochastic Oscillator
    stoch = ta.momentum.StochasticOscillator(df['High'], df['Low'], df['Close'])
    df['%K'] = stoch.stoch()
    df['%D'] = stoch.stoch_signal()

    # Calculate ATR
    df['ATR'] = ta.volatility.average_true_range(df['High'], df['Low'], df['Close'])

    # Calculate ROC
    df['ROC'] = ta.momentum.roc(df['Close'])

    # Create response variable 'Trend': 1 when the close rose versus the previous row, else 0
    df['Trend'] = np.where(df['Close'].diff() > 0, 1, 0)

    # Calculate all technical analysis features (disabled)
    # df = ta.add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume")

    # Drop rows with missing values
    df = df.dropna()

    # Feature scaling
    # NOTE(review): the scaler is fit on every column (including 'Trend') over the
    # whole history, so test-period statistics leak into the scaled features.
    # Fit it on each fold's training rows before using these features for modelling.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df)

    # Create sequences for LSTM. The models are sigmoid / binary cross-entropy
    # classifiers, so the label must be the 0/1 'Trend' column, not the raw close price.
    X, y = create_sequences(scaled_features, df['Trend'].to_numpy(), sequence_length=sequence_length)

    # Split every symbol: this loop has to live inside the symbol loop, otherwise
    # only the last downloaded symbol is ever split.
    for train_index, test_index in tscv.split(df):
        # Split the dataset into training and testing sets
        train_data, test_data = df.iloc[train_index], df.iloc[test_index]

        # Get the start date of the test set
        test_start_date = test_data.index[0]

        # Purging: keep only training rows dated before the test fold.
        # TimeSeriesSplit already orders training rows before test rows, so this
        # is a safeguard and normally leaves train_data unchanged.
        train_data_purged = train_data[train_data.index < test_start_date]

        # Embargo: additionally drop the last `embargo_days` calendar days before
        # the test fold. Selecting training rows on/after test_start_date would
        # always be empty, because no training row is dated that late.
        embargo_start_date = test_start_date - pd.Timedelta(days=embargo_days)
        train_data_embargoed = train_data_purged[train_data_purged.index < embargo_start_date]

#         # Feature scaling for LSTM
#         scaler_lstm = StandardScaler()
#         scaled_features_lstm = scaler_lstm.fit_transform(train_data_embargoed)

#         # Create sequences for LSTM on embargoed training data
#         X_lstm, y_lstm = create_sequences(scaled_features_lstm, train_data_embargoed['Close'], sequence_length=10)

#         # Build and train LSTM model
#         model_lstm = build_lstm_model(input_shape=(sequence_length, 1))
#         early_stopping = EarlyStopping(monitor='val_loss', patience=3)
#         model_lstm.fit(X_lstm, y_lstm, epochs=10, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

#         # Feature scaling for MLP
#         scaler_mlp = StandardScaler()
#         scaled_features_mlp = scaler_mlp.fit_transform(train_data_embargoed)

#         # Build and train MLP model
#         model_mlp = build_mlp_model(input_shape=(scaled_features_mlp.shape[1],))
#         model_mlp.fit(scaled_features_mlp, train_data_embargoed['Close'], epochs=10, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

#         # Feature scaling for test data
#         scaled_test_features = scaler.transform(test_data)

#         # Create sequences for LSTM on test data
#         X_test_lstm, y_test_lstm = create_sequences(scaled_test_features, test_data['Close'], sequence_length=10)

#         # Make predictions using the LSTM model
#         y_pred_lstm = (model_lstm.predict(X_test_lstm) > 0.5).astype(int)

#         # Evaluate LSTM accuracy
#         accuracy_lstm = accuracy_score(y_test_lstm, y_pred_lstm)
#         print(f'LSTM Accuracy for {symbol}: {accuracy_lstm}')

#         # Make predictions using the MLP model
#         y_pred_mlp = (model_mlp.predict(scaled_test_features) > 0.5).astype(int)

#         # Evaluate MLP accuracy
#         accuracy_mlp = accuracy_score(test_data['Close'], y_pred_mlp)
#         print(f'MLP Accuracy for {symbol}: {accuracy_mlp}')

#         # Incorporate transaction cost
#         df['Strategy_Return'] = df['Returns'] * df['Predicted_Signal']
#         df['Strategy_Return'] = df['Strategy_Return'] - transaction_cost * abs(df['Predicted_Signal'].diff())

#         # Calculate cumulative returns
#         df['Cumulative_Strategy_Returns'] = (1 + df['Strategy_Return']).cumprod()

#         # Evaluate strategy
#         print(f"Strategy return for {symbol}: {df['Cumulative_Strategy_Returns'].iloc[-1]}")

#         # Rebalance portfolio
#         rmse_lstm = sqrt(mean_squared_error(y_test_lstm, y_pred_lstm))
#         rmse_mlp = sqrt(mean_squared_error(test_data['Close'], y_pred_mlp))
#         portfolio[symbol] = 1 / (rmse_lstm + rmse_mlp)  # Assign higher weights to models with lower RMSE

# # Normalize portfolio weights so they sum to 1
# total_weight = sum(portfolio.values())
# portfolio = {symbol: weight/total_weight for symbol, weight in portfolio.items()}

# print("Final portfolio:", portfolio)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [None]:
# Show the standardized feature matrix left by the last processed symbol as a table.
# `df` is deliberately not rebound here: it holds the price/indicator DataFrame,
# and overwriting it with a NumPy array breaks any later use of its columns or index.
pd.DataFrame(scaled_features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-1.001262,-0.995556,-1.000048,-0.998345,-0.998345,2.353136,-2.944184,-1.013005,-0.974125,-1.588921,-1.652322,-0.694341,-1.743711,0.951732
1,-0.998956,-0.996903,-1.000741,-0.998568,-0.998568,2.031934,-2.949973,-0.996858,-0.977565,-1.545645,-1.726816,-0.699628,-1.973529,-1.050717
2,-0.996705,-0.997352,-1.004072,-1.004027,-1.004027,1.428049,-3.086962,-0.873971,-0.981317,-1.696869,-1.733038,-0.697485,-2.186608,-1.050717
3,-1.000898,-1.001505,-1.007601,-1.007067,-1.007067,2.176561,-3.157186,-0.835156,-0.984960,-1.680718,-1.765967,-0.697267,-2.028408,-1.050717
4,-1.003617,-1.005084,-1.004284,-1.004915,-1.004915,0.624593,-2.880301,-1.170560,-0.987712,-1.526795,-1.759206,-0.713582,-1.837333,0.951732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3252,1.055274,1.097261,1.081157,1.114681,1.114681,-0.751171,-1.111630,-2.048121,1.173923,-1.155009,-1.457161,1.696145,-1.251836,0.951732
3253,1.102466,1.080418,1.079462,1.063291,1.063291,-0.810890,-1.380442,-2.026724,1.157686,-1.595693,-1.541952,1.652750,-1.385569,-1.050717
3254,1.051644,1.055568,1.049226,1.025028,1.025028,-0.822689,-1.565981,-1.984046,1.138052,-1.840529,-1.646872,1.604127,-1.493999,-1.050717
3255,1.041592,1.077933,1.068159,1.092896,1.092896,-0.744378,-1.000082,-1.603104,1.131895,-1.257692,-1.683706,1.609640,-1.104179,0.951732


In [None]:
 # Training rows dated before the test start, as left by the final split iteration.
 train_data_purged

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,MFI,EMA,%K,%D,ATR,ROC,Trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2010-01-26,13.462713,13.753754,13.420671,13.574074,13.574074,174045780,17.544762,36.301960,14.365575,8.307683,9.947369,0.384980,-8.698871,1
2010-01-27,13.545295,13.704955,13.396146,13.566066,13.566066,158417424,17.473791,36.555230,14.242574,9.635334,7.823697,0.379539,-9.953163,0
2010-01-28,13.625876,13.688689,13.278278,13.370621,13.370621,129034836,15.794491,38.482700,14.108427,4.995939,7.646319,0.381744,-11.116104,0
2010-01-29,13.475726,13.538288,13.153403,13.261762,13.261762,165454380,14.933634,39.091516,13.978171,5.491439,6.707571,0.381969,-10.252679,0
2010-02-01,13.378378,13.408659,13.270771,13.338839,13.338839,89941968,18.327883,33.830717,13.879812,10.213635,6.900338,0.365178,-9.209835,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-29,75.850998,79.436996,75.827499,77.844002,77.844002,55406000,51.904650,63.529086,78.034575,38.522749,44.602390,2.360304,-0.650256,1
2020-10-30,83.372002,84.066002,80.078499,80.805496,80.805496,99878000,58.264682,66.516357,78.460871,62.271392,38.990479,2.636139,3.368852,1
2020-11-02,81.177002,82.912003,80.711998,81.216003,81.216003,44228000,59.072596,59.280252,78.884737,67.021534,55.938558,2.604987,4.426321,1
2020-11-03,81.538002,82.949997,80.605499,82.282997,82.282997,31432000,61.176297,64.743224,79.407547,79.368143,69.553689,2.586380,4.972884,1


In [None]:
# Test rows left by the final split iteration.
test_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,MFI,EMA,%K,%D,ATR,ROC,Trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-11-05,88.522499,89.591003,87.051498,88.125000,88.125000,39758000,70.281922,72.541213,81.775137,89.651984,86.591748,2.823938,13.630501,1
2020-11-06,87.567001,88.425003,86.834000,87.986504,87.986504,28622000,69.836464,72.244111,82.730732,88.674385,89.693828,2.735872,10.954677,0
2020-11-09,89.799004,90.844498,87.877502,88.070999,88.070999,53440000,69.961553,73.032937,83.552312,82.014204,86.780191,2.752380,9.632404,1
2020-11-10,86.488503,87.969002,85.806000,86.886002,86.886002,43022000,65.837898,65.391839,84.065187,74.329642,81.672744,2.717567,6.414040,0
2020-11-11,87.419998,88.144997,87.153000,87.361504,87.361504,31838000,66.686422,65.618970,84.572313,77.413213,77.919020,2.613384,10.284737,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-23,87.110001,89.550003,87.070000,89.230003,89.230003,23003000,40.009451,20.066244,91.163310,21.619642,15.510974,2.845265,-6.014324,1
2022-12-27,88.800003,88.940002,87.010002,87.389999,87.389999,20097300,36.714162,20.401848,90.582801,8.099923,13.093779,2.800603,-6.744211,0
2022-12-28,86.980003,88.040001,85.940002,86.019997,86.019997,19523200,34.439705,21.071260,89.880831,0.588626,10.102730,2.750560,-7.335996,0
2022-12-29,86.620003,88.849998,86.610001,88.449997,88.449997,23333500,41.376879,27.046302,89.660703,18.469427,9.052659,2.756235,-5.208446,1


In [None]:
# First index value of test_data from the final split iteration.
test_start_date

Timestamp('2020-11-05 00:00:00')