In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Load Dataset

In [2]:
def fetch_iwm_data():
    iwm = yf.Ticker("IWM")
    data = iwm.history(period="max")  # Adjust the period as needed
    return data

In [3]:
iwm_data = fetch_iwm_data()

In [4]:
iwm_data['Target'] = (iwm_data['High'] >= 1.01 * iwm_data['Open']).astype(int)

In [None]:
# Calculate the distribution of the target variable
target_distribution = iwm_data['Target'].value_counts(normalize=True)

# Print the distribution
print("Class Distribution:")
print(target_distribution)

# Visualize the distribution

plt.bar(target_distribution.index, target_distribution.values)
plt.xlabel('Class')
plt.ylabel('Proportion')
plt.title('Class Distribution')
plt.xticks([0, 1], ['No Trade', 'Trade'])
plt.show()

In [5]:
# Function to create input features and target variable with sliding window of n days
def create_sliding_window(data, n):
    X, y = [], []
    for i in range(len(data) - n):
        X.append(data.iloc[i:i+n].drop('Target', axis=1).values)
        y.append(data.iloc[i+n]['Target'])
    return np.array(X), np.array(y)

In [6]:
# Define window size
n_days = 3

# Create sliding window dataset
X, y = create_sliding_window(iwm_data[['Open', 'Close', 'High', 'Low', 'Volume', 'Target']], n_days)


In [7]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [8]:
# Flatten the input data
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flattened)
X_test_scaled = scaler.transform(X_test_flattened)


# Neural Network Model

In [9]:
# Define the neural network model
nn_classifier = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
nn_classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
# Train the model
nn_classifier.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x289b58e80>

In [11]:
def trade_decision(predictions, threshold = 0.6):
    # Threshold for entering a trade
    

    # Make trading decisions based on predicted predictions
    trades = []
    for prediction in predictions:
        if prediction > threshold:
            trades.append(1)  # Enter trade
        else:
            trades.append(0)  # Do not enter trade

    return trades

In [12]:
# Predict probabilities for the test set
nn_predictions = nn_classifier.predict(X_test_scaled)

# Threshold for entering a trade
# threshold = 0.6

# Make trading decisions based on predicted probabilities
nn_trades = trade_decision(nn_predictions)




In [13]:
y_test.shape

(1201,)

In [14]:
# Evaluate the model
nn_accuracy = np.mean(nn_trades == y_test)
print(f'Test Accuracy: {nn_accuracy}')

Test Accuracy: 0.7119067443796836


# Random Forest Classifier

In [15]:
# Initialize Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Random Forest classifier
rf_classifier.fit(X_train_flattened, y_train)

In [16]:
# Predict probabilities for the test set
rf_predictions = rf_classifier.predict_proba(X_test_flattened)[:, 1]

In [17]:
rf_trades = trade_decision(rf_predictions)

In [18]:
# Evaluate Random Forest
rf_accuracy = np.mean(rf_trades == y_test)
print(f'Random Forest Test Accuracy: {rf_accuracy}')

Random Forest Test Accuracy: 0.7035803497085762


# SVM

In [19]:
# Initialize SVM classifier
svm_classifier = SVC(kernel='rbf', probability=True, random_state=42)

# Train the SVM classifier
svm_classifier.fit(X_train_flattened, y_train)

In [20]:
# Predict probabilities for the test set
svm_predictions = svm_classifier.predict_proba(X_test_flattened)[:, 1]

In [21]:
# Make trading decisions based on predicted probabilities
svm_trades = trade_decision(svm_predictions)

In [22]:
# Evaluate the model
accuracy_svm = np.mean(svm_trades == y_test)
print(f'SVM Test Accuracy: {accuracy_svm}')

SVM Test Accuracy: 0.7060782681099084


# Logistic Regression

In [23]:

logistic_classifier = LogisticRegression(random_state=42)
logistic_classifier.fit(X_train_flattened, y_train)

In [24]:
logistic_predictions = logistic_classifier.predict_proba(X_test_flattened)[:, 1]

In [25]:
logistic_trades = trade_decision(logistic_predictions)

In [26]:
accuracy_logistic = np.mean(logistic_trades == y_test)
print(f'Logistic Regression Test Accuracy: {accuracy_logistic}')

Logistic Regression Test Accuracy: 0.704412989175687


# K-Nearest Neighbors

In [27]:

knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train_flattened, y_train)

In [28]:
knn_predictions = knn_classifier.predict_proba(X_test_flattened)[:, 1]

In [29]:
knn_trades = trade_decision(knn_predictions)

In [30]:
accuracy_knn = np.mean(knn_trades == y_test)
print(f'KNN Test Accuracy: {accuracy_knn}')

KNN Test Accuracy: 0.6960865945045795


# XGBoost

In [31]:
xgb_classifier = XGBClassifier(random_state=42)
xgb_classifier.fit(X_train_flattened, y_train)

In [32]:
xgb_predictions = xgb_classifier.predict_proba(X_test_flattened)[:, 1]

In [33]:
xgb_trades = trade_decision(xgb_predictions)

In [34]:
accuracy_xgb = np.mean(xgb_trades == y_test)
print(f'XGBoost Test Accuracy: {accuracy_xgb}')

XGBoost Test Accuracy: 0.704412989175687


# Gaussian Naive Bayes

In [35]:
gnb_classifier = GaussianNB()
gnb_classifier.fit(X_train_flattened, y_train)

In [36]:
gnb_predictions = gnb_classifier.predict_proba(X_test_flattened)[:, 1]


In [37]:
gnb_trades = trade_decision(gnb_predictions)

In [38]:
accuracy_gnb = np.mean(gnb_trades == y_test)
print(f'Gaussian Naive Bayes Test Accuracy: {accuracy_gnb}')

Gaussian Naive Bayes Test Accuracy: 0.7027477102414654


# Decision Trees

In [39]:
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train_flattened, y_train)

In [40]:
dt_predictions = dt_classifier.predict_proba(X_test_flattened)[:, 1]

In [41]:
dt_trades = trade_decision(dt_predictions)

In [42]:
accuracy_dt = np.mean(dt_trades == y_test)
print(f'Decision Tree Test Accuracy: {accuracy_dt}')

Decision Tree Test Accuracy: 0.6361365528726062


# Bagging

In [43]:
bagging_classifier = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagging_classifier.fit(X_train_flattened, y_train)



In [44]:
bagging_predictions = bagging_classifier.predict_proba(X_test_flattened)[:, 1]

In [45]:
bagging_trades = trade_decision(bagging_predictions)

In [46]:

accuracy_bagging = np.mean(bagging_trades == y_test)
print(f'Bagging Test Accuracy: {accuracy_bagging}')


Bagging Test Accuracy: 0.6969192339716903


# AdaBoost

In [47]:
adaboost_classifier = AdaBoostClassifier(n_estimators=100, random_state=42)
adaboost_classifier.fit(X_train_flattened, y_train)

In [48]:
adaboost_predictions = adaboost_classifier.predict_proba(X_test_flattened)[:, 1]

In [49]:
adaboost_trades = trade_decision(adaboost_predictions)

In [50]:
accuracy_adaboost = np.mean(adaboost_trades == y_test)
print(f'AdaBoost Test Accuracy: {accuracy_adaboost}')

AdaBoost Test Accuracy: 0.704412989175687


# Gaussian Processes

In [51]:
gp_classifier = GaussianProcessClassifier(kernel=RBF(), random_state=42)
gp_classifier.fit(X_train_flattened, y_train)

In [52]:
gp_predictions = gp_classifier.predict_proba(X_test_flattened)[:, 1]

In [53]:
gp_trades = trade_decision(gp_predictions)

In [54]:
accuracy_gp = np.mean(gp_trades == y_test)
print(f'Gaussian Processes Test Accuracy: {accuracy_gp}')

Gaussian Processes Test Accuracy: 0.704412989175687


# Store Accuracies

In [55]:
# Store accuracies in a dictionary
accuracies = {
    'Model': ['Neural Network', 'Random Forest', 'SVM', 'Logistic Regression', 'KNN', 'XGBoost', 
              'Gaussian Naive Bayes', 'Decision Tree', 'Bagging', 'AdaBoost', 'Gaussian Processes'],
    'Test Accuracy': [nn_accuracy, rf_accuracy, accuracy_svm, accuracy_logistic, accuracy_knn, accuracy_xgb,
                      accuracy_gnb, accuracy_dt, accuracy_bagging, accuracy_adaboost, accuracy_gp]
}

# Create a DataFrame from the accuracies dictionary
accuracies_df = pd.DataFrame(accuracies)

# Sort the DataFrame by Test Accuracy in descending order
accuracies_df_sorted = accuracies_df.sort_values(by='Test Accuracy', ascending=False)

# Display the sorted DataFrame
print(accuracies_df_sorted)

                   Model  Test Accuracy
0         Neural Network       0.711907
2                    SVM       0.706078
3    Logistic Regression       0.704413
5                XGBoost       0.704413
9               AdaBoost       0.704413
10    Gaussian Processes       0.704413
1          Random Forest       0.703580
6   Gaussian Naive Bayes       0.702748
8                Bagging       0.696919
4                    KNN       0.696087
7          Decision Tree       0.636137
