<a href="https://colab.research.google.com/github/abbyconners06/stock-marketai/blob/main/Stocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [312]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
import datetime
import yfinance as yf
!pip install xgboost
from xgboost import XGBClassifier




In [391]:
# Data scraping
ticker_symbol = "AAPL"
end_date = datetime.datetime.today().strftime('%Y-%m-%d')
start_date = (datetime.datetime.today() - datetime.timedelta(days=650)).strftime('%Y-%m-%d')
data = yf.download(ticker_symbol, start=start_date, end=end_date)
data.to_csv('AAPL.csv')

[*********************100%***********************]  1 of 1 completed


In [392]:
# Creating new column that tells whether the stock price went up or down
# Calculate the price differences
data['Price_Difference'] = data['Close'].diff()

# Create a new column indicating the direction of the stock movement: 0 represents up, 1 represents down
data['Stock_Movement'] = np.where(data['Price_Difference'] > 0, 1, 0)
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Price_Difference,Stock_Movement
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-09-24,145.660004,147.470001,145.559998,146.919998,145.435806,53477900,,0
2021-09-27,145.470001,145.960007,143.820007,145.369995,143.901489,74150700,-1.550003,0
2021-09-28,143.25,144.75,141.690002,141.910004,140.47644,108972300,-3.459991,0
2021-09-29,142.470001,144.449997,142.029999,142.830002,141.387131,74602000,0.919998,1
2021-09-30,143.660004,144.380005,141.279999,141.5,140.070572,89056700,-1.330002,0


In [393]:
X = data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
y = data['Stock_Movement']

In [431]:
# Initialize stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=8, shuffle=True, random_state=42)

In [432]:
# Initialize a list to store the accuracy scores
accuracy_scores = []

In [433]:
model_rf = RandomForestClassifier()
model_xgb = XGBClassifier()
model_et = ExtraTreesClassifier()
model_bagging = BaggingClassifier()

In [434]:
voting_model = VotingClassifier(
    estimators=[
        ('rf', model_rf),
        ('xgb', model_xgb),
        ('et', model_et),
        ('bagging', model_bagging),
    ],
    voting='hard'  # Majority voting based on predicted class labels
)

In [435]:
# Perform stratified k-fold cross-validation
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit each individual model using the training data
    model_rf.fit(X_train, y_train)
    model_xgb.fit(X_train, y_train)
    model_et.fit(X_train, y_train)
    model_bagging.fit(X_train, y_train)

    # Train the Voting Classifier model using the fitted individual models
    voting_model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = voting_model.predict(X_test)

    # Calculate the accuracy of the Voting Classifier model
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)


In [436]:
# Calculate the mean accuracy across all folds for the Voting Classifier
mean_accuracy = np.mean(accuracy_scores)
print('Voting Classifier Mean Accuracy:', mean_accuracy)

Voting Classifier Mean Accuracy: 0.7622159090909091
