In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error



2024-03-21 18:18:11.985975: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
def get_yahoo_finance_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    return data

In [10]:
def preprocess_data(data):
    # Drop any missing values
    data.dropna(inplace=True)
    # Normalize the data
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(data)
    return scaled_data, scaler

In [21]:
def train_test_split(data, train_size):
    train = data[:train_size]
    test = data[train_size:]
    print("Start")
    print(train)
    print("end")
    return train, test

In [22]:
def train_arima_model(train):
    history = [x for x in train]
    predictions = []
    for t in range(len(train)):
        model = ARIMA(history, order=(5,1,0))
        model_fit = model.fit()
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        history.append(test[t])
    return predictions

In [23]:
def create_ann_model(train, test):
    X_train, y_train = train[:, :-1], train[:, -1]
    X_test, y_test = test[:, :-1], test[:, -1]

    model = Sequential()
    model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)

    predictions = model.predict(X_test)

    return predictions

In [24]:
def hybrid_model(train, test):
    arima_predictions = train_arima_model(train)
    ann_predictions = create_ann_model(train, test)

    combined_predictions = (np.array(arima_predictions) + np.array(ann_predictions)) / 2

    return combined_predictions

In [25]:
def main():
    # Get data from Yahoo Finance
    ticker = 'AAPL'  # Example: Apple Inc.
    start_date = '2020-01-01'
    end_date = '2021-01-01'
    data = get_yahoo_finance_data(ticker, start_date, end_date)

    # Preprocess data
    scaled_data, scaler = preprocess_data(data)

    # Split data into train and test sets
    train_size = int(len(scaled_data) * 0.8)
    train_data, test_data = train_test_split(scaled_data, train_size)

    # Make predictions using ARIMA-ANN hybrid model
    predictions = hybrid_model(train_data, test_data)

    # Invert predictions
    predictions = scaler.inverse_transform(np.concatenate((train_data, predictions), axis=1))[:, -1]

    # Evaluate the model
    test_actual = scaler.inverse_transform(test_data)[:, -1]
    rmse = np.sqrt(mean_squared_error(test_actual, predictions))
    print(f'Root Mean Squared Error (RMSE): {rmse}')

if __name__ == "__main__":
    main()

[*********************100%%**********************]  1 of 1 completed

Start
[[0.21029244 0.22071883 0.25428793 0.23567731 0.23102334 0.23376706]
 [0.21310006 0.22065754 0.2583218  0.22661991 0.22208209 0.26231331]
 [0.20273357 0.21875956 0.24677445 0.23397124 0.22933937 0.18876348]
 ...
 [0.76144636 0.78460788 0.80058512 0.80173079 0.80001564 0.17341932]
 [0.79303957 0.78889375 0.8087144  0.78076237 0.77917349 0.18088235]
 [0.77674931 0.77505668 0.7699154  0.74304416 0.74168228 0.19469289]]
end





ValueError: SARIMAX models require univariate `endog`. Got shape (202, 6).