In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

from arch import arch_model
from sklearn.model_selection import train_test_split
from scipy.stats import chi2, jarque_bera
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import acf, adfuller
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.arima.model import ARIMA


### Data Import

In [8]:
import pandas as pd
import yfinance as yf
import os

# Tickers to analyse (any five liquid US equities work; edit freely).
tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "TSLA"]

# Cache location and download window for the daily close prices.
DATA_DIR = "data"
START_DATE = "2010-01-01"
END_DATE = "2025-03-07"
# Timezone used for the index; all tickers above trade on US exchanges.
EXCHANGE_TZ = "America/New_York"

# Make sure the cache directory exists, otherwise the first `to_csv` call
# on a fresh checkout fails with "No such file or directory".
os.makedirs(DATA_DIR, exist_ok=True)

stock_data = {}

for ticker in tickers:
    csv_filepath = f"{DATA_DIR}/{ticker}.csv"
    try:
        # Dates are parsed explicitly below: the cached timestamps mix
        # -04:00 and -05:00 offsets (DST), which `parse_dates=True` cannot
        # turn into a proper DatetimeIndex.
        df = pd.read_csv(csv_filepath, index_col="Date")
        print(f"Data loaded successfully from {csv_filepath}")
    except FileNotFoundError:
        print(f"Error: CSV file not found at {csv_filepath}. Downloading data from Yahoo Finance...")
        stock = yf.Ticker(ticker)
        hist = stock.history(start=START_DATE, end=END_DATE)
        if hist.empty:
            # Fail loudly instead of caching an empty file that would
            # silently empty `df_combined` on every later run.
            raise RuntimeError(
                f"Yahoo Finance returned no data for {ticker} "
                f"between {START_DATE} and {END_DATE}"
            )
        df = pd.DataFrame(hist, columns=["Close"])
        df.to_csv(csv_filepath)
        print(f"Data for {ticker} downloaded and saved to {csv_filepath}")

    # Keep only the Close price, named after the ticker.
    close = df[["Close"]].rename(columns={"Close": ticker})
    # Normalise the index to a tz-aware DatetimeIndex so the cached and the
    # freshly downloaded paths produce identical frames.
    # NOTE(review): assumes cached timestamps carry a UTC offset, as in the
    # files yfinance wrote here; tz-naive caches would be read as UTC.
    close.index = pd.to_datetime(close.index, utc=True).tz_convert(EXCHANGE_TZ)
    stock_data[ticker] = close

# Align all tickers on common trading days; rows where any ticker is
# missing (e.g. before the latest listing date) are dropped.
df_combined = pd.concat(stock_data.values(), axis=1).dropna()

print(df_combined.head())


Data loaded successfully from data/AAPL.csv
Data loaded successfully from data/MSFT.csv
Data loaded successfully from data/GOOGL.csv
Data loaded successfully from data/AMZN.csv
Data loaded successfully from data/TSLA.csv
                               AAPL       MSFT      GOOGL    AMZN      TSLA
Date                                                                       
2010-06-29 00:00:00-04:00  7.709079  17.675097  11.326808  5.4305  1.592667
2010-06-30 00:00:00-04:00  7.569441  17.447613  11.094666  5.4630  1.588667
2010-07-01 00:00:00-04:00  7.477657  17.561352  10.958523  5.5480  1.464000
2010-07-02 00:00:00-04:00  7.431316  17.644762  10.885215  5.4570  1.280000
2010-07-06 00:00:00-04:00  7.482172  18.061806  10.873248  5.5030  1.074000
