In [1]:
pip install yfinance pandas numpy matplotlib torch scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd
import os

# Create data directory if it doesn't exist
os.makedirs('data', exist_ok=True)

# Define tickers
tickers = ['AMZN', 'IBM', 'MSFT']

# Download and save each stock's data
for ticker in tickers:
    # auto_adjust is passed explicitly: yfinance changed its default to True
    # (see the warning this cell used to print), so relying on the default
    # makes the saved prices depend on the installed yfinance version.
    stock_data = yf.download(
        ticker,
        start="2010-01-01",
        end="2024-01-01",
        auto_adjust=True,
    )

    # Do not write an empty CSV and report success when nothing came back
    # (e.g. network failure or an unknown ticker).
    if stock_data is None or stock_data.empty:
        raise RuntimeError(
            f"No data returned for {ticker}; data/{ticker}.csv was not written."
        )

    stock_data.to_csv(f'data/{ticker}.csv')
    print(f"{ticker} data saved.")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


AMZN data saved.


[*********************100%***********************]  1 of 1 completed


IBM data saved.


[*********************100%***********************]  1 of 1 completed

MSFT data saved.





In [5]:
# Data preprocessing
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def preprocess_data(file_path, sequence_length=60):
    """Build scaled sliding-window sequences of closing prices from a CSV file.

    The 'Close' column is read, coerced to numeric (non-numeric entries such
    as stray header rows become NaN and are dropped), min-max scaled to
    [0, 1], and cut into overlapping windows. Each window of
    ``sequence_length`` consecutive values is paired with the value that
    immediately follows it.

    Parameters
    ----------
    file_path : str or path-like
        CSV file that contains a 'Close' column.
    sequence_length : int, default 60
        Number of consecutive scaled closes in each input window.

    Returns
    -------
    X : numpy.ndarray of shape (n_rows - sequence_length, sequence_length, 1)
        Input windows, laid out as [samples, time_steps, features].
    y : numpy.ndarray of shape (n_rows - sequence_length,)
        Scaled close that follows each window.
    scaler : MinMaxScaler
        The fitted scaler, so callers can map values back to prices.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1, or if the file has no more
        than ``sequence_length`` valid close prices (no window/target pair
        can be formed).

    Notes
    -----
    The scaler is fit on the whole series. If the result is later split into
    train and test sets, the scaling range has already seen the test portion.
    """
    if sequence_length < 1:
        raise ValueError(
            f"sequence_length must be >= 1, got {sequence_length}"
        )

    # Load csv data into data frame
    df = pd.read_csv(file_path)

    # Coerce 'Close' to numeric and drop the rows that could not be parsed,
    # without mutating the loaded frame in place.
    close = pd.to_numeric(df['Close'], errors='coerce').dropna()

    # 2D array (rows, 1), the layout scikit-learn's scaler expects
    close_prices = close.to_numpy(dtype=float).reshape(-1, 1)

    # Fail early with a clear message instead of an IndexError further down.
    if len(close_prices) <= sequence_length:
        raise ValueError(
            f"Need more than {sequence_length} valid 'Close' values to build "
            f"sequences, found {len(close_prices)} in {file_path!r}"
        )

    # Normalize close prices into [0, 1]
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(close_prices)
    series = scaled_data[:, 0]

    # Window k covers series[k : k + sequence_length]; its target is
    # series[k + sequence_length]. The final window has no following value,
    # so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(series, sequence_length)

    # sliding_window_view returns a read-only view; copy so callers get
    # ordinary writable arrays, already shaped [samples, time_steps, features].
    X = windows[:-1, :, np.newaxis].copy()
    y = series[sequence_length:].copy()

    return X, y, scaler



In [4]:
# Build the AMZN windows with the default sequence length and keep the fitted
# scaler for later use.
X_amzn, y_amzn, scaler_amzn = preprocess_data(file_path='data/AMZN.csv')

# Sanity check: X should be [samples, time_steps, 1] and y should be [samples].
print(f"X shape: {X_amzn.shape}, y shape: {y_amzn.shape}")

X shape: (3462, 60, 1), y shape: (3462,)
