# Random Forest Time Series Forecasting for NVIDIA Stock Prices

## Data Extraction

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
from datetime import datetime, timedelta
import math

import yfinance as yf

In [None]:
# Define the stock list
stocks_list = ['NVDA']

# Define the date range: the two years ending now.
end = datetime.now()
# DateOffset clamps Feb 29 to Feb 28, whereas
# datetime(end.year - 2, end.month, end.day) raises ValueError on a leap day.
start = (pd.Timestamp(end) - pd.DateOffset(years=2)).to_pydatetime()

# Download stock data into an explicit mapping instead of injecting names
# into globals(), so the NVDA binding below is visible to readers and tools.
stock_frames = {}
for stock in stocks_list:
    frame = yf.download(stock, start, end)
    if frame.empty:
        raise RuntimeError(f"No price data returned for {stock!r}")

    # yfinance may return (Price, Ticker) MultiIndex columns; keep only the
    # price-field level. Taking the labels from the frame itself (rather than
    # assigning a hard-coded list positionally) avoids mislabelling columns
    # when the column order or count differs between yfinance versions.
    # NOTE(review): assumes level 0 is the price field (default layout).
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = frame.columns.get_level_values(0)
    frame.columns.name = None

    # Move the date index into a regular 'Date' column.
    stock_frames[stock] = frame.reset_index()

NVDA = stock_frames['NVDA']

# Check for NaN values in each column
print("\nNumber of NaN values in each column:")
print(NVDA.isna().sum())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume,company_name,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 7_level_1,NVDA,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2024-10-15 00:00:00+00:00,,,,,,,NVIDA,131.600006,131.600006,138.570007,128.740005,137.869995,377831000.0
2024-10-16 00:00:00+00:00,,,,,,,NVIDA,135.720001,135.720001,136.619995,131.580002,133.979996,264879700.0
2024-10-17 00:00:00+00:00,,,,,,,NVIDA,136.929993,136.929993,140.889999,136.869995,139.339996,306435900.0
2024-10-18 00:00:00+00:00,,,,,,,NVIDA,138.0,138.0,138.899994,137.279999,138.669998,176090200.0
2024-10-21 00:00:00+00:00,,,,,,,NVIDA,143.710007,143.710007,143.710007,138.0,138.130005,264554500.0
2024-10-22 00:00:00+00:00,,,,,,,NVIDA,143.589996,143.589996,144.419998,141.779999,142.910004,226311600.0
2024-10-23 00:00:00+00:00,,,,,,,NVIDA,139.559998,139.559998,142.429993,137.460007,142.029999,285930000.0
2024-10-24 00:00:00+00:00,,,,,,,NVIDA,140.410004,140.410004,141.350006,138.460007,140.820007,172354900.0
2024-10-25 00:00:00+00:00,,,,,,,NVIDA,141.539993,141.539993,144.130005,140.800003,140.929993,205122100.0
2024-10-28 00:00:00+00:00,,,,,,,NVIDA,140.520004,140.520004,143.139999,140.050003,143.0,173031000.0


## Data Pre-Processing

In [None]:
# Keep only the closing price as a one-column DataFrame and discard any
# rows where it is missing.
nvidia_data = NVDA.loc[:, ['Close']].dropna()

# 2-D NumPy array of shape (n_rows, 1) holding the closing prices
data = nvidia_data.to_numpy()

In [None]:
# Standardize the data (standardization is less sensitive to outliers compared to normalization).
# The scaler is fitted on the training portion only, so the mean and standard
# deviation it learns carry no information from the held-out test period.
# The 80% fraction must stay in sync with the train/test split performed below.
scaler = StandardScaler()
fit_len = int(np.ceil(len(data) * .8))
scaler.fit(data[:fit_len])

# Apply the training-set statistics to the whole series.
scaled_data = scaler.transform(data)

In [None]:
# The first 80% of the scaled series (rounded up) is used for training.
training_data_len = math.ceil(len(scaled_data) * 0.8)
train_data = scaled_data[:training_data_len]

# Build supervised samples: each target value is paired with the 60
# observations that immediately precede it.
window = 60
targets = range(window, len(train_data))
x_train = np.array([train_data[t - window:t, 0] for t in targets])
y_train = np.array([train_data[t, 0] for t in targets])

# Add a trailing feature axis: (samples, window) -> (samples, window, 1)
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_train.shape

## Random Forest Regressor