In [None]:
# numpy and pandas will be used for data manipulation
import numpy as np
import pandas as pd

# matplotlib will be used for visually representing our data
import matplotlib.pyplot as plt

# yfinance will be used for importing historical stock prices
import yfinance as yfin

In [None]:
# Symbol of the stock to analyse. Kept in its own variable so that the name
# `ticker` only ever refers to the yfinance Ticker object, never to a string.
ticker_symbol = 'AMZN'
ticker = yfin.Ticker(ticker_symbol)

# Importing five years of price history for that symbol
data = ticker.history(period='5y')

# Stop here with a clear message if nothing was downloaded, instead of letting
# a later cell fail on an empty DataFrame
if data.empty:
    raise RuntimeError(f"No price history returned for {ticker_symbol}")

In [None]:
# Plot the closing price on a 10x5 inch figure and keep the Axes that pandas
# returns, so the label below is applied to exactly the axes that was drawn on
# rather than to pyplot's implicit "current" axes
ax = data['Close'].plot(figsize=(10, 5))

# Setting the text on the Y-axis
ax.set_ylabel("Amazon Stock Prices")

plt.show()

In [None]:
# All three indicators are built from the close shifted down by one row, so the
# value stored on a row is computed only from earlier rows' closes and never
# from that row's own 'Close' (the column the model is later asked to predict).
lagged_close = data['Close'].shift(1)

# Simple moving averages over the previous 20 and 50 rows
data['MA_20'] = lagged_close.rolling(window=20).mean()
data['MA_50'] = lagged_close.rolling(window=50).mean()

# Exponential moving average (span 20), lagged like the simple averages: an EMA
# of the unshifted close would contain the same-row 'Close', i.e. the target.
data['EMA_20'] = lagged_close.ewm(span=20, adjust=False).mean()

In [None]:
# Inspect the first 20 rows; the moving-average columns begin with NaN because
# rolling() emits no value until its window (20 or 50 rows) is full
data.head(20)

In [None]:
# Discard every row that still contains a missing value
data = data.dropna(axis=0, how='any')

# Two-column feature frame (note: X is reassigned with more columns in the next cell)
X = data.loc[:, ['EMA_20', 'MA_50']]

# Preview the first five rows
X.head(5)

In [None]:
# Discard rows with missing values (nothing is left to drop if the previous
# cell has already run)
data = data.dropna(axis=0, how='any')

# Frame used for the train/test split: the three moving-average columns
# together with 'Close', the value to be predicted
X = data.loc[:, ['MA_20', 'EMA_20', 'MA_50', 'Close']]

# Preview the first five rows
X.head(5)

In [None]:
# Fraction of the rows (taken from the start of X) that forms the training set
training = 0.7

# Row position at which X is cut
t = int(len(X) * training)

# Training dataset: every row before the cut
data_train = X.iloc[:t]

# Testing dataset: the cut position onwards
data_test = X.iloc[t:]

In [None]:
# Display the training slice (data_train was assigned from X[:t] in the split cell)
data_train

In [None]:
# Display the testing slice (data_test was assigned from X[t:] in the split cell)
data_test

In [None]:
# NOTE(review): consider importing only the pycaret functions actually used
# instead of `*`, so it is clear where each name comes from.
from pycaret.regression import *

# Configure the experiment on the training slice ONLY. Passing the full `data`
# frame would hand the rows of `data_test` to pycaret as well; finalize_model()
# would then train on them and the later "unseen" predictions would really be
# in-sample. setup() still keeps back part of data_train as its own hold-out.
s = setup(
    data=data_train[['MA_20','MA_50','EMA_20','Close']],
    target='Close',               # Column to predict
    session_id=123,               # Fixed seed for reproducible runs
    numeric_features=['MA_20','MA_50','EMA_20'],
    fold_strategy='timeseries',   # Ensures temporal integrity
    data_split_shuffle=False,     # Prevents random shuffling
    n_jobs=-1                     # Use all available CPU cores
)

In [None]:
# Create a Linear Regression model ('lr' is the pycaret ID for it); it is
# trained inside the experiment configured by setup()
lr = create_model('lr')

In [None]:
# Tune the linear regression's hyperparameters with pycaret's default search settings
tuned_lr = tune_model(lr)

In [None]:
# List the estimators pycaret makes available to create_model()
models()

In [None]:
# Define Random Forest Regressor ('rf' is the pycaret ID for it)
rf = create_model('rf')

In [None]:
# Tune the Random Forest Regressor's hyperparameters (pycaret default search settings)
tuned_rf = tune_model(rf)


In [None]:
# Print the text representation of the tuned Random Forest estimator
print(tuned_rf)

In [None]:
# No `plot` argument is given, so pycaret draws its default plot type for the
# tuned model (a residuals plot per the pycaret docs — TODO confirm for the
# installed version)
plot_model(tuned_rf)

In [None]:
# Prediction-error plot for the tuned Random Forest
plot_model(tuned_rf, plot = 'error')

In [None]:
# Feature-importance plot for the tuned Random Forest
plot_model(tuned_rf, plot = 'feature')

In [None]:
# Open pycaret's interactive evaluation view for the tuned Random Forest
evaluate_model(tuned_rf)

In [None]:
# No `data` argument is passed, so (per the pycaret docs) the tuned model is
# scored on the hold-out set that setup() created
predict_model(tuned_rf)

In [None]:
# Refit the tuned model with finalize_model (per the pycaret docs this trains
# on the whole dataset given to setup(), hold-out rows included)
final_rf = finalize_model(tuned_rf)

In [None]:
# Print the text representation of the finalized model
print(final_rf)

In [None]:
# Score the finalized model on pycaret's hold-out set (no `data` argument).
# NOTE(review): finalize_model refits on data that, per the pycaret docs,
# includes the hold-out rows, so these scores are likely in-sample — confirm
# before quoting them as test performance.
predict_model(final_rf)

In [None]:
# Score the finalized model on the rows set aside as data_test.
# NOTE(review): these rows are only genuinely unseen if they were excluded from
# the data passed to setup() — verify.
unseenrf_predictions = predict_model(final_rf, data=data_test)
# Display the returned frame
unseenrf_predictions

In [None]:
# Visualise Random Forest Regressor: predicted vs. actual closing price

# Single-column frame holding only the model's predictions
predictedrf_price = unseenrf_predictions.reindex(columns=['prediction_label'])

# Draw both series on one explicitly created 10x5 inch axes
fig, ax = plt.subplots(figsize=(10, 5))
predictedrf_price.plot(ax=ax)
unseenrf_predictions['Close'].plot(ax=ax)

# Legend entries follow drawing order: predictions first, actual closes second
ax.legend(['Predicted Price','Actual Price'])
ax.set_ylabel("Amazon Stock Prices")
plt.show()

#Comparison of performance against Linear Regression Model - See Cwk Documentation