![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

<a href="https://hub.callysto.ca/jupyter/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2Fcallysto%2Fdata-viz-of-the-week&branch=main&subPath=world-childrens-day/world-childrens-day.ipynb&depth=1" target="_parent"><img src="https://raw.githubusercontent.com/callysto/curriculum-notebooks/master/open-in-callysto-button.svg?sanitize=true" width="123" height="24" alt="Open in Callysto"/></a>

In [None]:
from datetime import date
from datetime import timedelta
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score

try:
    from prophet import Prophet
    from prophet.plot import plot_plotly
except ImportError:
    !pip install prophet
    from prophet import Prophet
    from prophet.plot import plot_plotly

try:
    import yfinance as yf
except:
    !pip install yfinance
    import yfinance as yf

print("Libaries imported.")

In [None]:
# Download the complete available price history for the "^GSPC" ticker in one
# chained call, then display the resulting frame.
sp500 = yf.Ticker("^GSPC").history(period="max")
sp500

In [None]:
# Two side-by-side panels sharing a y-axis: Open on the left, Close on the right.
sp500_plots = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    subplot_titles=("Open Price for S&P 500", "Close Price for S&P 500"),
)
for panel_col, price_column in enumerate(("Open", "Close"), start=1):
    # px.line builds a one-trace figure; lift that trace into the subplot grid.
    price_trace = px.line(sp500, x=sp500.index, y=price_column).data[0]
    sp500_plots.add_trace(price_trace, row=1, col=panel_col)

sp500_plots.update_layout(title="S&P 500 Open and Close Price")
sp500_plots.show()

In [None]:
def fetch_stock_data(symbol):
    """Download the full available price history for one ticker symbol.

    Parameters
    ----------
    symbol : str
        Ticker symbol passed to ``yf.Ticker``, e.g. ``"^IXIC"``.

    Returns
    -------
    pandas.DataFrame or None
        The frame returned by ``Ticker.history(period="max")``, or ``None``
        when the download raised an exception or produced no rows.
    """
    try:
        stock_data = yf.Ticker(symbol).history(period="max")
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

    # NOTE(review): yfinance normally reports an unknown or delisted symbol by
    # returning an empty frame instead of raising, so an empty result is treated
    # as a failure too. Otherwise the caller's `is not None` check never fires.
    if stock_data is None or stock_data.empty:
        print(f"No data returned for {symbol}.")
        return None

    return stock_data

In [None]:
symbols = ['^IXIC']

# `data` stays bound to the result for the last symbol in the list once the
# loop finishes.
for symbol in symbols:
    data = fetch_stock_data(symbol)

    # Nothing to show when the download failed.
    if data is None:
        continue

    print(f"\n{symbol} Stock Data:\n")
    display(data)

In [None]:
# "Tomorrow" is the next row's closing price; "Target" is 1 when that price is
# higher than the current row's close and 0 otherwise.
# The final row has no next close (NaN), so its comparison is False and its
# Target is 0.
data["Tomorrow"] = data["Close"].shift(periods=-1)
data["Target"] = data["Close"].lt(data["Tomorrow"]).astype(int)
data

In [None]:
# Keep only rows dated 2000-01-01 or later. The explicit .copy() makes `data`
# an independent frame, so adding columns to it later does not raise pandas'
# SettingWithCopyWarning.
data = data.loc[data.index >= "2000-01-01"].copy()
data

In [None]:
# Columns used as model inputs.
predictors = ["Open", "High", "Low", "Close", "Volume"]

# Hold out the last 100 rows as the test set; train on everything before them.
train = data.head(-100)
test = data.tail(100)

model = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=100,
    random_state=42,
)
model.fit(train[predictors], train["Target"])

In [None]:
# Predict the held-out rows, align the result to the test index, and score the
# precision of the predictions against the actual targets.
predictions = pd.Series(model.predict(test[predictors]), index=test.index)
precision_score(test["Target"], predictions)

In [None]:
# Put actual and predicted labels side by side. The predictions Series has no
# name, so its column is labelled 0 after the concat and renamed from that key.
combined = pd.concat([test["Target"], predictions], axis=1).rename(
    columns={"Target": "Actual", 0: "Predicted"}
)
combined

In [None]:
# Line chart comparing the actual and predicted labels over the test rows.
actual_vs_predicted = px.line(
    combined,
    x=combined.index,
    y=['Actual', 'Predicted'],
    labels={'index': 'Date', 'value': 'Values'},
    title='Actual vs Predicted',
)
actual_vs_predicted.show()

In [None]:
def predict(train, test, predictors, model):
    """Fit ``model`` on the training rows and predict the test rows.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing the ``predictors`` columns and a "Target" column.
    predictors : list of str
        Names of the feature columns.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``test``, with the actual "Target" column next to the
        model's "Predictions".
    """
    model.fit(train[predictors], train["Target"])
    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], predicted], axis=1)

def backtest(data, model, predictors, start=2500, step=250):
    """Evaluate ``model`` with an expanding-window walk-forward backtest.

    For each ``i`` in ``range(start, len(data), step)`` the model is trained on
    rows ``[0, i)`` and used to predict rows ``[i, i + step)`` via ``predict``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the ``predictors`` columns and a "Target" column.
    model : estimator
        Passed through to ``predict``; it is refit for every window.
    predictors : list of str
        Names of the feature columns.
    start : int, default 2500
        Number of leading rows used to train the first window.
    step : int, default 250
        Number of rows predicted per window.

    Returns
    -------
    pandas.DataFrame
        The per-window results of ``predict`` concatenated in order.

    Raises
    ------
    ValueError
        If ``data`` has ``start`` rows or fewer, so no test window exists.
    """
    n_rows = data.shape[0]
    if n_rows <= start:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" error from an empty list.
        raise ValueError(
            f"backtest needs more than {start} rows to form a test window, "
            f"but data has only {n_rows} rows."
        )

    predictions_list = [
        predict(
            data.iloc[:i].copy(),
            data.iloc[i:i + step].copy(),
            predictors,
            model,
        )
        for i in range(start, n_rows, step)
    ]
    return pd.concat(predictions_list)

print("Functions defined.")

In [None]:
# Run the walk-forward backtest with the current model and predictors, then
# count how many times each class was predicted.
predictions = backtest(data=data, model=model, predictors=predictors)
predictions.loc[:, "Predictions"].value_counts()

In [None]:
# Precision of the backtest predictions against the actual targets.
precision_score(
    y_true=predictions["Target"],
    y_pred=predictions["Predictions"],
)

In [None]:
# Rolling-window lengths (in rows) used to build the extra features.
horizons = [2,5,60,250,1000]
new_predictors = []
rolling_features = {}

for horizon in horizons:
    ratio_column = f"Close_ratio_{horizon}"
    trend_column = f"Trend_{horizon}"

    # Current close divided by the mean close over the last `horizon` rows.
    # Only the "Close" Series is rolled, not the whole frame.
    rolling_features[ratio_column] = data["Close"] / data["Close"].rolling(horizon).mean()

    # Sum of Target over the `horizon` rows before the current one; shift(1)
    # keeps the current row's own Target out of its feature.
    rolling_features[trend_column] = data["Target"].shift(1).rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

# assign() attaches all new columns to a fresh frame (in insertion order), and
# dropna() then removes every row containing a NaN, e.g. rows whose rolling
# windows are not yet full.
data = data.assign(**rolling_features).dropna()
data

In [None]:
# Fresh classifier with 200 trees for the next round of backtesting.
model = RandomForestClassifier(
    n_estimators=200,
    min_samples_split=100,
    random_state=42,
)

In [None]:
def predict(train, test, predictors, model, threshold=.6):
    """Fit ``model`` and label test rows using a probability cut-off.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing the ``predictors`` columns and a "Target" column.
    predictors : list of str
        Names of the feature columns.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``.
    threshold : float, default 0.6
        A row is labelled 1 when the second column of ``predict_proba`` is at
        least this value, and 0 otherwise.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``test``, with the actual "Target" column next to a float
        "Predictions" column of 0.0 / 1.0 values.
    """
    model.fit(train[predictors], train["Target"])
    positive_proba = model.predict_proba(test[predictors])[:, 1]
    # Threshold in one pass; cast to float to keep the 0.0 / 1.0 values the
    # probability array held after in-place thresholding.
    labels = (positive_proba >= threshold).astype(float)
    preds = pd.Series(labels, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)

In [None]:
# Repeat the walk-forward backtest using the rolling-feature predictors, then
# count how many times each class was predicted.
predictions = backtest(data=data, model=model, predictors=new_predictors)
predictions.loc[:, "Predictions"].value_counts()

In [None]:
# Precision of the backtest predictions against the actual targets.
precision_score(
    y_true=predictions["Target"],
    y_pred=predictions["Predictions"],
)

In [None]:
# Plot the latest backtest results. `combined` still holds the 100-row test
# output of the first model, so plotting it here would just repeat the earlier
# chart; use `predictions` instead.
backtest_results = predictions.rename(
    columns={"Target": "Actual", "Predictions": "Predicted"}
)
px.line(
    backtest_results,
    x=backtest_results.index,
    y=['Actual', 'Predicted'],
    labels={'index': 'Date', 'value': 'Values'},
    title='Actual vs Predicted',
).show()