In [None]:
import yfinance as yf

In [None]:
# yfinance handle for the "^BSESN" symbol (presumably the BSE Sensex index,
# going by the variable name). Constructing it downloads no price data;
# the history is fetched separately.
bse_sensex = yf.Ticker("^BSESN")

In [None]:
# Fetch the full available price history as a DataFrame.
# The Ticker is built inline rather than read from `bse_sensex`, because this
# cell rebinds `bse_sensex` to the resulting DataFrame: calling
# `bse_sensex.history(...)` would fail on a second run of the cell (a
# DataFrame has no `.history`). Written this way the cell is idempotent.
bse_sensex = yf.Ticker("^BSESN").history(period="max")

In [None]:
# Display the downloaded history frame to check its columns and row range.
bse_sensex

In [None]:
# Inspect the frame's index; it is used as the x-axis of the plot below.
bse_sensex.index

In [None]:
# Line chart of the Close column against the frame's index.
bse_sensex.plot(kind="line", y="Close", use_index=True)

In [None]:
# Remove the two columns that are never used as predictors further down.
# `drop(..., errors="ignore")` replaces `del` so the cell can be re-run safely:
# a second `del` would raise KeyError because the columns are already gone.
bse_sensex = bse_sensex.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [None]:
# Next row's Close placed on the current row (shift by -1). The final row has
# no following row, so its Tomorrow value is NaN.
bse_sensex = bse_sensex.assign(Tomorrow=bse_sensex["Close"].shift(-1))

In [None]:
# Re-display the frame to check the newly added Tomorrow column.
bse_sensex

In [None]:
# Binary label: 1 when the next row's close is strictly above this row's close,
# otherwise 0.
# Caveat: on the last row Tomorrow is NaN, the comparison evaluates to False
# and the label becomes 0 even though the real outcome is not known yet.
bse_sensex["Target"] = bse_sensex["Tomorrow"].gt(bse_sensex["Close"]).astype(int)

In [None]:
# Re-display the frame to check the newly added Target column.
bse_sensex

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline classifier: 100 trees, a node needs at least 100 samples before it
# may be split, and a fixed random_state so repeated runs give the same forest.
model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)

In [None]:
# Rows whose Tomorrow is NaN (the most recent row) have no real label: their
# Target of 0 is only an artefact of comparing against NaN. Exclude them so the
# held-out set is not scored against a made-up outcome.
labeled = bse_sensex.dropna(subset=["Tomorrow"])

# Chronological split: everything except the last 100 labelled rows is used
# for training, and the last 100 labelled rows are held out for evaluation.
train = labeled.iloc[:-100]
test = labeled.iloc[-100:]

# Raw price/volume columns used as the baseline feature set.
predictors = ["Close", "Volume", "Open", "High", "Low"]
model.fit(train[predictors], train["Target"])

In [None]:
from sklearn.metrics import precision_score
import pandas as pd

In [None]:
# Predict the held-out rows and wrap the raw array in a Series that carries the
# test index, so it aligns row-for-row with test["Target"].
# The Series is named "Predictions" (the same label predict() uses); left
# unnamed it would appear as a column called 0 once combined with the targets.
preds = pd.Series(model.predict(test[predictors]), index=test.index, name="Predictions")
preds

In [None]:
# Precision of the held-out predictions measured against the actual targets.
precision_score(test["Target"], preds)

In [None]:
# Put actual targets and predictions side by side (one column each, aligned on
# the shared index) and plot both series.
combined = pd.concat(objs=[test["Target"], preds], axis="columns")
combined.plot()

In [None]:
def predict(train, test, predictors, model):
    """Fit ``model`` on ``train`` and return its class predictions for ``test``.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the ``predictors`` columns and a ``"Target"`` column.
    predictors : list of str
        Column names fed to the model as features.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is refit in place.

    Returns
    -------
    DataFrame
        Indexed like ``test`` with two columns: ``"Target"`` (actual labels)
        and ``"Predictions"`` (model output).
    """
    features_train = train[predictors]
    labels_train = train["Target"]
    model.fit(features_train, labels_train)

    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], predicted], axis=1)



In [None]:
def backtest(data, model, predictors, start=2500, step=250):
    """Evaluate ``model`` over successive expanding training windows.

    For each offset ``i`` in ``range(start, len(data), step)`` the model is
    trained on rows ``[0, i)`` and used to predict rows ``[i, i + step)`` via
    the module-level ``predict`` function. Rows containing a NaN in *any*
    column are dropped from both slices first; a window whose training or
    test slice is empty after that is skipped.

    Parameters
    ----------
    data : DataFrame
        Row-ordered frame holding the ``predictors`` columns and ``"Target"``.
    model : estimator
        Passed through to ``predict``; refit for every window.
    predictors : list of str
        Feature column names.
    start : int, default 2500
        Number of leading rows in the first training window.
    step : int, default 250
        Number of rows in each test window (and growth of the training window).

    Returns
    -------
    DataFrame
        Concatenated per-window results with ``"Target"`` and
        ``"Predictions"`` columns. When no window produces predictions (for
        example ``data`` has no more than ``start`` rows) an empty frame with
        those two columns is returned instead of letting ``pd.concat`` raise
        on an empty list.
    """
    all_predictions = []
    for i in range(start, data.shape[0], step):
        # dropna() already returns a new frame, so no explicit copy is needed.
        train = data.iloc[:i].dropna()
        test = data.iloc[i:i + step].dropna()
        if train.empty or test.empty:
            continue
        all_predictions.append(predict(train, test, predictors, model))

    if not all_predictions:
        return pd.DataFrame(columns=["Target", "Predictions"])
    return pd.concat(all_predictions)

In [None]:
# Backtest the model on the raw price/volume predictors, using backtest()'s
# default start and step.
predictions = backtest(bse_sensex, model, predictors)

In [None]:
# Number of backtest rows assigned to each predicted class.
predictions["Predictions"].value_counts()

In [None]:
# Precision over all backtest windows combined.
precision_score(predictions["Target"], predictions["Predictions"])

In [None]:
# Share of backtest rows carrying each actual Target value; a reference point
# for reading the precision figure.
predictions["Target"].value_counts().div(len(predictions))

In [None]:
# Rolling-window lengths, in rows, for which features are derived.
horizons = [2, 5, 30, 60, 250, 1000]
new_predictors = []

# Only these two Series are needed. Rolling over the whole frame would also
# recompute statistics for every other column, including the feature columns
# added by earlier iterations of this loop, and then discard them.
close = bse_sensex["Close"]
# Shift by one row so a row's trend feature is built only from the Targets of
# earlier rows and never includes its own label. Hoisted: it does not depend
# on the horizon.
previous_targets = bse_sensex["Target"].shift(1)

for horizon in horizons:
    # Current close relative to its rolling mean over the last `horizon` rows.
    ratio_column = f"Close_Ratio_{horizon}"
    bse_sensex[ratio_column] = close / close.rolling(horizon).mean()

    # Count of Target == 1 among the `horizon` rows preceding this one.
    trend_column = f"Trend_{horizon}"
    bse_sensex[trend_column] = previous_targets.rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

In [None]:
# Preview only the rows where every column is populated. The result is not
# assigned, so `bse_sensex` itself still contains the rows with NaN values.
bse_sensex.dropna()

In [None]:
# Rebind `model` to a second forest: 200 trees and min_samples_split=50,
# with the same fixed random_state for repeatable results.
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)

In [None]:
def predict(train, test, predictors, model, threshold=0.6):
    """Fit ``model`` on ``train`` and return thresholded predictions for ``test``.

    Instead of the model's own class decision, the probability of the second
    class (column 1 of ``predict_proba``) is compared against ``threshold``.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the ``predictors`` columns and a ``"Target"`` column.
    predictors : list of str
        Column names fed to the model as features.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``; refit in place.
    threshold : float, default 0.6
        Minimum probability required to emit a positive prediction. The
        default reproduces the previously hard-coded cut-off.

    Returns
    -------
    DataFrame
        Indexed like ``test`` with ``"Target"`` (actual labels) and
        ``"Predictions"`` (float ``1.0`` where the probability is
        ``>= threshold``, else ``0.0``).
    """
    model.fit(train[predictors], train["Target"])
    up_probability = model.predict_proba(test[predictors])[:, 1]
    # Kept as floats (1.0 / 0.0) so the output dtype matches what callers
    # received before the threshold became configurable.
    signals = (up_probability >= threshold).astype(float)
    preds = pd.Series(signals, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)

In [None]:
# Backtest again, this time on the engineered ratio/trend columns only.
# backtest() calls whichever `predict` definition was executed most recently.
predictions = backtest(bse_sensex, model, new_predictors)

In [None]:
# Number of backtest rows assigned to each predicted value.
predictions["Predictions"].value_counts()

In [None]:
# Precision over all backtest windows for the engineered-feature run.
precision_score(predictions["Target"], predictions["Predictions"])