In [None]:
import yfinance as yf
import pandas as pd


# Scrape Yahoo Finance's world-indices page; the first HTML table is taken as
# the list of major indices and its "Symbol" column supplies the tickers.
df_list = pd.read_html('https://finance.yahoo.com/world-indices/')
majorStockIdx = df_list[0]

# Download the daily price history of every index, tagging each frame with
# its ticker so rows stay identifiable after concatenation.
stock_list = []
for s in majorStockIdx.Symbol:
    tickerData = yf.Ticker(s)
    # Bar size is selected with `interval`; `period` is a look-back window
    # that conflicts with an explicit start/end range, so it is not passed.
    tickerDf1 = tickerData.history(interval='1d', start='2010-1-1', end='2020-1-25')
    tickerDf1['ticker'] = s
    stock_list.append(tickerDf1)

# NOTE: `tickerDf1` stays bound to the frame of the LAST symbol iterated;
# the modelling cells further down operate on that single index only.
msi = pd.concat(stock_list, axis = 0)

# Index tickers grouped by geographic region.
region_idx = {
    'US & Canada': ['^GSPC', '^DJI', '^IXIC', '^RUT', '^GSPTSE'],
    'Latin America': ['^BVSP', '^MXX', '^IPSA'],
    'East Asia': ['^N225', '^HSI', '000001.SS', '399001.SZ', '^TWII', '^KS11'],
    'ASEAN & Oceania': ['^STI', '^JKSE', '^KLSE', '^AXJO', '^NZ50'],
    'South & West Asia': ['^BSESN', '^TA125.TA'],
    'Europe': ['^FTSE', '^GDAXI', '^FCHI', '^STOXX50E', '^N100', '^BFX'],
}


def getRegion(ticker):
    """Return the name of the first region whose ticker list contains `ticker`.

    Regions are searched in the insertion order of `region_idx`.
    Returns None when the ticker is not listed under any region.
    """
    matching_regions = (
        region for region, members in region_idx.items() if ticker in members
    )
    return next(matching_regions, None)
# Label every row of the combined table with the region of its ticker
# (None for tickers that are not listed in region_idx).
msi['region'] = msi['ticker'].apply(getRegion)







In [None]:

# Inspect the frame left over from the download loop: `tickerDf1` holds the
# history of the last symbol that was iterated, not the combined `msi` table.
tickerDf1

In [177]:
# Remove the corporate-action columns, which are not used as model inputs.
# drop(..., errors="ignore") instead of `del` keeps this cell re-runnable:
# a second execution no longer raises KeyError for already-removed columns.
tickerDf1 = tickerDf1.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [178]:
# "Tomorrow" is the next row's close; shift(-1) leaves NaN in the final row.
tickerDf1["Tomorrow"] = tickerDf1["Close"].shift(-1)

In [None]:
# Binary label: 1 when the next row's close is above this row's close, else 0.
# The final row has no "Tomorrow" value (NaN), so its comparison is False and
# it is labelled 0 even though the real outcome is unknown.
tickerDf1["Target"] = (tickerDf1["Tomorrow"] > tickerDf1["Close"]).astype(int)
tickerDf1

In [180]:
# Keep rows dated 1991-01-01 or later and take an independent copy.
# NOTE(review): the download above requests data from 2010 onwards, so this
# slice presumably keeps every row; confirm whether the cut-off is still needed.
tickerDf1 = tickerDf1.loc["1991-01-01":].copy()

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
# Baseline classifier; random_state is fixed so repeated runs give the same forest.
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=1)

# Positional hold-out: the last 100 rows are the test set, everything before
# them is the training set (assumes rows are in date order; verify).
train = tickerDf1.iloc[:-100]
test = tickerDf1.iloc[-100:]

# Raw price/volume columns used as features for the baseline model.
predictors = ["Close", "Volume", "Open", "High", "Low"]
model.fit(train[predictors], train["Target"])

RandomForestClassifier(min_samples_split=100, random_state=1)


In [182]:
from sklearn.metrics import precision_score

# Class predictions of the fitted baseline model for the held-out rows.
preds = model.predict(test[predictors])

In [183]:
import pandas as pd
# Wrap the raw prediction array in a Series aligned with the test rows.
# The Series is named "Predictions" (matching what predict() returns) so the
# column in `combined` is not labelled with the default name 0.
preds = pd.Series(preds, index=test.index, name="Predictions")

In [184]:
# Precision of the baseline model on the held-out rows: of the rows predicted
# as 1, the fraction whose Target really is 1.
precision_score(test["Target"], preds) 

0.5849056603773585

In [185]:
# Side-by-side frame of true labels and predictions for the held-out rows.
combined = pd.concat([test["Target"], preds], axis=1)


In [153]:
#combined.plot()

In [186]:
def predict(train, test, predictors, model):
    """Fit `model` on `train` and score `test`, returning labels and predictions.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the `predictors` columns and a "Target" column.
    predictors : list of str
        Feature column names passed to the model.
    model : estimator
        Object exposing `fit(X, y)` and `predict(X)`; it is refitted in place.

    Returns
    -------
    DataFrame
        Indexed like `test`, with columns "Target" and "Predictions".
    """
    features_train, labels_train = train[predictors], train["Target"]
    model.fit(features_train, labels_train)
    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], predicted], axis=1)

In [188]:
def backtest(data, model, predictors, start=2500, step=250):
    """Walk-forward evaluation of `model` over `data`.

    Starting at row `start`, the model is trained on all rows before position
    `i` and evaluated on the next `step` rows; `i` then advances by `step`
    until the end of the data. The per-window results are concatenated.

    Parameters
    ----------
    data : DataFrame
        Frame containing the `predictors` columns and a "Target" column.
    model : estimator
        Passed through to `predict`, which refits it for every window.
    predictors : list of str
        Feature column names.
    start : int, default 2500
        Number of leading rows reserved for the first training window.
    step : int, default 250
        Number of rows in each test window.

    Returns
    -------
    DataFrame
        Concatenation of the frames returned by `predict` for each window.

    Raises
    ------
    ValueError
        If `data` has `start` rows or fewer, so no test window can be formed.
    """
    n_rows = data.shape[0]
    # Without this guard the loop below never runs and pd.concat([]) fails
    # with an opaque "No objects to concatenate" error.
    if n_rows <= start:
        raise ValueError(
            f"backtest needs more than start={start} rows to form a test window, "
            f"but data has only {n_rows} rows"
        )

    all_predictions = []
    for i in range(start, n_rows, step):
        train = data.iloc[0:i].copy()
        test = data.iloc[i:(i+step)].copy()
        all_predictions.append(predict(train, test, predictors, model))

    return pd.concat(all_predictions)

In [None]:
# Display the working frame (price columns plus "Tomorrow" and "Target").
tickerDf1

In [None]:
# Baseline backtest on the raw price/volume features. `new_predictors` is only
# created by the rolling-feature cell further down, so referencing it here
# raised NameError on a fresh top-to-bottom run; the baseline uses `predictors`.
predictions = backtest(tickerDf1, model, predictors)

In [None]:
# How many rows were predicted as each class across all backtest windows.
predictions["Predictions"].value_counts()

In [None]:
# Rolling-window features. Horizons are counted in rows (trading days):
# roughly two days, one week, three months, one year and four years.
horizons = [2, 5, 60, 250, 1000]
new_predictors = []

for horizon in horizons:
    # Roll over the single column that is needed. The frame also carries the
    # string "ticker" column, which a frame-wide rolling mean/sum cannot
    # aggregate (pandas >= 2.0 raises instead of silently dropping it).
    rolling_close_mean = tickerDf1["Close"].rolling(horizon).mean()

    # Ratio of the current close to its rolling mean over the horizon.
    ratio_column = f"Close_Ratio_{horizon}"
    tickerDf1[ratio_column] = tickerDf1["Close"] / rolling_close_mean

    # Number of "up" labels in the previous `horizon` rows; shift(1) excludes
    # the current row's own Target so the feature does not leak the label.
    trend_column = f"Trend_{horizon}"
    tickerDf1[trend_column] = tickerDf1["Target"].shift(1).rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

In [None]:
# Precision over all backtest windows: share of predicted 1s that were real 1s.
precision_score(predictions["Target"], predictions["Predictions"])

In [None]:
# Share of each Target class among the backtested rows; the share of 1s is the
# precision that always predicting 1 would achieve, i.e. the benchmark.
predictions["Target"].value_counts() / predictions.shape[0]

In [199]:
# Discard every row with a missing value in any column. This removes the
# leading rows whose rolling windows are incomplete and the final row, whose
# "Tomorrow" is NaN.
tickerDf1 = tickerDf1.dropna()

In [200]:
# Replace the baseline model: 200 trees, and a node must hold at least 50
# samples before it may be split; the seed is fixed for repeatable results.
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)


In [201]:
def predict(train, test, predictors, model, threshold=0.6):
    """Fit `model` on `train` and return thresholded predictions for `test`.

    Unlike a plain `model.predict`, a row is predicted as 1 only when the
    model's probability for the second class (column 1 of `predict_proba`)
    is at least `threshold`.

    Parameters
    ----------
    train, test : DataFrame
        Frames containing the `predictors` columns and a "Target" column.
    predictors : list of str
        Feature column names passed to the model.
    model : estimator
        Object exposing `fit(X, y)` and `predict_proba(X)`; refitted in place.
    threshold : float, default 0.6
        Minimum probability required to predict 1. The default reproduces the
        previously hard-coded cut-off.

    Returns
    -------
    DataFrame
        Indexed like `test`, with columns "Target" and "Predictions"
        (predictions are 1.0 / 0.0 floats).
    """
    model.fit(train[predictors], train["Target"])
    proba_up = model.predict_proba(test[predictors])[:, 1]
    # Vectorised cut-off; the float cast keeps the 1.0/0.0 values the earlier
    # in-place masking produced.
    preds = pd.Series(
        (proba_up >= threshold).astype(float),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["Target"], preds], axis=1)

In [None]:
# Backtest the new model on the rolling-window features.
# NOTE(review): backtest() defaults to start=2500 rows, while the download
# covers 2010-2020 and dropna() has removed rows; confirm enough rows remain,
# or pass a smaller `start`.
predictions = backtest(tickerDf1, model, new_predictors)

In [None]:
# How many rows the thresholded model predicted as each class.
predictions["Predictions"].value_counts()

In [None]:
# Precision of the thresholded model across all backtest windows.
precision_score(predictions["Target"], predictions["Predictions"])