This is a classification-based algorithm. It takes today's data, such as OHLCV, as well as the end-of-day values of the ATR, EMA, and CMO indicators. Based on this data, it predicts the return of the next day. This prediction is then sent to a classification model, which analyzes the data and tells whether a long or short position should be taken.

In [1]:
pip install python-dateutil

Note: you may need to restart the kernel to use updated packages.


In [72]:
#importing non sklearn libraries 
import numpy as np
import pandas as pd
import talib
import yfinance as yf
from datetime import datetime,timedelta
from dateutil.relativedelta import relativedelta
import joblib
#importing sklearn libraries 
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split,cross_validate,GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


In [73]:
def linreg(df): #linear regression - regression model
    """Fit a standardised linear-regression pipeline on ``df``.

    Every column except the last is used as a feature and the last column
    is the regression target.

    Args:
        df: DataFrame whose final column holds the target values.

    Returns:
        The fitted ``Pipeline`` (``StandardScaler`` followed by
        ``LinearRegression``).
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]
    # Keep the same 80/20 split and seed so the model is fitted on exactly
    # the same rows as before. The held-out part is not used here: the
    # previous test-set prediction and MSE were computed and then discarded.
    x_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('linreg', LinearRegression()),
    ])
    pipe.fit(x_train, y_train)
    return pipe

In [74]:
def forestreg(df): #random forest regressor - regression model
    """Grid-search and fit a random-forest regression pipeline on ``df``.

    Every column except the last is used as a feature and the last column
    is the regression target. The number of trees is chosen from
    1000, 2000 and 5000 by 5-fold cross-validation on the training split.

    Args:
        df: DataFrame whose final column holds the target values.

    Returns:
        The best ``Pipeline`` (``StandardScaler`` followed by
        ``RandomForestRegressor``) found by the search, fitted on the
        training split.
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]
    # Same 80/20 split and seed as before; only the training part is needed
    # because the former test-set prediction/MSE was never used.
    x_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('forest', RandomForestRegressor(n_jobs=-1)),
    ])
    param_grid = {'forest__n_estimators': [1000, 2000, 5000]}
    search = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
    search.fit(x_train, y_train)
    # GridSearchCV defaults to refit=True, so best_estimator_ is already
    # fitted on the whole training split; fitting it a second time only
    # repeated the most expensive step.
    return search.best_estimator_

In [75]:
def forestclas(df): #random forest classifier-classification model
    """Grid-search and fit a random-forest classification pipeline on ``df``.

    Every column except the last is used as a feature and the last column
    is the class label. The number of trees is chosen from 1000, 2000 and
    5000 by 5-fold cross-validation on the training split.

    Args:
        df: DataFrame whose final column holds the class labels.

    Returns:
        The best ``Pipeline`` (``StandardScaler`` followed by
        ``RandomForestClassifier``) found by the search, fitted on the
        training split.
    """
    features = df.iloc[:, :-1]
    labels = df.iloc[:, -1]
    # Same 80/20 split and seed as before; the held-out part is unused
    # because the former test-set prediction was never inspected.
    x_train, _, y_train, _ = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('forest', RandomForestClassifier(n_jobs=-1)),
    ])
    param_grid = {'forest__n_estimators': [1000, 2000, 5000]}
    search = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
    search.fit(x_train, y_train)
    # GridSearchCV defaults to refit=True, so best_estimator_ is already
    # fitted on the whole training split; a second fit is redundant.
    return search.best_estimator_

In [76]:
def dataprepare(ticker,startdate,enddate):
    """Download price history and build the regression and classification frames.

    The regression frame has the columns Open, High, Low, Close, Adj Close,
    Volume, CMO, EMA, ATR (all 20-period indicators on the close) and, as
    the last column, ``tomorrow_buying_return``: the next row's
    ``(Close - Open) / Open``. Rows with any missing value (indicator
    warm-up and the final row, which has no "tomorrow") are dropped.

    The classification frame contains the same columns plus a final
    ``position`` column: 1 where the next-day return is more than one
    standard deviation above the mean, -1 where it is more than one standard
    deviation below, and only those rows are kept.

    Args:
        ticker: A single ticker symbol understood by Yahoo Finance.
        startdate: First date to download, as accepted by ``yf.download``.
        enddate: End date to download, as accepted by ``yf.download``.

    Returns:
        Tuple ``(regression_frame, classification_frame)``.

    Raises:
        ValueError: If the download returns no rows.
    """
    # auto_adjust=False keeps a separate 'Adj Close' column, which the
    # downstream feature vector expects.
    # NOTE(review): the default of this flag is believed to differ between
    # yfinance releases - confirm against the installed version.
    df = yf.download(ticker, start=startdate, end=enddate, auto_adjust=False)
    if df.empty:
        raise ValueError(
            f"No price data returned for {ticker!r} between {startdate} and {enddate}"
        )
    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; keep only the field level so df['Close'] is a Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    # Pin the column order so positional feature vectors always line up,
    # whatever order the download happens to use.
    df = df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].copy()

    close = df['Close']
    df['CMO'] = talib.CMO(close, timeperiod=20) #Calculating CMO value
    df['EMA'] = talib.EMA(close, timeperiod=20) #Calculating EMA value
    df['ATR'] = talib.ATR(df['High'], df['Low'], close, timeperiod=20) #calculating ATR value

    # Open-to-close return of each day, shifted up one row so every row
    # carries the return of the following day.
    intraday_return = (close - df['Open']) / df['Open']
    df['tomorrow_buying_return'] = intraday_return.shift(-1)
    df = df.dropna() #data for regression

    df2 = df.copy()
    next_return = df2['tomorrow_buying_return']
    avg_r = next_return.mean()
    std_r = next_return.std()
    df2['position'] = np.nan
    df2.loc[next_return > avg_r + std_r, 'position'] = 1   #long entry
    df2.loc[next_return < avg_r - std_r, 'position'] = -1  #short entry
    df2 = df2.dropna() #data for classification: unlabelled rows are dropped
    return df, df2
        

In [1]:
def model():
    """Interactively train the three models and print a prediction for today.

    Prompts for a ticker and an end date, prepares one year of data ending
    at that date, fits the linear-regression, random-forest-regressor and
    random-forest-classifier models, saves each to a ``.joblib`` file and
    loads it back. It then prompts for today's OHLCV and indicator values,
    prints the mean of the two regressors' predicted next-day return
    (rounded to two decimals), and prints "Buy" when the classifier predicts
    class 1, otherwise "Don't Buy".

    Raises:
        ValueError: If the date does not match ``%Y-%m-%d`` or a numeric
            prompt receives a non-numeric answer.
    """
    ticker=input("Enter ticker")
    # The parser below needs a four-digit year, so the prompt says so.
    enddate=input("Enter present date in yyyy-mm-dd format")
    end_date_obj = datetime.strptime(enddate, '%Y-%m-%d')
    start_date_obj = end_date_obj - relativedelta(years=1) #one year before the entered date
    startdate = start_date_obj.strftime('%Y-%m-%d')

    d1,d2=dataprepare(ticker,startdate,enddate) #regression and classification data

    #training and saving the models
    joblib.dump(linreg(d1),'linearregressionmodel.joblib')
    joblib.dump(forestreg(d1),'forestregressormodel.joblib')
    joblib.dump(forestclas(d2),'forestclassifiermodel.joblib')
    #loading the models
    m1=joblib.load('linearregressionmodel.joblib')
    m2=joblib.load('forestregressormodel.joblib')
    m3=joblib.load('forestclassifiermodel.joblib')

    #getting new data
    open_=float(input("enter today's open"))
    high=float(input("enter today's high"))
    low=float(input("enter today's low"))
    close=float(input("enter today's close"))
    adj_close=float(input("enter today's adjusted close"))
    volume=float(input("enter today's volume"))
    atr=float(input("enter today's ATR"))
    ema=float(input("enter today's EMA"))
    cmo=float(input("enter today's CMO"))

    # Build the row by column name and then select the training columns in
    # their training order. The previous positional list passed EMA before
    # CMO, whereas the training frame has CMO before EMA.
    today = pd.DataFrame([{
        'Open': open_,
        'High': high,
        'Low': low,
        'Close': close,
        'Adj Close': adj_close,
        'Volume': volume,
        'CMO': cmo,
        'EMA': ema,
        'ATR': atr,
    }])

    #regression predictions
    reg_features = today[list(d1.columns[:-1])]
    pred1=m1.predict(reg_features)
    pred2=m2.predict(reg_features)
    # Parentheses are required for a mean: without them only the second
    # prediction was halved.
    # NOTE(review): the rounded value is also fed to the classifier, which
    # was trained on unrounded returns - confirm that this is intended.
    buying_return=np.round((pred1[0]+pred2[0])/2,2)
    print(buying_return)

    #classification prediction
    today['tomorrow_buying_return'] = buying_return
    clf_features = today[list(d2.columns[:-1])]
    pred3=m3.predict(clf_features)
    if pred3[0]==1:
        print("Buy")
    else:
        print("Don't Buy")
    

In [None]:
def mainmenu():
    """Entry point: run the interactive train-and-predict workflow once."""
    model()


# Start the workflow as soon as this cell/script is executed.
mainmenu()