In [1]:
import datetime as dt
import yfinance as yf
import pandas as pd
import numpy as np

## Define techinical indicators

In [2]:
def MACD(DF, a, b, c):
    intra_price = DF.copy()
    intra_price["MA_FAST"] = intra_price["Adj Close"].ewm(span = a, min_periods = a).mean()
    intra_price["MA_SLOW"] = intra_price["Adj Close"].ewm(span = b, min_periods = b).mean()
    intra_price["MACD"] = intra_price["MA_FAST"] - intra_price["MA_SLOW"]
    intra_price["SIGNAL"] = intra_price["Adj Close"].ewm(span = c, min_periods = c).mean()
    intra_price.dropna(inplace=True)
    return intra_price

def SMA(DF, day):
    copy = DF.copy()
    sma = copy.rolling(window=day, min_periods=day).mean() 
    return sma

def EMA(DF, day):
    copy = DF.copy()
    sma = copy.ewm(span=day, min_periods=day).mean() 
    return sma

def ATR(DF, n):
    df = DF.copy()
    df["H-L"] = abs(df["High"] - df["Low"]).shift(1)
    df["H-PC"] = abs(df["High"] - df["Adj Close"]).shift(1)
    df["L-PC"] = abs(df["Low"] - df["Adj Close"]).shift(1)
    df["TR"] = df[["H-L", "H-PC", "L-PC"]].max(axis=1, skipna=False)
    df["ATR"] = df["TR"].rolling(n).mean()
    df2 = df.drop(["H-L", "H-PC", "L-PC"], axis=1)
    return df2

def BollBnd(DF, n):
    df = DF.copy()
    df["MA"] = df["Adj Close"].rolling(n).mean()
    df["BB_UP"] = df["MA"] + 2*df["MA"].rolling(n).std()
    df["BB_DOWN"] = df["MA"] - 2*df["MA"].rolling(n).std()
    df["BB_RANGE"] = df["BB_UP"] - df["BB_DOWN"]
    df.dropna(inplace=True)
    return df

def RSI(DF, n):
    df = DF.copy()
    print(len(df))
    df["delta"] = df["Adj Close"] - df["Adj Close"].shift(1)
    df["gain"] = np.where(df["delta"]>=0, df["delta"], 0)
    df["loss"] = np.where(df["delta"]<0, abs(df["delta"]), 0)
    avg_gain = []
    avg_loss = []
    gain = df["gain"].tolist()
    loss = df["loss"].tolist()
    for i in range(len(df)):
        if i < n:
            avg_gain.append(np.NaN)
            avg_loss.append(np.NaN)
        elif i == n:
            avg_gain.append(df["gain"].rolling(n).mean().tolist()[n])
            avg_loss.append(df["loss"].rolling(n).mean().tolist()[n])
        elif i > n:
            avg_gain.append((avg_gain[i - 1]*(n - 1) + gain[i])/n)
            avg_loss.append((avg_loss[i - 1]*(n - 1) + loss[i])/n)
    df["avg_gain"] = np.array(avg_gain)

    df["avg_loss"] = np.array(avg_loss)
    df["RS"] = df["avg_gain"]/df["avg_loss"]
    df["RSI"] = 100 - (100/(1+df["RS"]))
    return df["RSI"]

def ADX(DF, n):
    df2 = DF.copy()
    df2["TR"] = ATR(df2, n)["TR"]
    df2["DMplus"] = np.where((df2["High"] - df2["High"].shift(1))>(df2["Low"].shift(1)-df2["Low"]), 
                             df2["High"] - df2["High"].shift(1),
                             0)
    df2["DMplus"] = np.where(df2["DMplus"]<0, 0, df2["DMplus"])
    df2["DMminus"] = np.where((df2["Low"].shift(1)-df2["Low"])>(df2["High"] - df2["High"].shift(1)),
                             df2["Low"].shift(1)-df2["Low"],
                             0)
    df2["DMminus"] = np.where(df2["DMminus"]<0, 0, df2["DMminus"])
    TRn=[]
    DMplusN=[]
    DMminusN=[]
    TR = df2["TR"].tolist()
    DMplus = df2['DMplus'].tolist()
    DMminus = df2['DMminus'].tolist()
    for i in range(len(df2)):
        if i<n:
            TRn.append(np.NaN)
            DMplusN.append(np.NaN)
            DMminusN.append(np.NaN)
        elif i == n:
            TRn.append(df2["TR"].rolling(n).sum().tolist()[n])
            DMplusN.append(df2["DMplus"].rolling(n).sum().tolist()[n])
            DMminusN.append(df2["DMminus"].rolling(n).sum().tolist()[n])
        else:
            TRn.append(TRn[i-1] - (TRn[i-1]/14) + TR[i])
            DMplusN.append(DMplusN[i-1] - (DMplusN[i-1]/14) + DMplus[i])
            DMminusN.append(DMminusN[i-1] - (DMminusN[i-1]/14) + DMminus[i])
            
    df2["TRn"] = np.array(TRn)
    df2["DMplusN"] = np.array(DMplusN)
    df2["DMminusN"] = np.array(DMminusN)
    df2["DIplusN"] = (100*df2["DMplusN"]/df2["TRn"])
    df2["DIminusN"] = (100*df2["DMminusN"]/df2["TRn"])
    df2["DIsum"] =  df2["DIplusN"] + df2["DIminusN"] 
    df2["DIdiff"] =  abs(df2["DIplusN"] - df2["DIminusN"])
    df2["DX"] = 100*(df2["DIdiff"]/df2["DIsum"])
    ADX = []
    DX = df2["DX"].tolist()
    for j in range(len(df2)):
        if j < 2*n-1:
            ADX.append(np.NaN)
        elif j == 2*n-1:
            ADX.append(df2["DX"][j-n+1: j+1].mean())
        elif j > 2*n-1:
            ADX.append(((n - 1)*ADX[j-1] + DX[j])/n)
    df2["ADX"] =np.array(ADX)
    return df2["ADX"]
        

## Try on one single stock microsoft for one year

In [3]:
start = dt.datetime.today() - dt.timedelta(360)
end = dt.datetime.today()
MSTF_cl = pd.DataFrame()
MSTF_cl = yf.download("MSFT", start, end)
daily_return = MSTF_cl.pct_change()["Adj Close"]
MSTF_cl["RETURN"] = daily_return

[*********************100%***********************]  1 of 1 completed


In [4]:
MSTF_cl["MACD"] = MACD(MSTF_cl, 12, 26, 9)["SIGNAL"]
MSTF_cl["SMA"] = SMA(daily_return, 5)
MSTF_cl["EMA"] = EMA(daily_return, 5)
MSTF_cl = ATR(MSTF_cl, 20)
MSTF_cl = BollBnd(MSTF_cl,10)
MSTF_cl["RSI"] = np.array(RSI(MSTF_cl, 14))
MSTF_cl["ADX"] = np.array(ADX(MSTF_cl, 14))

223


In [5]:
import seaborn as sns
import numpy as np # linear algebra
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import utils

In [6]:
df = MSTF_cl.dropna(axis=0)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RETURN,MACD,SMA,EMA,TR,ATR,MA,BB_UP,BB_DOWN,BB_RANGE,RSI,ADX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2020-05-19,185.029999,186.600006,183.490005,183.630005,181.782730,26799100,-0.006922,180.616239,0.001283,0.001158,3.150146,5.152773,181.361989,185.976534,176.747444,9.229089,60.210904,22.046414
2020-05-20,184.809998,185.850006,183.940002,185.660004,184.304169,31261300,0.013871,181.353829,0.007082,0.005395,4.817276,5.015636,181.722038,185.696301,177.747775,7.948526,62.816184,21.972207
2020-05-21,185.399994,186.669998,183.289993,183.429993,182.090454,29119500,-0.012011,181.501155,0.003812,-0.000407,1.910004,4.952137,181.755782,185.069834,178.441729,6.628105,59.154061,22.068945
2020-05-22,183.190002,184.460007,182.539993,183.509995,182.169861,20826900,0.000436,181.634896,0.000986,-0.000126,4.579544,4.912892,181.690553,184.275663,179.105443,5.170220,59.245842,21.911200
2020-05-26,186.339996,186.500000,181.100006,181.570007,180.244049,36073600,-0.010572,181.356726,-0.003040,-0.003608,2.290146,4.834900,181.228815,183.190542,179.267088,3.923454,55.961593,22.200123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-22,237.419998,237.929993,232.399994,234.509995,234.509995,36423100,-0.026808,240.658032,-0.008192,-0.011315,3.680008,4.712461,242.234996,245.790636,238.679356,7.111281,48.521085,34.774528
2021-02-23,230.330002,234.830002,228.729996,233.270004,233.270004,30191200,-0.005288,239.180427,-0.008197,-0.009306,5.529999,4.757000,241.370712,244.037338,238.704087,5.333251,46.614107,33.380912
2021-02-24,230.009995,235.199997,229.000000,234.550003,234.550003,26301400,0.005487,238.254342,-0.007971,-0.004375,6.100006,4.784000,240.504727,242.670896,238.338558,4.332337,48.848943,31.975967
2021-02-25,232.080002,234.589996,227.880005,228.990005,228.990005,39481600,-0.023705,236.401475,-0.012376,-0.010818,6.199997,4.889001,239.177524,241.705098,236.649950,5.055147,40.849563,30.930572


In [7]:
y_t = np.array(df['RETURN'])

y_t = np.where(y_t < 0, 0, 1)

X_t = df
X_t = df.drop(["Open", "High", "Low", "Adj Close", "Volume", "RETURN" ],axis=1)
X_t = np.array(X_t)

lab_enc = preprocessing.LabelEncoder()
y_t = lab_enc.fit_transform(y_t)

print("shape of Y :"+ str(y_t.shape))
print("shape of X :"+ str(X_t.shape))

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_t = scaler.fit_transform(X_t)

shape of Y :(196,)
shape of X :(196, 12)


In [8]:
X_train,X_test,Y_train,Y_test = train_test_split(X_t,y_t,test_size=.20,random_state=42)
print("shape of X Train :"+str(X_train.shape))
print("shape of X Test :"+str(X_test.shape))
print("shape of Y Train :"+str(Y_train.shape))
print("shape of Y Test :"+str(Y_test.shape))


print(utils.multiclass.type_of_target(Y_train))
print(utils.multiclass.type_of_target(Y_test))


shape of X Train :(156, 12)
shape of X Test :(40, 12)
shape of Y Train :(156,)
shape of Y Test :(40,)
binary
binary


In [9]:
for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='linear',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("Linear SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))

Linear SVM value of C:1, training score :0.775641 , Test Score: 0.850000 

Linear SVM value of C:3, training score :0.820513 , Test Score: 0.900000 

Linear SVM value of C:5, training score :0.814103 , Test Score: 0.925000 

Linear SVM value of C:10, training score :0.826923 , Test Score: 0.900000 

Linear SVM value of C:40, training score :0.858974 , Test Score: 0.825000 

Linear SVM value of C:60, training score :0.858974 , Test Score: 0.825000 

Linear SVM value of C:80, training score :0.852564 , Test Score: 0.825000 

Linear SVM value of C:100, training score :0.846154 , Test Score: 0.825000 



In [10]:
for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='poly',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("Poly SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))

Poly SVM value of C:1, training score :0.923077 , Test Score: 0.725000 

Poly SVM value of C:3, training score :0.935897 , Test Score: 0.725000 

Poly SVM value of C:5, training score :0.929487 , Test Score: 0.725000 

Poly SVM value of C:10, training score :0.929487 , Test Score: 0.700000 

Poly SVM value of C:40, training score :0.961538 , Test Score: 0.700000 

Poly SVM value of C:60, training score :0.974359 , Test Score: 0.700000 

Poly SVM value of C:80, training score :0.974359 , Test Score: 0.750000 

Poly SVM value of C:100, training score :0.980769 , Test Score: 0.750000 



In [11]:
for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='rbf',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("RBF SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))

RBF SVM value of C:1, training score :0.782051 , Test Score: 0.800000 

RBF SVM value of C:3, training score :0.871795 , Test Score: 0.825000 

RBF SVM value of C:5, training score :0.897436 , Test Score: 0.700000 

RBF SVM value of C:10, training score :0.910256 , Test Score: 0.750000 

RBF SVM value of C:40, training score :0.955128 , Test Score: 0.675000 

RBF SVM value of C:60, training score :0.961538 , Test Score: 0.725000 

RBF SVM value of C:80, training score :0.967949 , Test Score: 0.700000 

RBF SVM value of C:100, training score :0.974359 , Test Score: 0.725000 



## Try on 6 stocks, 1000 days of data
- "MSFT", "TSLA", "^N225", "^DJI", "BTC-USD", "EURUSD=X"

In [12]:
start = dt.datetime.today() - dt.timedelta(1000)
end = dt.datetime.today()

In [13]:
def calTechnicalIndicator(stocks, start, end):
    cl = pd.DataFrame()
    for ticker in stocks:
        MSTF_cl = pd.DataFrame()
        MSTF_cl = yf.download(ticker, start, end)
        daily_return = MSTF_cl.pct_change()["Adj Close"]
        MSTF_cl["RETURN"] = daily_return
        MSTF_cl["MACD"] = MACD(MSTF_cl, 12, 26, 9)["SIGNAL"]
        MSTF_cl["SMA"] = SMA(daily_return, 5)
        MSTF_cl["EMA"] = EMA(daily_return, 5)
        MSTF_cl = ATR(MSTF_cl, 20)
        MSTF_cl = BollBnd(MSTF_cl,10)
        MSTF_cl["RSI"] = np.array(RSI(MSTF_cl, 14))
        MSTF_cl["ADX"] = np.array(ADX(MSTF_cl, 14))
        cl = cl.append(MSTF_cl)
    return cl

In [14]:
stocks = ["MSFT", "TSLA", "^N225", "^DJI", "BTC-USD", "EURUSD=X" ]

In [15]:
df = calTechnicalIndicator(stocks, start, end)

[*********************100%***********************]  1 of 1 completed
664
[*********************100%***********************]  1 of 1 completed
664
[*********************100%***********************]  1 of 1 completed
639
[*********************100%***********************]  1 of 1 completed
664
[*********************100%***********************]  1 of 1 completed
972
[*********************100%***********************]  1 of 1 completed
669


In [16]:
df.dropna(inplace=True)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RETURN,MACD,SMA,EMA,TR,ATR,MA,BB_UP,BB_DOWN,BB_RANGE,RSI,ADX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-08-16,108.300003,108.860001,107.300003,107.639999,104.268089,21384300,-0.000186,104.633042,-0.002925,-0.001876,4.702538,4.596897,104.901703,105.697325,104.106081,1.591244,57.992994,27.771456
2018-08-17,107.360001,107.900002,106.690002,107.580002,104.209976,18061500,-0.000557,104.548428,-0.001815,-0.001436,4.591911,4.598088,104.897265,105.744284,104.050246,1.694037,57.710991,26.250663
2018-08-20,107.510002,107.900002,106.480003,106.870003,103.522209,17914200,-0.006600,104.343183,-0.001686,-0.003157,3.690025,4.499908,104.815366,105.650653,103.980080,1.670573,54.342974,24.679578
2018-08-21,106.919998,107.349998,105.849998,105.980003,102.660103,22881900,-0.008328,104.006566,-0.005846,-0.004881,4.377792,4.521138,104.574884,105.367197,103.782571,1.584627,50.374440,23.083707
2018-08-22,105.849998,107.339996,105.779999,107.059998,103.706261,18000600,0.010191,103.946505,-0.001096,0.000143,4.689896,4.509016,104.380155,105.076521,103.683790,1.392731,54.697852,21.654217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-22,1.212445,1.215510,1.209219,1.212636,1.212636,0,0.002850,1.209875,0.000075,0.001020,0.006472,0.006203,1.210393,1.212770,1.208017,0.004754,52.073570,12.253703
2021-02-23,1.216693,1.217878,1.213681,1.216680,1.216680,0,0.003335,1.211236,0.000575,0.001792,0.006291,0.006173,1.211525,1.214715,1.208336,0.006380,56.159439,11.900740
2021-02-24,1.215200,1.217404,1.211130,1.215214,1.215214,0,-0.001204,1.212032,0.000989,0.000793,0.004198,0.006050,1.211847,1.215486,1.208208,0.007278,54.351492,11.069688
2021-02-25,1.216951,1.224155,1.215658,1.217137,1.217137,0,0.001582,1.213053,0.002041,0.001056,0.006274,0.005817,1.212349,1.216306,1.208391,0.007915,56.337917,11.421201


In [17]:
y_t = np.array(df['RETURN'])

y_t = np.where(y_t < 0, 0, 1)

X_t = df
X_t = df.drop(["Open", "High", "Low", "Adj Close", "Volume", "RETURN" ],axis=1)
X_t = np.array(X_t)

lab_enc = preprocessing.LabelEncoder()
y_t = lab_enc.fit_transform(y_t)

print("shape of Y :"+ str(y_t.shape))
print("shape of X :"+ str(X_t.shape))

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_t = scaler.fit_transform(X_t)

shape of Y :(4110,)
shape of X :(4110, 12)


In [18]:
X_train,X_test,Y_train,Y_test = train_test_split(X_t,y_t,test_size=.20,random_state=42)
print("shape of X Train :"+str(X_train.shape))
print("shape of X Test :"+str(X_test.shape))
print("shape of Y Train :"+str(Y_train.shape))
print("shape of Y Test :"+str(Y_test.shape))


print(utils.multiclass.type_of_target(Y_train))
print(utils.multiclass.type_of_target(Y_test))

shape of X Train :(3288, 12)
shape of X Test :(822, 12)
shape of Y Train :(3288,)
shape of Y Test :(822,)
binary
binary


In [19]:
for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='linear',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("Linear SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))

for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='poly',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("Poly SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))
    
for this_C in [1,3,5,10,40,60,80,100]:
    clf = SVC(kernel='rbf',C=this_C).fit(X_train,Y_train)
    scoretrain = clf.score(X_train,Y_train)
    scoretest  = clf.score(X_test,Y_test)
    print("RBF SVM value of C:{}, training score :{:2f} , Test Score: {:2f} \n".format(this_C,scoretrain,scoretest))

Linear SVM value of C:1, training score :0.680353 , Test Score: 0.680049 

Linear SVM value of C:3, training score :0.714720 , Test Score: 0.722628 

Linear SVM value of C:5, training score :0.723236 , Test Score: 0.738443 

Linear SVM value of C:10, training score :0.745742 , Test Score: 0.753041 

Linear SVM value of C:40, training score :0.768248 , Test Score: 0.762774 

Linear SVM value of C:60, training score :0.773114 , Test Score: 0.772506 

Linear SVM value of C:80, training score :0.774027 , Test Score: 0.774939 

Linear SVM value of C:100, training score :0.772506 , Test Score: 0.779805 

Poly SVM value of C:1, training score :0.744526 , Test Score: 0.734793 

Poly SVM value of C:3, training score :0.761253 , Test Score: 0.754258 

Poly SVM value of C:5, training score :0.768248 , Test Score: 0.757908 

Poly SVM value of C:10, training score :0.776460 , Test Score: 0.773723 

Poly SVM value of C:40, training score :0.788625 , Test Score: 0.774939 

Poly SVM value of C:60, tra