In [95]:
import os
import sys
import requests as req
import json

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from datetime import datetime as dt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

pd.options.mode.chained_assignment = None

In [96]:
import requests

In [97]:
resp = req.get('http://localhost:8081/stocks', headers={
    'x-service-token': '63071cfea6be52aadcd12e4170499a339eb5f481f2fa99b6d265ec335581ef77',
})

print(resp.status_code)

200


In [98]:
content = resp.json()

stocks_df = pd.DataFrame(content['stocks'])

In [99]:
date_format = "%Y-%m-%d"

stocks_df['stocked_time'] = pd.to_datetime(
    stocks_df['stocked_time'],
)
stocks_df['stocked_time'] = stocks_df['stocked_time'].dt.strftime(date_format)

In [100]:
stocks_df.sort_values(
    by=[
        'ticker_id',
        'stocked_time',
    ],
    inplace=True,
)

In [101]:
past_date_data = stocks_df.query(f"stocked_time == '2023-05-12'") 
past_date_data.head()

Unnamed: 0,ticker_id,open_price,close_price,highest_price,lowest_price,trading_volume,stocked_time,created_at
15763,A,128.68,127.49,128.68,126.525,1422587,2023-05-12,2023-05-14T13:30:03.140209Z
15764,AA,128.68,127.49,128.68,126.525,1422587,2023-05-12,2023-05-14T13:31:05.253728Z
15765,AAA,24.5899,24.55,24.5899,24.55,1480,2023-05-12,2023-05-14T13:32:05.965977Z
15766,AAAU,20.01,19.94,20.05,19.9197,461356,2023-05-12,2023-05-14T13:33:09.390369Z
15767,AABB,0.0289,0.0345,0.0394,0.0285,29554762,2023-05-12,2023-05-14T13:34:10.254875Z


#### data smooth

In [102]:
aplha_factor = 0.0095

smoothed_df = stocks_df.groupby('ticker_id')[[
    'open_price', 
    'close_price', 
    'highest_price',
    'lowest_price',
    'trading_volume',
]].transform(
    lambda x: x.ewm(alpha=aplha_factor).mean()
)

smoothed_df = pd.concat([
    stocks_df[['ticker_id', 'stocked_time']],
    smoothed_df
], axis=1, sort=False)

stocks_df = smoothed_df

In [103]:
# stocks_df.head()

In [104]:
stocks_df['price_change'] = stocks_df['close_price'].diff()

In [105]:
mask = stocks_df['ticker_id'] != stocks_df['ticker_id'].shift(1)

stocks_df['price_change'] = np.where(
    mask == True,
    np.nan,
    stocks_df['price_change'],
)

In [106]:
stocks_df[stocks_df.isna().any(axis=1)]

Unnamed: 0,ticker_id,stocked_time,open_price,close_price,highest_price,lowest_price,trading_volume,price_change
0,A,2022-04-18,126.440000,123.840000,126.750000,123.130000,1597266.0,
250,AA,2022-04-18,126.440000,123.840000,126.750000,123.130000,1597266.0,
500,AAA,2022-04-18,24.888300,24.868300,24.910000,24.861100,904.0,
750,AAALY,2022-09-08,34.000000,34.000000,34.000000,34.000000,105.0,
752,AAAU,2022-04-18,19.790000,19.650000,19.810100,19.635000,513638.0,
...,...,...,...,...,...,...,...,...
173766,ANTH,2022-06-09,0.000001,0.000001,0.000001,0.000001,187.0,
173822,ANTI,2022-08-02,0.000001,0.000001,0.000001,0.000001,20000.0,
173825,ANTMF,2022-06-13,0.045400,0.045400,0.045400,0.045400,1667.0,
148023,IBM,2022-05-16,133.100000,135.030000,136.510000,132.410000,4250395.0,


In [107]:
period = 27

sample_df = stocks_df[['ticker_id', 'price_change']].copy()

up_df = sample_df.copy()
up_df.loc['price_change'] = up_df.loc[(up_df['price_change'] < 0), 'price_change'] = 0

down_df = sample_df.copy()
down_df.loc['price_change'] = down_df.loc[(down_df['price_change'] > 0), 'price_change'] = 0

down_df['price_change'] = down_df['price_change'].abs()

def form_moving_window(df, n):
    return df.groupby('ticker_id')['price_change'].transform(
        lambda x: x.ewm(span=n).mean(),
    )

ewm_up = form_moving_window(up_df, period)
ewm_down = form_moving_window(down_df, period)

relative_strength = ewm_up / ewm_down
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))


In [108]:
stocks_df['rsi_indicator'] = relative_strength_index

In [109]:
# stocks_df.head(n=27)

In [110]:
def calculate_min_for_lowest(df, n):
    lowest_df = df[['ticker_id', 'lowest_price']].copy()

    min_for_lowest = lowest_df.groupby('ticker_id')['lowest_price'].transform(
        lambda x: x.rolling(window=n).min()
    )
    return min_for_lowest

In [111]:
def calculate_max_for_highest(df,  n):
    highest_df = df[['ticker_id', 'highest_price']].copy()

    max_for_highest = highest_df.groupby('ticker_id')['highest_price'].transform(
        lambda x: x.rolling(window=n).max()
    )
    return max_for_highest

In [112]:
period = 14

lowest_df = calculate_min_for_lowest(stocks_df, period)
highest_df = calculate_max_for_highest(stocks_df, period)

In [113]:
close_df = stocks_df['close_price']

stochastic = 100.0 * ((close_df - lowest_df) / (highest_df - lowest_df))

stocks_df['stochastic_indicator'] = stochastic

In [114]:
period = 14

lowest_df = calculate_min_for_lowest(stocks_df, period)
highest_df = calculate_max_for_highest(stocks_df, period)

In [115]:
close_df = stocks_df['close_price']

williams_pr = -100.0 * ((highest_df - close_df) / (highest_df - lowest_df))

stocks_df['williams_indicator'] = williams_pr

In [116]:
ema_26 = stocks_df.groupby('ticker_id')['close_price'].transform(
    lambda x: x.ewm(span=26).mean()
)

ema_12 = stocks_df.groupby('ticker_id')['close_price'].transform(
    lambda x: x.ewm(span=12).mean()
)

macd_indicator = ema_12 - ema_26

sl_indicator = macd_indicator.ewm(span=9).mean()

stocks_df['macd_indicator'] = macd_indicator
stocks_df['sl_indicator'] = sl_indicator

In [117]:
# stocks_df.head(n=2)

In [120]:
period = 21

stocks_df['roc_indicator'] = stocks_df.groupby('ticker_id')['close_price'].transform(
    lambda x: x.pct_change(periods=period)
)

In [121]:
# stocks_df.head(n=30)

In [122]:
close_df = stocks_df.groupby('ticker_id')['close_price'].transform(
    lambda x: np.sign(x.diff()).shift(1)
)

In [123]:
stocks_df['prediction'] = close_df 

stocks_df.loc[stocks_df['prediction'] == 0.0] = 1.0


In [124]:
# stocks_df.head(n=4)

In [125]:
stocks_df

stocks_df = stocks_df.dropna()

In [127]:
past_date_data = stocks_df.query(f"stocked_time == '2023-05-12'") 
past_date_data.head()

Unnamed: 0,ticker_id,stocked_time,open_price,close_price,highest_price,lowest_price,trading_volume,price_change,rsi_indicator,stochastic_indicator,williams_indicator,macd_indicator,sl_indicator,roc_indicator,prediction
15763,A,2023-05-12,139.216358,139.102703,140.643451,137.466719,1458569.0,-0.120633,3.369221,38.680255,-61.319745,-0.33257,-0.271758,-0.008951,-1.0
15764,AA,2023-05-12,121.95027,121.875584,123.287112,120.364793,1973234.0,0.058322,0.943138,10.919758,-89.080242,-4.382724,-3.690893,-0.131687,-1.0
15765,AAA,2023-05-12,24.455564,24.454108,24.4613,24.447446,687.4034,0.000996,99.920203,74.454536,-25.545464,0.007698,0.007767,0.000993,1.0
15766,AAAU,2023-05-12,18.551634,18.559848,18.640637,18.468133,1548967.0,0.014337,99.993925,78.390098,-21.609902,0.109396,0.108021,0.018119,1.0
15767,AABB,2023-05-12,0.036106,0.035944,0.037786,0.034429,7578087.0,-1.5e-05,5.5e-05,31.52388,-68.47612,-0.000941,-0.000996,-0.066971,-1.0


In [128]:
x_samples = stocks_df[[
    'rsi_indicator', 
    'stochastic_indicator', 
    'williams_indicator', 
    'roc_indicator',
    'macd_indicator',
    'sl_indicator',
]]
y_samples = stocks_df['prediction']



x_train, x_test, y_train, y_test = train_test_split(
    x_samples, 
    y_samples, 
    random_state=0,
    shuffle=False, 
    stratify=None,
)

In [129]:
rf_classifier = RandomForestClassifier(
    bootstrap=True,
    n_estimators = 100, 
    oob_score = True, 
    criterion = "gini", 
    random_state = 0,
)

In [130]:
rf_classifier.fit(x_train, y_train)

In [131]:
predict = rf_classifier.predict(x_test)

In [132]:
real = y_test.tolist()

for idx in range(len(predict)):
    print(f'predict: {predict[idx]} real: {real[idx]}')

predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: 1.0 real: 1.0
predict: -1.0 real: -1.0
predict: 

In [133]:
score = accuracy_score(y_test, rf_classifier.predict(x_test), normalize=True)
print(f'accuracy score: {score}')

accuracy score: 0.9036336563830621


In [134]:
report = classification_report(
    y_true=y_test, 
    y_pred=predict,
    target_names=['reduced', 'increased'],
    output_dict=True,
)

report_df = pd.DataFrame(report).transpose()
report_df

Unnamed: 0,precision,recall,f1-score,support
reduced,0.91739,0.917513,0.917452,22343.0
increased,0.884343,0.884176,0.884259,15938.0
accuracy,0.903634,0.903634,0.903634,0.903634
macro avg,0.900866,0.900845,0.900856,38281.0
weighted avg,0.903631,0.903634,0.903632,38281.0


In [135]:
report_df.loc['accuracy']

precision    0.903634
recall       0.903634
f1-score     0.903634
support      0.903634
Name: accuracy, dtype: float64

In [136]:
indicators_factors_df = pd.Series(
    rf_classifier.feature_importances_,
    index=x_samples.columns,
).sort_values(
    ascending=False,
).to_frame()

In [137]:
indicators_factors_df.columns = ['factor']
indicators_factors_df

Unnamed: 0,factor
stochastic_indicator,0.287054
williams_indicator,0.268307
rsi_indicator,0.177164
roc_indicator,0.115512
macd_indicator,0.083682
sl_indicator,0.068281


In [138]:
stocks_df

Unnamed: 0,ticker_id,stocked_time,open_price,close_price,highest_price,lowest_price,trading_volume,price_change,rsi_indicator,stochastic_indicator,williams_indicator,macd_indicator,sl_indicator,roc_indicator,prediction
21,A,2022-05-17,120.793990,120.688963,122.768383,118.747121,2.067932e+06,0.126253,25.630479,32.897861,-67.102139,-0.613564,-0.508192,-0.025444,-1.0
22,A,2022-05-18,120.813076,120.693832,122.726773,118.822444,2.032656e+06,0.004870,25.733489,33.565639,-66.434361,-0.607528,-0.528177,-0.035926,1.0
23,A,2022-05-19,120.789734,120.772998,122.766362,118.855275,2.045528e+06,0.079166,27.491848,36.176491,-63.823509,-0.590227,-0.540646,-0.046009,1.0
24,A,2022-05-20,120.934730,120.967967,122.892268,119.015686,2.070104e+06,0.194969,31.776008,39.410396,-60.589604,-0.556281,-0.543785,-0.041959,1.0
25,A,2022-05-23,121.173127,121.184611,123.046915,119.220758,2.068679e+06,0.216644,36.281194,43.003824,-56.996176,-0.508125,-0.536631,-0.032519,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149292,MSFT,2023-05-08,262.557160,262.918386,265.666789,259.749626,2.981217e+07,0.485443,99.776216,75.569362,-24.430638,2.295235,2.087863,0.031103,1.0
149293,MSFT,2023-05-09,263.033993,263.380935,266.132398,260.238185,2.972328e+07,0.462549,99.795689,75.841425,-24.158575,2.353397,2.140970,0.031422,1.0
149294,MSFT,2023-05-10,263.511853,263.893839,266.623693,260.735394,2.972700e+07,0.512904,99.814921,76.425153,-23.574847,2.413062,2.195388,0.032240,1.0
149295,MSFT,2023-05-11,263.999739,264.377829,267.089672,261.212143,2.974746e+07,0.483990,99.831079,77.006898,-22.993102,2.470917,2.250494,0.032927,1.0


In [139]:
import datetime

In [141]:
print(past_date_data['ticker_id'].to_list()[:10])

['A', 'AA', 'AAA', 'AAAU', 'AABB', 'AABVF', 'AAC', 'AAC.WS', 'AACAY', 'AACG']


In [93]:
print(datetime.datetime.now().date())

2023-05-15


In [94]:
past_date_indicators = past_date_data[[
    'rsi_indicator',
    'stochastic_indicator',
    'williams_indicator',
    'roc_indicator',
    'macd_indicator',
    'sl_indicator',
]]


predicts = rf_classifier.predict(past_date_indicators)

In [665]:
from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

In [671]:
estimator = rf_classifier.estimators_[0]

export_graphviz(
    estimator, 
    out_file='tree.dot', 
    feature_names = x_samples.columns,
    class_names=['reduced', 'increased'],
    rounded = True, 
    proportion = False, 
    precision = 2, 
    filled = True,
)

call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])

Image(filename = 'tree.png')