In [345]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pandas_ta as ta
from DataManager import DMGT

In [346]:
def rolling_tt(series, n):
    '''

    :param series: close price for eth/btc in pandas series
    :param n: lookahead from constructor
    :return: prediction for close_{+n}
    '''

    y = series.values.reshape(-1, 1)
    t = np.arange(len(y))
    X = np.c_[np.ones_like(y), t, t ** 2]
    betas = np.linalg.inv(X.T @ X) @ X.T @ y
    new_vals = np.array([1, t[-1]+n, (t[-1]+n)**2])
    pred = new_vals@betas  # beta0 + beta1 * t[-1]+n + beta2 * (t[-1]+n)**2
    return pred

In [347]:
dmgt = DMGT('../../BTCUSDT.csv')
dmgt.change_resolution('30min')
df = dmgt.df.copy()

In [348]:
df['ma_50'] = ta.sma(df.close,50)
df['ma_200'] = ta.sma(df.close,200)
macd = ta.macd(df.close,12,26)
df =  pd.concat([df,macd],axis=1)
df['preds'] = df.close.rolling(12).apply(rolling_tt, args=(4,), raw=False)
df['preds'] = (df['preds']>df['close']) * 1
df['target'] = (df.close<df.close.shift(-3)) * 1
df =df.rename(columns={'MACD_12_26_9':'macd', 'MACDh_12_26_9':'macdh', 'MACDs_12_26_9':'macds'})
df = df.dropna()
df_train = df.iloc[:int(len(df)*0.8)]
df_test = df.iloc[int(len(df)*0.8):]

In [349]:
from collections import deque
import random
def create_sequences(df):
    sequences = []
    lookback_seq = deque(maxlen=12)
    for i in df.values:
        lookback_seq.append([n for n in i[:-1]])
        if len(lookback_seq) == 12:
            sequences.append([np.array(lookback_seq),i[-1]])
            
    buys = [sequences[x] for x in range(len(sequences)) if sequences[x][1] == 1]
    stales = [ sequences[x] for x in range(len(sequences)) if sequences[x][1] == 0]
    min_idx = min(len(buys), len(stales))
    print(min_idx)
    buys = buys[:min_idx]
    stales = stales[:min_idx]
    sequences = buys+stales
    random.shuffle(sequences)
    X = [sequences[x][0] for x in range(len(sequences))]
    y = [sequences[x][1] for x in range(len(sequences))]
    return np.array(X), np.array(y)
    

In [350]:
X_train, y_train = create_sequences(df_train)
X_test, y_test = create_sequences(df_test)

35915
9155


In [351]:
features = ['open',	'high',	'low',	'close','volume','ma_50','ma_200',
            	'macd'	,'macdh','macds','preds']
model = RandomForestClassifier(n_estimators=200, min_samples_split=50, random_state=1)
model.fit(df_train[features],df_train['target'])



In [358]:
preds = model.predict_proba(df_test[features])[:,1]
preds[preds >= 0.8] = 1
preds[preds < 0.8] = 0

In [359]:
from sklearn.metrics import precision_score
precision_score(df_test['target'], preds)

0.6086956521739131

In [360]:
pd.Series(preds).value_counts()

0.0    18334
1.0       23
dtype: int64

In [361]:
df_test['target'].value_counts()

1    9194
0    9163
Name: target, dtype: int64

In [369]:
preds = pd.Series(preds)
targets = pd.DataFrame(df_test['target'])
targets.reset_index(inplace=True)

In [372]:
res = pd.concat([targets,preds], axis=1)

In [370]:
targets

Unnamed: 0,time,target
0,2022-06-09 09:30:00,0
1,2022-06-09 10:00:00,0
2,2022-06-09 10:30:00,0
3,2022-06-09 11:00:00,0
4,2022-06-09 11:30:00,0
...,...,...
18352,2023-06-30 22:00:00,0
18353,2023-06-30 22:30:00,1
18354,2023-06-30 23:00:00,0
18355,2023-06-30 23:30:00,0


In [373]:
res.to_csv('./results.csv')