In [1]:
import pandas as pd
import optuna
import yfinance as yf
import pandas_datareader.data as web
from datetime import datetime
import numpy as np
import pandas_ta as ta
from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.optimizers import Nadam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

In [2]:
import warnings
warnings.filterwarnings('ignore', category  = pd.errors.PerformanceWarning)

In [3]:
# Pobieramy dane dotyczące BTC z yahoo
BTC = yf.download('BTC-USD', start = '2018-01-01', end = '2019-12-31')

BTC['Target'] = (BTC['Close'].diff() > 0).astype(int)

def add_lagged_features(data, column_name, n_days):
    for i in range(1, n_days + 1):
        lagged_column_name = f'{column_name}_{i}d_back'
        data[lagged_column_name] = data[column_name].shift(i)
    return data

BTC = add_lagged_features(BTC, 'Close', 30)
BTC = add_lagged_features(BTC, 'Volume', 30)
BTC = add_lagged_features(BTC, 'High', 1)
BTC = add_lagged_features(BTC, 'Low', 1)

# print(BTC.head())
# print(len(BTC))
# print(BTC.shape)

[*********************100%%**********************]  1 of 1 completed


In [4]:
# SMA
BTC['SMA'] = ta.sma(BTC['Close_1d_back'], length = 30)

# MFI 
BTC['MFI'] = ta.mfi(BTC['High_1d_back'], BTC['Low_1d_back'], 
                    BTC['Close_1d_back'], BTC['Volume_1d_back'], length = 29)

# RSI 
BTC['RSI'] = ta.rsi(BTC['Close_1d_back'], length = 29)

# Bollinger Bands
bollinger = ta.bbands(BTC['Close_1d_back'], length = 30, std = 2)
BTC['BB_Upper'] = bollinger['BBU_30_2.0']
BTC['BB_Middle'] = bollinger['BBM_30_2.0']
BTC['BB_Lower'] = bollinger['BBL_30_2.0']

# CCI 
BTC['CCI'] = ta.cci(BTC['High_1d_back'], BTC['Low_1d_back'], 
                    BTC['Close_1d_back'], length = 30)

In [5]:
high_series = BTC['High_1d_back']
low_series = BTC['Low_1d_back']
close_series = BTC['Close_1d_back']

# Wprowadzenie jasnych parametrów dla kanału Donchian
donchian = ta.donchian(high = high_series, low = low_series, close = close_series, lower_length = 30, upper_length = 30)

# Przypisanie wyników kanału Donchian do DataFrame
BTC['Donchian_High'] = donchian['DCU_30_30']
BTC['Donchian_Low'] = donchian['DCL_30_30']

In [6]:
# Usuwamy pierwszych 30 wierszy
BTC = BTC.dropna().reset_index(drop = True)

In [7]:
# Konwersacja zmiennej Target na zmienną binarną
target = to_categorical(BTC['Target'].values)

# Usuwamy niepotrzebne kolumny
features = BTC.drop(['Target', 'Adj Close'], axis = 1).values 

# Dzielimy dane na treningowe i testowe
split_idx = int(len(features) * 0.8)

m = int(np.floor(0.8 * len(features)))
train_features = features[:m]
test_features = features[m:]
train_target = target[:m]
test_target = target[m:]

In [None]:
##################################################
##################################################
# WERSJA MAŁO SKOMPLIKOWANA
##################################################
##################################################

In [8]:
# Budujemy model
model = Sequential([
    Dense(64, activation = 'relu', input_shape = (train_features.shape[1],)),
    Dense(64, activation = 'relu'),
    Dense(2, activation = 'softmax')
])

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Trenujemy model
history = model.fit(train_features, train_target, epochs = 50, batch_size = 12, validation_split = 0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.4982 - loss: 296839808.0000 - val_accuracy: 0.5714 - val_loss: 595772288.0000
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5257 - loss: 186140720.0000 - val_accuracy: 0.5268 - val_loss: 466046688.0000
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5839 - loss: 176296976.0000 - val_accuracy: 0.4643 - val_loss: 460460768.0000
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5595 - loss: 115678784.0000 - val_accuracy: 0.5804 - val_loss: 424016608.0000
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5546 - loss: 112637464.0000 - val_accuracy: 0.5982 - val_loss: 649639872.0000
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5602 - loss: 12946198

Epoch 48/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6118 - loss: 5278472.5000 - val_accuracy: 0.4821 - val_loss: 32154178.0000
Epoch 49/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6308 - loss: 3364639.7500 - val_accuracy: 0.4554 - val_loss: 93525688.0000
Epoch 50/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6089 - loss: 13856806.0000 - val_accuracy: 0.4732 - val_loss: 54569636.0000


In [9]:
# Predykcja na zbiorze treningowym
train_predictions = model.predict(train_features)
train_predictions = np.argmax(train_predictions, axis = 1)
train_true = np.argmax(train_target, axis = 1)

# Obliczanie macierzy pomyłek
cm = confusion_matrix(train_true, train_predictions)
TP = cm[1, 1]  # True Positives
TN = cm[0, 0]  # True Negatives
FP = cm[0, 1]  # False Positives
FN = cm[1, 0]  # False Negatives

print(TP)
print(TN)
print(FP)
print(FN)

# TPR, FPR, TNR, PPV, NPV
ACC = (TP + TN) / (TP + TN + FP + FN)
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)

print('Accuracy (ACC):', ACC)
print('True Positive Rate (TPR):', TPR)
print('False Positive Rate (FPR):', FPR)
print('True Negative Rate (TNR):', TNR)
print('Positive Predictive Value (PPV):', PPV)
print('Negative Predictive Value (NPV):', NPV)

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
233
93
163
70
Accuracy (ACC): 0.5831842576028623
True Positive Rate (TPR): 0.768976897689769
False Positive Rate (FPR): 0.63671875
True Negative Rate (TNR): 0.36328125
Positive Predictive Value (PPV): 0.5883838383838383
Negative Predictive Value (NPV): 0.5705521472392638


In [10]:
# Predykcja na zbiorze testowym
test_predictions = model.predict(test_features)
test_predictions = np.argmax(test_predictions, axis = 1)
test_true = np.argmax(test_target, axis = 1)

# Obliczanie macierzy pomyłek
cm = confusion_matrix(test_true, test_predictions)
TP = cm[1, 1]  # True Positives
TN = cm[0, 0]  # True Negatives
FP = cm[0, 1]  # False Positives
FN = cm[1, 0]  # False Negatives

print(TP)
print(TN)
print(FP)
print(FN)

# TPR, FPR, TNR, PPV, NPV
ACC = (TP + TN) / (TP + TN + FP + FN)
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)

print('Accuracy (ACC):', ACC)
print('True Positive Rate (TPR):', TPR)
print('False Positive Rate (FPR):', FPR)
print('True Negative Rate (TNR):', TNR)
print('Positive Predictive Value (PPV):', PPV)
print('Negative Predictive Value (NPV):', NPV)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
46
30
51
13
Accuracy (ACC): 0.5428571428571428
True Positive Rate (TPR): 0.7796610169491526
False Positive Rate (FPR): 0.6296296296296297
True Negative Rate (TNR): 0.37037037037037035
Positive Predictive Value (PPV): 0.4742268041237113
Negative Predictive Value (NPV): 0.6976744186046512


In [None]:
##################################################
##################################################
##################################################
##################################################

In [None]:
##################################################
##################################################
# WERSJA BARDZIEJ SKOMPLIKOWANA
##################################################
##################################################

In [None]:
# Korzystamy z biblioteki OPTUNA, która służy do automatycznego dostrajania hiperparametrów
def create_model(trial):
    # Liczba warstw ukrytych, od 1 do 3
    n_layers = trial.suggest_int('n_layers', 1, 3)
    model = Sequential()
    # Dodanie pierwszej warstwy ukrytej
    model.add(Dense(trial.suggest_int('n_units_first', 10, 300), activation = 'relu', input_shape = (train_features.shape[1],)))
    # Dodanie warstwy Dropout
    model.add(Dropout(trial.suggest_float('dropout_first', 0.0, 0.5)))

    # Dodawanie kolejnych warstw ukrytych w pętli zależnie od liczby warstw
    for i in range(n_layers):
        model.add(Dense(trial.suggest_int(f'n_units_{i}', 10, 300), activation = 'relu'))
        model.add(Dropout(trial.suggest_float(f'dropout_{i}', 0.0, 0.5)))

    # Dodanie warstwy wyjściowej
    model.add(Dense(2, activation = 'softmax'))
    # Sugestia Optuna dotycząca współczynnika uczenia dla optymalizatora Adama
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    model.compile(optimizer = Adam(learning_rate = lr), loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

# Definicja funkcji celu dla procesu dostrajania Optuna
def objective(trial):
    # Tworzenie modelu z bieżącymi parametrami sugerowanymi przez Optuna
    model = create_model(trial)
    # Ustawienie mechanizmu wczesnego zatrzymywania treningu w celu uniknięcia przeuczenia
    early_stopping = EarlyStopping(monitor = 'val_loss', patience = 10, verbose = 1, restore_best_weights = True)
    model.fit(train_features, train_target, epochs = 50, batch_size = 32, validation_split = 0.2, 
              callbacks = [early_stopping], verbose = 0)
    _, accuracy = model.evaluate(test_features, test_target, verbose = 0)
    return accuracy

# Uruchomienie procesu dostrajania
study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = 10)

# Wyświetlenie najlepszych parametrów znalezionych przez Optuna
print('Najlepsze parametry:', study.best_trial.params)

In [None]:
best_params = {'n_layers': 3, 
               'n_units_first': 163, 
               'dropout_first': 0.4646466449656871, 
               'n_units_0': 94, 
               'dropout_0': 0.002135199259788778, 
               'n_units_1': 26, 
               'dropout_1': 0.2559029328072675, 
               'n_units_2': 91, 
               'dropout_2': 0.2524767713200529, 
               'lr': 0.002103151359743856}

# Budujemy model sieci neuronowej
model = Sequential()
model.add(Dense(best_params['n_units_first'], activation = 'relu', input_shape = (train_features.shape[1],)))
model.add(Dropout(best_params['dropout_first']))

model.add(Dense(best_params['n_units_0'], activation = 'relu'))
model.add(Dropout(best_params['dropout_0']))

model.add(Dense(best_params['n_units_1'], activation = 'relu'))
model.add(Dropout(best_params['dropout_1']))

model.add(Dense(best_params['n_units_2'], activation = 'relu'))
model.add(Dropout(best_params['dropout_2']))

model.add(Dense(2, activation = 'softmax'))

# Optymalizator Adam
optimizer = Adam(learning_rate = best_params['lr'])

model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Dodajemy funkcję, która przerwie trenowanie modelu, gdy model przestaje poprawiać swoją wydajność na zbiorze walidacyjnym
early_stopping = EarlyStopping(monitor = 'val_loss',  
                               patience = 10,         # liczba epok bez poprawy po której trening zostanie zatrzymany
                               verbose = 1,           
                               restore_best_weights = True) 


# Trenowanie modelu z dodaniem callbacku
history = model.fit(train_features, train_target, 
                    epochs = 100, 
                    batch_size = 32, 
                    validation_split = 0.2,
                    callbacks = [early_stopping]) 

# Ocena modelu na danych testowych
test_performance = model.evaluate(test_features, test_target)
print('Test loss:', test_performance[0])
print('Test accuracy:', test_performance[1])

# Ocena modelu na danych treningowych
train_performance = model.evaluate(train_features, train_target)
print('Train loss:', train_performance[0])
print('Train accuracy:', train_performance[1])

In [None]:
# Predykcja na zbiorze treningowym
train_predictions = model.predict(train_features)
train_predictions = np.argmax(train_predictions, axis = 1)
train_true = np.argmax(train_target, axis = 1)

# Obliczanie macierzy pomyłek
cm = confusion_matrix(train_true, train_predictions)
TP = cm[1, 1]  # True Positives
TN = cm[0, 0]  # True Negatives
FP = cm[0, 1]  # False Positives
FN = cm[1, 0]  # False Negatives

print(TP)
print(TN)
print(FP)
print(FN)

# TPR, FPR, TNR, PPV, NPV
ACC = (TP + TN) / (TP + TN + FP + FN)
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)

print('Accuracy (ACC):', ACC)
print('True Positive Rate (TPR):', TPR)
print('False Positive Rate (FPR):', FPR)
print('True Negative Rate (TNR):', TNR)
print('Positive Predictive Value (PPV):', PPV)
print('Negative Predictive Value (NPV):', NPV)

In [None]:
# Predykcja na zbiorze testowym
test_predictions = model.predict(test_features)
test_predictions = np.argmax(test_predictions, axis = 1)
test_true = np.argmax(test_target, axis = 1)

# Obliczanie macierzy pomyłek
cm = confusion_matrix(test_true, test_predictions)
TP = cm[1, 1]  # True Positives
TN = cm[0, 0]  # True Negatives
FP = cm[0, 1]  # False Positives
FN = cm[1, 0]  # False Negatives

print(TP)
print(TN)
print(FP)
print(FN)

# TPR, FPR, TNR, PPV, NPV
ACC = (TP + TN) / (TP + TN + FP + FN)
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)

print('Accuracy (ACC):', ACC)
print('True Positive Rate (TPR):', TPR)
print('False Positive Rate (FPR):', FPR)
print('True Negative Rate (TNR):', TNR)
print('Positive Predictive Value (PPV):', PPV)
print('Negative Predictive Value (NPV):', NPV)