In [24]:
import yfinance as yf
import pandas as pd
import ta
from sklearn.model_selection import GridSearchCV
from backtesting import Backtest, Strategy
import xgboost as xgb
from backtesting import Backtest, Strategy
from sklearn.metrics import balanced_accuracy_score
import plotly.graph_objects as go
import numpy as np

In [4]:
train_data = yf.download('AMZN', start='2021-01-01', end='2023-12-31')
test_data = yf.download('AMZN', start='2024-01-01', end='2024-05-06')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


### Wizualizacja

In [7]:
def plot_signals(data):
    close_prices = data['Close']

    # Generowanie sygnałów kupna (jeśli następnego dnia cena rośnie)
    signals_buy = ((data['Close'].shift(-1) / data['Close']) > 1).astype(int)
    signals_buy = signals_buy.reset_index()
    highlight_buy = signals_buy[signals_buy['Close'] == 1]['Date']

    # Generowanie sygnałów sprzedaży (jeśli następnego dnia cena spada)
    signals_sell = ((data['Close'].shift(1) / data['Close']) < 1).astype(int)
    signals_sell = signals_sell.reset_index()
    highlight_sell = signals_sell[signals_sell['Close'] == 1]['Date']

    # Wykres interaktywny z użyciem Plotly
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=close_prices.index, y=close_prices, mode='lines', name='Cena zamknięcia'))

    # Dodawanie zielonych punktów na określonych datach sygnałów kupna
    for date in highlight_buy:
        if date in data.index:
            fig.add_trace(go.Scatter(
                x=[date], 
                y=[data.loc[date, 'Close']], 
                mode='markers', 
                marker=dict(color='green', size=6), 
                name=f'Kup {date}'
            ))

    # Dodawanie czerwonych punktów na określonych datach sygnałów sprzedaży
    for date in highlight_sell:
        if date in data.index:
            fig.add_trace(go.Scatter(
                x=[date], 
                y=[data.loc[date, 'Close']], 
                mode='markers', 
                marker=dict(color='red', size=6), 
                name=f'Sprzedaj {date}'
            ))

    fig.update_layout(
        title='Zmiany ceny zamknięcia akcji',
        xaxis_title='Data',
        yaxis_title='Cena zamknięcia (USD)',
        template='plotly_white',
        showlegend=False
    )

    fig.show()


plot_signals(train_data)

In [8]:
plot_signals(test_data)

### Przygotowanie danych

In [13]:
def calculate_technical_indicators(data, window=14):
    data['SMA'] = ta.trend.sma_indicator(data['Close'], window=window)
    data['SMA_2w'] = ta.trend.sma_indicator(data['Close'], window=2*window)
    data['RSI'] = ta.momentum.rsi(data['Close'], window=window)
    data['RSI_2w'] = ta.momentum.rsi(data['Close'], window=2*window)
    data['ADX'] = ta.trend.adx(data['High'], data['Low'], data['Close'], window=window)
    data['Stochastic'] = ta.momentum.stoch(data['High'], data['Low'], data['Close'], window=window)
    data['BB_upper'], data['BB_middle'], data['BB_lower'] = ta.volatility.bollinger_hband(data['Close']), ta.volatility.bollinger_mavg(data['Close']), ta.volatility.bollinger_lband(data['Close'])
    data['ATR'] = ta.volatility.average_true_range(data['High'], data['Low'], data['Close'], window=window)
    data['DI_pos'] = ta.trend.adx_pos(data['High'], data['Low'], data['Close'], window=window)
    data['DI_neg'] = ta.trend.adx_neg(data['High'], data['Low'], data['Close'], window=window)
    
    data = data.dropna()
    
    return data

In [14]:
X_data=pd.concat([train_data, test_data], axis=0)

conditions = [
    (X_data['Close'].shift(-1) / X_data['Close']) > 1,
    (X_data['Close'].shift(1) / X_data['Close']) < 1
]
choices = [1, 2]

X_data['signal'] = np.select(conditions, choices, default=0)


X_data_prep=calculate_technical_indicators(X_data) #wykonano tę funkcje na wszystkich danych, aby nie tracić początkowych obserwacji z 2024

In [17]:
# Podział danych na treningowe i testowe
train_data_prep = X_data_prep[(X_data_prep.index.year >= 2021) & (X_data_prep.index.year <= 2022)]
test_data_prep = X_data_prep[X_data_prep.index.year == 2024]

X_train = train_data_prep.drop(columns="signal")
y_train = train_data_prep["signal"]

X_test = test_data_prep.drop(columns="signal")
y_test = test_data_prep["signal"]

In [20]:
# Przygotowanie modelu XGBoost
model = xgb.XGBClassifier()

# Definicja siatki parametrów
param_grid = {
    'n_estimators': [100, 150, 200],
    'max_depth': [3, 6, 9, 12],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Optymalizacja hiperparametrów
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Najlepsze parametry
best_params = grid_search.best_params_
print("Najlepsze parametry: ", best_params)

# Stworzenie modelu z najlepszymi parametrami
model = xgb.XGBClassifier(**best_params)
model.fit(X_train, y_train)

Najlepsze parametry:  {'colsample_bytree': 1.0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}


In [25]:
# Predykcja
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

# Ocena modelu
train_accuracy = balanced_accuracy_score(y_train, y_pred_train)
test_accuracy = balanced_accuracy_score(y_test, y_pred_test)

print(train_accuracy)
print(test_accuracy)

0.5089175966971591
0.434695782008212


In [26]:
y_pred_test

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1,
       1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 0, 0, 2, 1, 2,
       0, 2, 2, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1],
      dtype=int64)

In [28]:
# Dodanie kolumny predykcji do danych testowych
test_data_prep['signal'] = y_pred_test

# Definiowanie strategii w backtesting.py
class MLStrategy(Strategy):
    def init(self):
        self.signal = self.data.signal

    def next(self):
        if self.signal == 1 and not self.position:
            self.buy()
        elif self.signal == 2 and self.position:
            self.sell()

# Przeprowadzenie backtestu
bt = Backtest(test_data_prep, MLStrategy, cash=10000, commission=.002)
stats = bt.run()
bt.plot()

# Wyświetlanie statystyk
print(stats)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Passing lists of formats for DatetimeTickFormatter scales was deprecated in Bokeh 3.0. Configure a single string format for each scale


DatetimeFormatter scales now only accept a single format. Using the first provided: '%d %b'


Passing lists of formats for DatetimeTickFormatter scales was deprecated in Bokeh 3.0. Configure a single string format for each scale


DatetimeFormatter scales now only accept a single format. Using the first provided: '%m/%Y'


'M' is deprecated and will be removed in a future version, please use 'ME' instead.


found multiple competing values for 'toolbar.active_drag' property; using the latest value


found multiple competing values for 'toolbar.active_scroll' property; using the latest value

Start                     2024-01-02 00:00:00
End                       2024-05-03 00:00:00
Duration                    122 days 00:00:00
Exposure Time [%]                   97.674419
Equity Final [$]                 12795.400383
Equity Peak [$]                  12935.480217
Return [%]                          27.954004
Buy & Hold Return [%]                24.19797
Return (Ann.) [%]                  105.917267
Volatility (Ann.) [%]               55.006689
Sharpe Ratio                         1.925534
Sortino Ratio                        7.542252
Calmar Ratio                        13.100381
Max. Drawdown [%]                   -8.085052
Avg. Drawdown [%]                   -2.323009
Max. Drawdown Duration       22 days 00:00:00
Avg. Drawdown Duration        8 days 00:00:00
# Trades                                    1
Win Rate [%]                            100.0
Best Trade [%]                      28.179667
Worst Trade [%]                     28.179667
Avg. Trade [%]                    

Osiągnięto 27,95% zwrotu  inwestycji.