In [1]:
import pandas as pd
import ta 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import optuna
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

dataset = pd.read_csv("aapl_5m_train.csv")
dataset = dataset.drop(columns=['Volume', "Gmtoffset", "Timestamp", "Unnamed: 0", "Datetime"])
dataset.head()

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,Open,High,Low,Close
0,133.570007,133.611602,132.389999,132.809997
1,132.75,132.75,131.809997,131.889999
2,131.5,132.339996,131.5,132.059997
3,132.0,132.25,131.899993,132.25
4,132.0,132.018096,131.520004,131.589996


In [4]:
def func_objective(trial, df):

    dataset = df.copy()

    rsi_window = trial.suggest_int("rsi_window", 5, 20)
    ultimate_window1 = trial.suggest_int("ultimate_window1", 1, 10)
    ultimate_window2 = trial.suggest_int("ultimate_window2", 10, 20)
    ultimate_window3 = trial.suggest_int("ultimate_window3", 20, 30)
    
    williams_lbp = trial.suggest_int("williams_lbp", 10, 20)

    dataset["RSI"] = ta.momentum.RSIIndicator(dataset.Close, window=rsi_window).rsi()
    
    dataset["ultimate"] = ta.momentum.UltimateOscillator(
        high=dataset['High'], low=dataset['Low'], close=dataset['Close'],
        window1=ultimate_window1, window2=ultimate_window2, window3=ultimate_window3
    ).ultimate_oscillator()
    
    dataset['Williams'] = ta.momentum.WilliamsRIndicator(
        high=dataset['High'], low=dataset['Low'], close=dataset['Close'], lbp=williams_lbp
    ).williams_r()

    # Retorno futuro a 2 horas
    dataset['future_return'] = dataset['Close'].shift(-24) / dataset['Close'] - 1

    buy_threshold = 0.015
    sell_threshold = -0.015

    def generate_signal(x):
        if x > buy_threshold:
            return 'BUY'
        elif x < sell_threshold:
            return 'SELL'
        else:
            return 'WAIT'

    dataset['signal'] = dataset['future_return'].apply(generate_signal)
    
    dataset = dataset.dropna()

    X = dataset.drop("signal", axis=1)
    y = dataset[["signal"]]
    index = dataset.index

    X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
        X, y, index, test_size=0.2, shuffle=False
    )
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    C = trial.suggest_float("C", 0.01, 100, log=True)
    
    svm = SVC(kernel="rbf", C=C, gamma='scale', class_weight='balanced', max_iter=10_000)
    svm.fit(X_train, y_train)
    
    ypred = svm.predict(X_test)
    
    f1_macro = f1_score(y_test, ypred, average='macro')
    
    return f1_macro
    
    

In [5]:
study = optuna.create_study(direction="maximize")
study.optimize(lambda t: func_objective(t, dataset), n_trials=10)


[I 2025-03-25 22:29:16,588] A new study created in memory with name: no-name-5e477e2a-1099-4717-999f-de5e2ead81dd
  y = column_or_1d(y, warn=True)
[I 2025-03-25 22:29:27,024] Trial 0 finished with value: 0.8478364767965928 and parameters: {'rsi_window': 13, 'ultimate_window1': 4, 'ultimate_window2': 19, 'ultimate_window3': 21, 'williams_lbp': 12, 'C': 0.0256509155894321}. Best is trial 0 with value: 0.8478364767965928.
  y = column_or_1d(y, warn=True)
[I 2025-03-25 22:29:28,637] Trial 1 finished with value: 0.9651417023899477 and parameters: {'rsi_window': 14, 'ultimate_window1': 3, 'ultimate_window2': 19, 'ultimate_window3': 25, 'williams_lbp': 18, 'C': 3.9269902928454545}. Best is trial 1 with value: 0.9651417023899477.
  y = column_or_1d(y, warn=True)
[I 2025-03-25 22:29:34,189] Trial 2 finished with value: 0.8932748089830622 and parameters: {'rsi_window': 12, 'ultimate_window1': 4, 'ultimate_window2': 14, 'ultimate_window3': 27, 'williams_lbp': 17, 'C': 0.11165669682391491}. Best i

In [6]:
study.best_params

{'rsi_window': 14,
 'ultimate_window1': 3,
 'ultimate_window2': 19,
 'ultimate_window3': 25,
 'williams_lbp': 18,
 'C': 3.9269902928454545}

In [7]:
study.best_value

0.9651417023899477