In [None]:
# @title 1. Instalação e Importação das Bibliotecas
#!pip install yfinance pywavelets scikit-learn tensorflow numpy matplotlib seaborn --quiet


import yfinance as yf
import pandas as pd
import numpy as np
import pywt
from scipy.fft import rfft, irfft
import os

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import regularizers

import warnings
warnings.filterwarnings('ignore')
tf.random.set_seed(42)
np.random.seed(42)

print("Bibliotecas instaladas e importadas com sucesso!")


Bibliotecas instaladas e importadas com sucesso!


In [3]:
# Corrigindo a célula para evitar erros
import pandas as pd
import numpy as np
from yfinance import download

# Função para baixar dados

def download_data(ticker, start_date, end_date):
    """
    Baixa dados históricos do Yahoo Finance para um único ticker e ajusta o cabeçalho.
    """
    # Baixa os dados do Yahoo Finance
    data = yf.download(ticker, start=start_date, end=end_date, progress=False)

    # Verifica se os dados estão vazios
    if data.empty:
        raise ValueError(f"Não foi possível baixar dados para o ticker {ticker}.")

    # Ajusta o cabeçalho para garantir que as colunas estejam corretas
    data.reset_index(inplace=True)  # Garante que a coluna 'Date' seja incluída
    data = data.rename(columns={
        "Date": "Date",
        "Open": "Open",
        "High": "High",
        "Low": "Low",
        "Close": "Close",
        "Adj Close": "Adj Close",
        "Volume": "Volume"
    })

    # Define a coluna 'Date' como índice
    data.set_index("Date", inplace=True)

    return data

def create_features(df):
    # Verificações iniciais
    if df.empty:
        raise ValueError("O DataFrame está vazio. O download dos dados pode ter falhado.")
    for col in ['Close', 'High', 'Low', 'Volume']:
        if col not in df.columns:
            raise ValueError(f"A coluna '{col}' não foi encontrada no DataFrame. Colunas disponíveis: {df.columns.tolist()}")

    df = df.sort_index()
    features_df = pd.DataFrame(index=df.index)
    close = df['Close']
    high = df['High']
    low = df['Low']
    volume = df['Volume']

    # --- 1. Indicadores Técnicos ---
    features_df['SMA_20'] = close.rolling(window=20).mean()
    features_df['EMA_20'] = close.ewm(span=20, adjust=False).mean()
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    features_df['RSI_14'] = 100 - (100 / (1 + rs))
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()
    features_df['MACD_12_26_9'] = ema_fast - ema_slow
    features_df['MACDs_12_26_9'] = features_df['MACD_12_26_9'].ewm(span=9, adjust=False).mean()
    features_df['MACDh_12_26_9'] = features_df['MACD_12_26_9'] - features_df['MACDs_12_26_9']
    sma_20 = close.rolling(window=20).mean()
    std_20 = close.rolling(window=20).std()
    features_df['BBM_20_2.0'] = sma_20
    features_df['BBU_20_2.0'] = sma_20 + (std_20 * 2)
    features_df['BBL_20_2.0'] = sma_20 - (std_20 * 2)

    # --- 2. Features de Processamento de Sinal ---
    close_prices = close.values
    if len(close_prices) == 0:
        raise ValueError("A série de preços está vazia. Não é possível calcular FFT.")
    fft_vals = rfft(close_prices)
    if len(fft_vals) > 10:
        fft_vals[int(len(fft_vals) * 0.1):] = 0
    smoothed_fft = irfft(fft_vals)
    features_df['fft_smoothed'] = np.resize(smoothed_fft, len(df))
    try:
        coeffs = pywt.wavedec(close_prices, 'db4', level=4)
        coeffs_denoised = coeffs[:]
        for i in range(1, len(coeffs_denoised)):
            coeffs_denoised[i] = pywt.threshold(coeffs_denoised[i], value=np.std(coeffs_denoised[i])/2, mode='soft')
        denoised_prices = pywt.waverec(coeffs_denoised, 'db4')
        features_df['wavelet_denoised'] = np.resize(denoised_prices, len(df))
        for i, c in enumerate(coeffs):
            c_resized = np.resize(c, len(df))
            features_df[f'wavelet_coeff_{i}'] = pd.Series(c_resized, index=df.index)
    except Exception as e:
        print(f"Erro ao aplicar Wavelet: {e}")

    full_feature_df = pd.concat([df, features_df], axis=1)
    return full_feature_df

In [5]:
main_ticker = 'VALE3.SA'
data_path = '../../data/'
end_date = pd.to_datetime('today')
start_date = end_date - pd.DateOffset(years=10)

In [None]:
print(f"Baixando dados para {main_ticker}...")
raw_data = download_data(main_ticker, start_date, end_date)

# Salva o CSV imediatamente após o download, se não estiver vazio
if not raw_data.empty:
    raw_data.to_csv(f"{data_path}{main_ticker}.csv")
    print(f"Dados de {main_ticker} salvos em data/{main_ticker}.csv")
else:
    print(f"Falha ao baixar dados de {main_ticker}.")

In [9]:
raw_data = pd.read_csv(f"{data_path}{main_ticker}.csv", index_col="Date", parse_dates=True)

print(raw_data.head())

                Close       High       Low      Open    Volume
Date                                                          
2015-07-31   9.434408   9.524159  9.043727  9.254906   8740700
2015-08-03   9.286583   9.471365  9.133478  9.402731   4459200
2015-08-04   9.555838   9.666706  9.460807  9.460807   3890900
2015-08-05  10.036268  10.178814  9.846207  9.846207   7956400
2015-08-06  10.337199  10.448069  9.856768  9.904283  10318000


In [10]:
print("Criando features...")
full_feature_df = create_features(raw_data)
full_feature_df.fillna(method='bfill', inplace=True)
full_feature_df.dropna(inplace=True)
print("\nDataFrame de Features finalizado:")
print(full_feature_df.head())

Criando features...

DataFrame de Features finalizado:
                Close       High       Low      Open    Volume    SMA_20  \
Date                                                                       
2015-07-31   9.434408   9.524159  9.043727  9.254906   8740700  9.360496   
2015-08-03   9.286583   9.471365  9.133478  9.402731   4459200  9.360496   
2015-08-04   9.555838   9.666706  9.460807  9.460807   3890900  9.360496   
2015-08-05  10.036268  10.178814  9.846207  9.846207   7956400  9.360496   
2015-08-06  10.337199  10.448069  9.856768  9.904283  10318000  9.360496   

              EMA_20     RSI_14  MACD_12_26_9  MACDs_12_26_9  ...  BBM_20_2.0  \
Date                                                          ...               
2015-07-31  9.434408  43.142514      0.000000       0.000000  ...    9.360496   
2015-08-03  9.420330  43.142514     -0.011792      -0.002358  ...    9.360496   
2015-08-04  9.433235  43.142514      0.000582      -0.001770  ...    9.360496   
2015-08

In [11]:
full_feature_df

Unnamed: 0_level_0,Close,High,Low,Open,Volume,SMA_20,EMA_20,RSI_14,MACD_12_26_9,MACDs_12_26_9,...,BBM_20_2.0,BBU_20_2.0,BBL_20_2.0,fft_smoothed,wavelet_denoised,wavelet_coeff_0,wavelet_coeff_1,wavelet_coeff_2,wavelet_coeff_3,wavelet_coeff_4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-31,9.434408,9.524159,9.043727,9.254906,8740700,9.360496,9.434408,43.142514,0.000000,0.000000,...,9.360496,10.644105,8.076888,30.484253,9.585574,38.947643,-0.049567,0.390073,0.016244,0.046702
2015-08-03,9.286583,9.471365,9.133478,9.402731,4459200,9.360496,9.420330,43.142514,-0.011792,-0.002358,...,9.360496,10.644105,8.076888,26.014341,9.578236,38.936443,-0.206771,1.243593,0.141925,0.026478
2015-08-04,9.555838,9.666706,9.460807,9.460807,3890900,9.360496,9.433235,43.142514,0.000582,-0.001770,...,9.360496,10.644105,8.076888,21.749227,9.615778,38.727532,-0.578461,-0.298023,0.593382,0.169280
2015-08-05,10.036268,10.178814,9.846207,9.846207,7956400,9.360496,9.490667,43.142514,0.048595,0.008303,...,9.360496,10.644105,8.076888,17.817061,9.696445,38.868856,-2.083854,0.005090,-0.152499,0.382488
2015-08-06,10.337199,10.448069,9.856768,9.904283,10318000,9.360496,9.571289,43.142514,0.109665,0.028575,...,9.360496,10.644105,8.076888,14.326774,9.638099,39.262837,2.567201,-0.427182,-0.045725,0.118147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-24,56.529999,57.290001,56.290001,56.939999,21651200,54.937000,55.001571,56.425105,0.907931,0.661069,...,54.937000,57.583181,52.290819,51.027416,55.809862,125.483675,1.093473,-2.444090,-0.552876,-0.422563
2025-07-25,55.700001,56.279999,55.270000,56.180000,21918900,55.078500,55.068088,56.316300,0.847784,0.698412,...,55.078500,57.556616,52.600384,47.596184,55.218927,139.084912,-2.422531,-1.192798,-0.204485,0.377761
2025-07-28,55.160000,55.320000,54.639999,54.939999,19175100,55.204000,55.076842,52.705213,0.747922,0.708314,...,55.204000,57.402759,53.005242,43.708709,54.610057,134.067864,-0.088761,0.429229,1.069554,-0.853021
2025-07-29,54.820000,55.700001,54.770000,55.549999,14648700,55.277000,55.052380,53.707217,0.634037,0.693459,...,55.277000,57.308572,53.245428,39.473602,54.179640,129.896727,6.124500,-0.082130,-1.105359,0.109143


In [None]:
# @title 3. Rotulagem com o Método da Barreira Tripla
def get_daily_vol(close, span0=100):
    df0 = close.pct_change()
    df0 = df0.ewm(span=span0).std()
    df0.dropna(inplace=True)
    return df0

def apply_triple_barrier(close, events, pt_sl, molecule):
    events_ = events.loc[molecule]
    out = events_[['t1']].copy(deep=True)
    pt, sl = pt_sl[0], -pt_sl[1]
    pt_barrier = pt * events_['trgt'] if pt > 0 else pd.Series(index=events.index)
    sl_barrier = sl * events_['trgt'] if sl < 0 else pd.Series(index=events.index)
    for loc, t1 in events_['t1'].fillna(close.index[-1]).iteritems():
        df0 = close[loc:t1]
        df0 = (df0 / close[loc] - 1)
        out.loc[loc, 'sl'] = df0[df0 < sl_barrier[loc]].index.min()
        out.loc[loc, 'pt'] = df0[df0 > pt_barrier[loc]].index.min()
    return out

def get_events(close, t_events, pt_sl, target, min_ret, vertical_barrier_times):
    target = target.loc[t_events]
    target = target[target > min_ret]
    if len(target) == 0: return pd.DataFrame()
    events = pd.concat({'t1': vertical_barrier_times, 'trgt': target}, axis=1).dropna(subset=['trgt'])
    first_touch_events = apply_triple_barrier(close, events, pt_sl, events.index)
    events['t1'] = first_touch_events.min(axis=1)
    events.drop(columns=['pt', 'sl'], inplace=True, errors='ignore')
    return events

def get_bins(events, close):
    events_ = events.dropna(subset=['t1'])
    px = close.reindex(events_.index.union(events_['t1'].values)).dropna()
    out = pd.DataFrame(index=events_.index)
    out['ret'] = px.loc[events_['t1'].values].values / px.loc[events_.index] - 1
    out.loc[events_['t1'].isna(), 'bin'] = 0
    out['bin'] = np.sign(out['ret'])
    first_touch_price = close.loc[events_['t1']]
    entry_price = close.loc[events_.index]
    sl_hit_mask = (first_touch_price / entry_price - 1) < (-pt_sl[1] * events_['trgt'])
    out.loc[sl_hit_mask, 'bin'] = -1
    return out

# --- Execução da Rotulagem ---
close_prices = full_feature_df['Close']
daily_vol = get_daily_vol(close_prices)
full_feature_df['trgt'] = daily_vol
t_events = close_prices.index
num_days = 10
vertical_barrier_ts = pd.Series(t_events + pd.Timedelta(days=num_days), index=t_events)
vertical_barrier_ts = vertical_barrier_ts[vertical_barrier_ts < close_prices.index[-1]]
pt_sl = [1.5, 1.5]
min_ret = 0.001
triple_barrier_events = get_events(close_prices, t_events, pt_sl, daily_vol, min_ret, vertical_barrier_ts)

labels = get_bins(triple_barrier_events, close_prices)
labels['bin'] = labels['bin'].astype(int)
final_df = full_feature_df.join(labels[['bin', 'ret']]).rename(columns={'bin': 'label'})
final_df.dropna(inplace=True)
print("\nDistribuição dos Rótulos (Barreira Tripla):")
print(final_df['label'].value_counts())

KeyError: "[Timestamp('2015-07-31 00:00:00'), Timestamp('2015-08-03 00:00:00')] not in index"

In [None]:
# @title 3.1. Visualização dos Rótulos na Série de Preços
def plot_labels_on_price(df, ticker, n_points=500):
    df_subset = df.iloc[-n_points:]
    close_price = df_subset['Close']
    buy_signals = df_subset[df_subset['label'] == 1]
    sell_signals = df_subset[df_subset['label'] == -1]
    hold_signals = df_subset[df_subset['label'] == 0]
    plt.figure(figsize=(18, 9))
    plt.plot(close_price.index, close_price.values, label='Preço de Fechamento', color='k', alpha=0.7)
    plt.scatter(buy_signals.index, close_price.loc[buy_signals.index], label='Compra (1)', marker='^', color='green', s=100, alpha=0.9, zorder=5)
    plt.scatter(sell_signals.index, close_price.loc[sell_signals.index], label='Venda (-1)', marker='v', color='red', s=100, alpha=0.9, zorder=5)
    plt.scatter(hold_signals.index, close_price.loc[hold_signals.index], label='Manter (0)', marker='o', color='blue', s=20, alpha=0.6, zorder=4)
    plt.title(f'Sinais da Barreira Tripla vs. Preço de Fechamento - {ticker} ({n_points} dias)')
    plt.xlabel('Data'); plt.ylabel('Preço (R$)'); plt.legend(); plt.grid(True); plt.show()
plot_labels_on_price(final_df, main_ticker)

In [None]:
# @title 4. Divisão de Dados In-Sample e Out-of-Sample
# Define o período de teste Out-of-Sample (OOS) como os últimos 2 anos.
oos_period_years = 2
split_date = final_df.index.max() - pd.DateOffset(years=oos_period_years)

df_in_sample = final_df[final_df.index <= split_date]
df_oos_test = final_df[final_df.index > split_date]

print(f"Período total dos dados: {final_df.index.min().date()} a {final_df.index.max().date()}")
print(f"Período In-Sample (Treino/Validação): {df_in_sample.index.min().date()} a {df_in_sample.index.max().date()} ({len(df_in_sample)} amostras)")
print(f"Período Out-of-Sample (Teste Final): {df_oos_test.index.min().date()} a {df_oos_test.index.max().date()} ({len(df_oos_test)} amostras)")

In [None]:
# @title 5. Validação Cruzada K-Fold com Purga e Embargo (Apenas em dados In-Sample)
class PurgedKFold(KFold):
    def __init__(self, n_splits=10, t1=None, pct_embargo=0.01):
        super().__init__(n_splits=n_splits, shuffle=False)
        self.t1 = t1
        self.pct_embargo = pct_embargo

    def split(self, X, y=None, groups=None):
        if self.t1 is None: raise ValueError("t1 series must be provided.")
        t1_aligned = self.t1.reindex(X.index).dropna()
        indices = np.arange(X.shape[0])
        embargo_size = int(X.shape[0] * self.pct_embargo)
        test_splits = np.array_split(indices, self.n_splits)
        for i in range(self.n_splits):
            test_indices = test_splits[i]
            if len(test_indices) == 0: continue
            test_start_time = X.index[test_indices[0]]
            train_indices_all = np.concatenate([split for j, split in enumerate(test_splits) if i != j])
            t1_train = t1_aligned.iloc[train_indices_all]
            purged_train_mask = t1_train < test_start_time
            train_indices = t1_train[purged_train_mask].index
            train_indices = X.index.get_indexer(train_indices)
            if len(test_indices) > 0:
                test_end_time = X.index[test_indices[-1]]
                embargo_start_time = test_end_time + pd.Timedelta(days=1)
                embargo_end_time = embargo_start_time + pd.Timedelta(days=embargo_size)
                embargo_mask = (X.index[train_indices] < embargo_start_time) | (X.index[train_indices] > embargo_end_time)
                train_indices = train_indices[embargo_mask]
            yield train_indices, test_indices

# --- Preparação para a Validação ---
X_is = df_in_sample.drop(['label', 'ret', 'trgt'], axis=1)
y_is = df_in_sample['label']
t1_series_is = triple_barrier_events.loc[y_is.index, 't1']
purged_kfold = PurgedKFold(n_splits=5, t1=t1_series_is, pct_embargo=0.01)

print("\nValidador PurgedKFold instanciado para dados In-Sample.")

In [None]:
# @title 6. Definição e Treinamento do Modelo CNN-LSTM com Regularização e Callbacks
def create_cnn_lstm_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = Conv1D(filters=64, kernel_size=3, activation='relu', kernel_regularizer=regularizers.l2(0.01))(inputs)
    x = MaxPooling1D(pool_size=2)(x)
    x = Dropout(0.3)(x)
    x = LSTM(units=100, return_sequences=False, kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(0.3)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_sequences(X, y, time_steps=1):
    Xs, ys, idxs = [], [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        label = y.iloc[i + time_steps]
        ys.append(label)
        idx = y.index[i + time_steps]
        idxs.append(idx)
    return np.array(Xs), np.array(ys), np.array(idxs)

# --- Loop de Validação Cruzada (In-Sample) ---
TIME_STEPS = 20
N_SPLITS = 5
all_true_labels_cv, all_pred_labels_cv, all_returns_cv = [], [], []
y_mapped_is = y_is.map({-1: 0, 0: 1, 1: 2})

fold = 0
for train_idx, test_idx in purged_kfold.split(X_is, y_mapped_is):
    fold += 1
    print(f"\n--- Processando Dobra CV {fold}/{N_SPLITS} ---")
    if len(train_idx) == 0 or len(test_idx) == 0:
        print("Dobra vazia, pulando."); continue

    X_train, X_test = X_is.iloc[train_idx], X_is.iloc[test_idx]
    y_train, y_test = y_mapped_is.iloc[train_idx], y_mapped_is.iloc[test_idx]

    scaler = RobustScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test_scaled = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    X_train_seq, y_train_seq, _ = create_sequences(X_train_scaled, y_train, TIME_STEPS)
    X_test_seq, y_test_seq, y_test_indices = create_sequences(X_test_scaled, y_test, TIME_STEPS)

    if X_train_seq.shape[0] == 0 or X_test_seq.shape[0] == 0:
        print("Dados insuficientes. Pulando."); continue

    y_train_cat = to_categorical(y_train_seq, num_classes=3)
    model = create_cnn_lstm_model(input_shape=(X_train_seq.shape[1], X_train_seq.shape[2]), num_classes=3)

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint(f'best_model_fold_{fold}.keras', monitor='val_loss', save_best_only=True, verbose=0)

    model.fit(X_train_seq, y_train_cat, epochs=100, batch_size=32, verbose=0, validation_split=0.1, callbacks=[early_stopping, model_checkpoint])

    predictions = np.argmax(model.predict(X_test_seq), axis=1)
    pred_labels_original = pd.Series(predictions).map({0: -1, 1: 0, 2: 1}).values
    true_labels_original = pd.Series(y_test_seq).map({0: -1, 1: 0, 2: 1}).values
    all_pred_labels_cv.extend(pred_labels_original)
    all_true_labels_cv.extend(true_labels_original)
    test_returns = df_in_sample.loc[y_test_indices, 'ret']
    strategy_returns = test_returns * pred_labels_original
    all_returns_cv.extend(strategy_returns)

print("\n--- Validação Cruzada In-Sample Concluída ---")

In [None]:
# @title 7. Treinamento do Modelo Final (com todos os dados In-Sample)
print("\n--- Treinando o Modelo Final ---")

# Preparar todos os dados in-sample para o treinamento final
X_is_full = df_in_sample.drop(['label', 'ret', 'trgt'], axis=1)
y_is_full_mapped = df_in_sample['label'].map({-1: 0, 0: 1, 1: 2})

# É crucial usar um novo scaler e ajustá-lo a TODOS os dados in-sample
final_scaler = RobustScaler()
X_is_scaled_full = pd.DataFrame(final_scaler.fit_transform(X_is_full), index=X_is_full.index, columns=X_is_full.columns)

X_final_train_seq, y_final_train_seq, _ = create_sequences(X_is_scaled_full, y_is_full_mapped, TIME_STEPS)
y_final_train_cat = to_categorical(y_final_train_seq, num_classes=3)

# Cria e treina o modelo final
final_model = create_cnn_lstm_model(input_shape=(X_final_train_seq.shape[1], X_final_train_seq.shape[2]), num_classes=3)
print(f"Treinando modelo final com {X_final_train_seq.shape[0]} amostras...")
final_model.fit(X_final_train_seq, y_final_train_cat, epochs=30, batch_size=32, verbose=1) # Treina por um número fixo de épocas
final_model.save('final_production_model.keras')
print("Modelo final treinado e salvo.")

In [None]:
# @title 8. Avaliação de Desempenho (Resultados da Validação Cruzada)
def calculate_financial_metrics(returns, risk_free_rate=0.0):
    returns = pd.Series(returns).dropna()
    if returns.empty or returns.std() == 0:
        return {'Sharpe Ratio': 0.0, 'Sortino Ratio': 0.0, 'Maximum Drawdown': 0.0, 'Calmar Ratio': 0.0, 'Cumulative Returns': pd.Series([1.0])}
    sharpe = (returns.mean() * 252) / (returns.std() * np.sqrt(252))
    downside_std = returns[returns < 0].std()
    sortino = (returns.mean() * 252) / (downside_std * np.sqrt(252)) if downside_std != 0 else 0
    cum_ret = (1 + returns).cumprod()
    peak = cum_ret.expanding(min_periods=1).max()
    drawdown = (cum_ret - peak) / peak
    mdd = drawdown.min()
    calmar = (returns.mean() * 252) / abs(mdd) if mdd != 0 else 0
    return {'Sharpe Ratio': sharpe, 'Sortino Ratio': sortino, 'Maximum Drawdown': mdd, 'Calmar Ratio': calmar, 'Cumulative Returns': cum_ret}

print("\n\n--- AVALIAÇÃO DA VALIDAÇÃO CRUZADA (IN-SAMPLE) ---")
if all_true_labels_cv:
    print(f"Acurácia Geral (CV): {accuracy_score(all_true_labels_cv, all_pred_labels_cv):.4f}\n")
    print("Relatório de Classificação (CV):")
    print(classification_report(all_true_labels_cv, all_pred_labels_cv, target_names=['Vender (-1)', 'Manter (0)', 'Comprar (1)'], labels=[-1, 0, 1], zero_division=0))
    cm = confusion_matrix(all_true_labels_cv, all_pred_labels_cv, labels=[-1, 0, 1])
    plt.figure(figsize=(8, 6)); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Vender', 'Manter', 'Comprar'], yticklabels=['Vender', 'Manter', 'Comprar'])
    plt.title('Matriz de Confusão (CV)'); plt.ylabel('Verdadeiro'); plt.xlabel('Previsto'); plt.show()

    financial_metrics_cv = calculate_financial_metrics(all_returns_cv)
    print("\nMétricas Financeiras (CV):")
    print(f"Sharpe Ratio: {financial_metrics_cv['Sharpe Ratio']:.4f}"); print(f"Sortino Ratio: {financial_metrics_cv['Sortino Ratio']:.4f}")
    print(f"Maximum Drawdown: {financial_metrics_cv['Maximum Drawdown']:.4%}"); print(f"Calmar Ratio: {financial_metrics_cv['Calmar Ratio']:.4f}")
    plt.figure(figsize=(12, 6)); financial_metrics_cv['Cumulative Returns'].plot(); plt.title(f'Retorno Acumulado (CV) - {main_ticker}')
    plt.xlabel('Trades'); plt.ylabel('Retorno Acumulado'); plt.grid(True); plt.show()
else:
    print("Nenhuma previsão gerada na Validação Cruzada.")

In [None]:
# @title 9. Avaliação Final no Conjunto Out-of-Sample (OOS)
print("\n\n--- AVALIAÇÃO FINAL NO CONJUNTO DE TESTE OUT-OF-SAMPLE (OOS) ---")

# Preparar dados OOS
X_oos = df_oos_test.drop(['label', 'ret', 'trgt'], axis=1)
y_oos_mapped = df_oos_test['label'].map({-1: 0, 0: 1, 1: 2})

# Usar o scaler AJUSTADO NOS DADOS DE TREINO para transformar os dados OOS
X_oos_scaled = pd.DataFrame(final_scaler.transform(X_oos), index=X_oos.index, columns=X_oos.columns)

# Criar sequências para o conjunto OOS
X_oos_seq, y_oos_seq, y_oos_indices = create_sequences(X_oos_scaled, y_oos_mapped, TIME_STEPS)

if X_oos_seq.shape[0] > 0:
    # Carregar o modelo final salvo e fazer previsões
    production_model = load_model('final_production_model.keras')
    predictions_oos = np.argmax(production_model.predict(X_oos_seq), axis=1)

    # Converter rótulos de volta para o formato original (-1, 0, 1)
    pred_labels_oos = pd.Series(predictions_oos).map({0: -1, 1: 0, 2: 1}).values
    true_labels_oos = pd.Series(y_oos_seq).map({0: -1, 1: 0, 2: 1}).values

    # --- 1. Métricas de Classificação OOS ---
    print(f"Acurácia Geral (OOS): {accuracy_score(true_labels_oos, pred_labels_oos):.4f}\n")
    print("Relatório de Classificação (OOS):")
    print(classification_report(true_labels_oos, pred_labels_oos, target_names=['Vender (-1)', 'Manter (0)', 'Comprar (1)'], labels=[-1, 0, 1], zero_division=0))
    cm_oos = confusion_matrix(true_labels_oos, pred_labels_oos, labels=[-1, 0, 1])
    plt.figure(figsize=(8, 6)); sns.heatmap(cm_oos, annot=True, fmt='d', cmap='Blues', xticklabels=['Vender', 'Manter', 'Comprar'], yticklabels=['Vender', 'Manter', 'Comprar'])
    plt.title('Matriz de Confusão (OOS)'); plt.ylabel('Verdadeiro'); plt.xlabel('Previsto'); plt.show()

    # --- 2. Métricas Financeiras OOS ---
    oos_returns = df_oos_test.loc[y_oos_indices, 'ret']
    strategy_returns_oos = oos_returns * pred_labels_oos
    financial_metrics_oos = calculate_financial_metrics(strategy_returns_oos)

    print("\nMétricas Financeiras (OOS):")
    print(f"Sharpe Ratio: {financial_metrics_oos['Sharpe Ratio']:.4f}"); print(f"Sortino Ratio: {financial_metrics_oos['Sortino Ratio']:.4f}")
    print(f"Maximum Drawdown: {financial_metrics_oos['Maximum Drawdown']:.4%}"); print(f"Calmar Ratio: {financial_metrics_oos['Calmar Ratio']:.4f}")

    # --- 3. Gráfico de Retornos Acumulados OOS ---
    plt.figure(figsize=(12, 6)); financial_metrics_oos['Cumulative Returns'].plot(); plt.title(f'Retorno Acumulado da Estratégia (OOS) - {main_ticker}')
    plt.xlabel('Data'); plt.ylabel('Retorno Acumulado'); plt.grid(True); plt.show()
else:
    print("Não há dados suficientes no conjunto Out-of-Sample para gerar sequências e avaliar.")