# In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed

# --- 1. Get Data (USD/BRL) ---
# We download daily data.
ticker = 'BRL=X'
raw = yf.download(ticker, start='2015-01-01', end='2024-01-01')
if raw.empty:
    raise ValueError(f"No price data was returned for ticker {ticker!r}")

# Depending on the yfinance version, raw['Close'] is either a Series or a
# one-column DataFrame keyed by the ticker symbol (MultiIndex columns).
# Normalise both shapes to a Series named 'Close' so the column lookups
# below work either way.
data = raw['Close']
if isinstance(data, pd.DataFrame):
    data = data.iloc[:, 0]
data = data.rename('Close')

# Feature Engineering: We use Volatility (rolling std dev) as the feature
# because regimes are often defined by volatility shifts, not just price levels.
window_size = 30  # kept for compatibility; not used by the volatility feature below
vol_window = 5    # 5-day rolling vol
df = data.to_frame()
df['Returns'] = df['Close'].pct_change()
df['Vol'] = df['Returns'].rolling(window=vol_window).std()
# Drop the leading rows where the return / rolling std are undefined.
df = df.dropna()

# Split into Training (Normal Regime) and Testing (Unknown/Potential New Regimes)
train_end = '2019-12-31'
train_data = df.loc[:train_end, ['Vol']]
# Label slicing with .loc includes both endpoints, so slicing the test set
# with df.loc[train_end:] would put the train_end row into both sets.
# Take everything after the training rows by position instead.
test_data = df[['Vol']].iloc[len(train_data):]

# Scale Data (Crucial for Neural Networks)
# The scaler is fitted on the training period only; the test period is
# transformed with the training min/max.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data)
test_scaled = scaler.transform(test_data)

# --- 2. Create Sequences (LSTM requires 3D input: [Samples, Timesteps, Features]) ---
def create_sequences(data, steps=30):
    """Stack overlapping windows of ``steps`` consecutive rows of ``data``.

    Window ``i`` holds ``data[i:i + steps]``. Exactly ``len(data) - steps``
    windows are produced (the window ending on the very last row is not
    emitted), which is the count existing callers rely on.

    Args:
        data: Array-like whose first axis is time, e.g. shape
            ``(n_rows, n_features)``.
        steps: Number of rows per window; must be at least 1.

    Returns:
        ``np.ndarray`` of shape ``(len(data) - steps, steps, *data.shape[1:])``.
        When ``data`` has ``steps`` rows or fewer, the result is an empty
        array that still has the window dimensions, i.e. shape
        ``(0, steps, *data.shape[1:])``.

    Raises:
        ValueError: If ``steps`` is smaller than 1.
    """
    if steps < 1:
        raise ValueError(f"steps must be >= 1, got {steps}")

    values = np.asarray(data)
    n_windows = len(values) - steps
    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still read
        # .shape[1] / .shape[2] on an empty result.
        return np.empty((0, steps) + values.shape[1:], dtype=values.dtype)

    return np.stack([values[start:start + steps] for start in range(n_windows)])

TIME_STEPS = 30
X_train, X_test = (
    create_sequences(scaled_part, TIME_STEPS)
    for scaled_part in (train_scaled, test_scaled)
)

# --- 3. Build the LSTM Autoencoder ---
# Window length and feature count are read off the training tensor so the
# network always matches the shape produced by create_sequences.
n_timesteps = X_train.shape[1]
n_features = X_train.shape[2]

model = Sequential()

# Encoder: reads a whole window and emits only its final 16-unit state.
model.add(
    LSTM(
        16,
        activation='relu',
        input_shape=(n_timesteps, n_features),
        return_sequences=False,
    )
)

# The "Latent Space": the encoded vector is repeated once per timestep so
# the decoder receives a sequence of the original length.
model.add(RepeatVector(n_timesteps))

# Decoder: expands the repeated vector back into a full-length sequence,
# then maps every timestep to the original number of features.
model.add(LSTM(16, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))

model.compile(optimizer='adam', loss='mse')

# Autoencoder training: the input windows are also the targets.
model.fit(
    X_train,
    X_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    verbose=0,
)

# --- 4. Detect Regimes (Calculate Reconstruction Error) ---
# We ask the model to reconstruct the Test data (2020+)
X_test_pred = model.predict(X_test)

# Mean Absolute Error (MAE) per window, averaged over both the timestep and
# the feature axis so the result is a flat 1-D vector (one score per window)
# rather than an (n_windows, 1) column.
test_mae_loss = np.mean(np.abs(X_test_pred - X_test), axis=(1, 2))

# Manual threshold for visual clarity, expressed in scaled-volatility units.
REGIME_THRESHOLD = 0.15

# Window i is built from test rows i .. i + TIME_STEPS - 1, so its score is
# labelled with the last day the window covers (row i + TIME_STEPS - 1),
# not the following day. The slice is trimmed to the number of scores so
# the index and the loss vector always have the same length.
window_end_index = test_data.index[TIME_STEPS - 1:][:len(test_mae_loss)]

# Create a DataFrame for plotting
test_score_df = pd.DataFrame(index=window_end_index)
test_score_df['loss'] = test_mae_loss
test_score_df['threshold'] = REGIME_THRESHOLD
# Single .loc call (row labels + column) instead of chained indexing.
test_score_df['Close'] = df.loc[test_score_df.index, 'Close']

# --- 5. Visualization ---
# Two stacked panels sharing the date axis so spikes in reconstruction error
# can be read directly against the price path below them.
fig, (ax_loss, ax_price) = plt.subplots(2, 1, figsize=(14, 6), sharex=True)

# Use the threshold stored with the scores rather than repeating the literal,
# so the dashed line cannot drift from the value in test_score_df.
threshold_value = float(test_score_df['threshold'].iloc[0])

# Plot 1: The Reconstruction Error (The "Regime Signal")
ax_loss.plot(test_score_df.index, test_score_df['loss'], label='Reconstruction Error', color='red')
ax_loss.axhline(y=threshold_value, color='black', linestyle='--', label='Regime Threshold')
ax_loss.set_title('Regime Detection: Reconstruction Error (Higher = Unknown Regime)')
ax_loss.legend()

# Plot 2: The Actual Price (USDBRL)
ax_price.plot(test_score_df.index, test_score_df['Close'], label='USDBRL Price', color='blue')
ax_price.set_title('Actual USDBRL Price')
# The series is labelled, so show the legend for it as well.
ax_price.legend()

fig.tight_layout()
plt.show()





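# Captured cell output: ModuleNotFoundError: No module named 'yfinance'
# (the cell was run in an environment without yfinance; install it, e.g. `pip install yfinance`, and re-run)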