# Stock Trading Example

This notebook demonstrates using the TimeSeries Agent for stock trading, showing:

1. Working with financial data
2. Feature engineering for market indicators
3. Training an RL agent for directional prediction (Up/Down/Same)
4. Real-time trading signals

In [None]:
!pip install python-dotenv --quiet

In [None]:
import os
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import requests
from dotenv import load_dotenv
load_dotenv()  # Load .env file
from timeseries_agent.api import train_from_csv
import warnings
warnings.filterwarnings("ignore")

## 1. Get Stock Data

In [None]:
def add_features(df, col_names_dict):
    """Return a new frame with technical-indicator columns appended.

    Args:
        df: Input price frame; it is not modified.
        col_names_dict: Mapping with at least the keys 'Date', 'Close' and
            'Volume', giving the matching column names in ``df``.

    Returns:
        A frame without the date column, with the added columns ``returns``,
        ``log_returns``, ``sma_20``, ``sma_50``, ``sma_ratio``, ``volatility``
        and ``volume_ratio``. Rows containing any NaN (e.g. the warm-up rows
        of the rolling windows) are dropped and the index is renumbered from 0.
    """
    # Work on a fresh frame with a 0..n-1 index and without the date column
    out = df.reset_index(drop=True).drop(columns=col_names_dict['Date'])

    close = out[col_names_dict['Close']]
    volume = out[col_names_dict['Volume']]

    # Simple and log returns of the close price
    out['returns'] = close.pct_change()
    out['log_returns'] = np.log1p(out['returns'])

    # 20- and 50-period simple moving averages and their ratio
    out['sma_20'] = close.rolling(20).mean()
    out['sma_50'] = close.rolling(50).mean()
    out['sma_ratio'] = out['sma_20'] / out['sma_50']

    # Rolling 20-period standard deviation of returns
    out['volatility'] = out['returns'].rolling(20).std()

    # Volume relative to its own 20-period average
    out['volume_ratio'] = volume.div(volume.rolling(20).mean())

    return out.dropna().reset_index(drop=True)

In [None]:
# Load your FMP API key from the environment and fail early with a clear message if it is missing
api_key = os.environ.get("FMP_API_KEY")
if not api_key:
    raise RuntimeError("FMP_API_KEY is not set; add it to your environment or .env file")
# Symbol to fetch historical data for
symbol = "EURUSD"
interval = "15min"  # Options: '1min', '5min', '15min', '30min', '1hour', '4hour', '1day', '1week', '1month'

# Endpoint for the historical chart of `symbol` at the chosen `interval`
url = f'https://financialmodelingprep.com/api/v3/historical-chart/{interval}/{symbol}?apikey={api_key}'

# Fetch the data from the FMP API; the timeout keeps a stalled connection from hanging the notebook
response = requests.get(url, timeout=30)
if not response.ok:
    # Report only the status code: the URL contains the API key and must not end up in cell output
    raise RuntimeError(f"FMP request for {symbol} ({interval}) failed with HTTP {response.status_code}")
payload = response.json()
# NOTE(review): FMP is expected to report failures as a JSON object with an "Error Message" key — confirm
if isinstance(payload, dict) and "Error Message" in payload:
    raise RuntimeError(f"FMP returned an error for {symbol} ({interval}): {payload['Error Message']}")
if not payload:
    raise ValueError(f"FMP returned no rows for {symbol} ({interval})")

# Convert the historical data into a pandas DataFrame
data = pd.DataFrame(payload)
display(data.head())
display(data.tail())

In [None]:
# Put the rows in ascending time order (oldest first) so that the later positional
# split takes its test rows from the end of the frame. Sorting, unlike reversing with
# iloc[::-1], gives the same result no matter how many times this cell is re-run.
# NOTE(review): assumes 'date' holds ISO-like timestamps that sort correctly as text — confirm
data = data.sort_values('date', kind='stable').reset_index(drop=True)
display(data.head())
display(data.tail())

### 1.a. Add Features Columns

In [None]:
# Map the generic column roles used by add_features to this dataset's column names
col_names_dict = dict(
    Date='date',
    Open='open',
    High='high',
    Low='low',
    Close='close',
    Volume='volume',
)
target_column = 'close'     # Target column for price prediction
last_n_test_rows = 100      # Number of trailing rows held out for testing

# Add technical indicators as features, then keep only the first 1000 rows
df = add_features(data, col_names_dict).head(1000)

# Positional split: all rows except the last n go to training, the last n to testing
train_data = df.iloc[:-last_n_test_rows]
test_data = df.tail(last_n_test_rows)

# Report the split sizes and preview both parts
print(f"Train Shape: {train_data.shape}, Test Shape: {test_data.shape}")
display(train_data.head())
display(test_data.head())

In [None]:
# Plot the close price of the train and test splits as a line with point markers
plt.figure(figsize=(18, 6))
for split_frame, split_color, split_label in (
    (train_data, 'blue', 'Train Close Price'),
    (test_data, 'orange', 'Test Close Price'),
):
    plt.plot(split_frame.index, split_frame['close'], color=split_color, label=split_label)
    plt.scatter(split_frame.index, split_frame['close'], color=split_color, s=10, alpha=0.5)
plt.xlabel('Time')
plt.ylabel('Price')
plt.title(f"{symbol} Price Data")
plt.legend()
plt.show()

### 1.b. Compute Target Direction Distribution

In [None]:
def _encode_direction(delta):
    """Map a price change to a class id: 0 = up, 1 = down, 2 = anything else (zero or NaN)."""
    if delta > 0:
        return 0
    if delta < 0:
        return 1
    return 2


# Direction of each step in the training split; the first row has no predecessor and is dropped
train_target_diff = train_data[target_column].diff()
train_directions = train_target_diff.apply(_encode_direction).iloc[1:]
train_directions_counts = Counter(train_directions)
print("Train Directions Counts:", train_directions_counts)

# Same computation for the test split
test_target_diff = test_data[target_column].diff()
test_directions = test_target_diff.apply(_encode_direction).iloc[1:]
test_directions_counts = Counter(test_directions)
print("Test Directions Counts:", test_directions_counts)

In [None]:
# Grouped bar plot of the direction counts in the train and test splits.
# Heights are looked up per class id in a fixed order, so a class that is missing
# from one split is drawn as a zero-height bar instead of shifting the remaining
# bars under the wrong tick label.
direction_ids = [0, 1, 2]
direction_names = ['Up', 'Down', 'No Change']
bar_width = 0.4
bar_positions = np.arange(len(direction_ids))
train_heights = [train_directions_counts.get(class_id, 0) for class_id in direction_ids]
test_heights = [test_directions_counts.get(class_id, 0) for class_id in direction_ids]

plt.figure(figsize=(10, 5))
# Side-by-side bars keep both splits readable even when one count is much smaller
plt.bar(bar_positions - bar_width / 2, train_heights, width=bar_width, color='blue', alpha=0.6, label='Train')
plt.bar(bar_positions + bar_width / 2, test_heights, width=bar_width, color='orange', alpha=0.6, label='Test')
plt.xlabel('Direction')
plt.ylabel('Count')
plt.title('Distribution of Directions in Train and Test Data')
plt.xticks(bar_positions, direction_names)
plt.legend()
plt.grid(True, linestyle='--', alpha=0.7)
plt.show()

In [None]:
# Write both splits to CSV files inside DATA_DIR (created if it does not exist)
DATA_DIR = 'data'
os.makedirs(DATA_DIR, exist_ok=True)
train_csv_path, test_csv_path = (
    os.path.join(DATA_DIR, file_name) for file_name in ('fin_train.csv', 'fin_test.csv')
)

# The row index is not written, so the files contain only the data columns
for split_frame, split_path in ((train_data, train_csv_path), (test_data, test_csv_path)):
    split_frame.to_csv(split_path, index=False)

## 2. Configure Reward Strategy

The agent automatically calculates rewards during training based on:
- Action 0: Predict price will go Up
- Action 1: Predict price will go Down  
- Action 2: Predict price will stay Same

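In the simplest scheme the reward is +1 for a correct prediction and -1 for an incorrect one. The configuration below selects the `proportional` method instead, together with a `scale` factor and a minimum price-change threshold (`min_change_pct`).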

In [None]:
# Reward settings handed to train_from_csv
reward_config = dict(
    method='proportional',
    # Amplify small returns
    scale=100.0,
    # Min 0.1% move for reward. This helps avoid noise and serves as a bid/ask spread filter
    min_change_pct=0.001,
)

## 3. Train Agent

In [None]:
# Feature columns: the engineered indicators plus the target column itself
feature_cols = ['returns', 'sma_ratio', 'volatility', 'volume_ratio', 'log_returns', target_column]

# Environment settings
lookback = 10
env_kwargs = dict(
    lookback=lookback,
    normalize_state=True,
    test_size=0.2,                      # Use 20% of data for validation
)

# Agent settings
agent_type = 'ppo'                      # options: 'ppo', 'reinforce', and 'reinforce_step'
agent_kwargs = dict(
    hidden_layers=[64, 32],             # More complex architecture for better learning
    output_size=3,                      # Three possible actions: Up, Down, Same
    agent_type=agent_type,
)

# Trainer settings
trainer_kwargs = dict(
    max_epochs=100,
    enable_checkpointing=True,
    experiment_name=f'{symbol}_trading',
)

# Train with automatic action sampling and reward generation
agent = train_from_csv(
    csv_path=train_csv_path,
    feature_cols=feature_cols,
    target_col=target_column,
    reward_config=reward_config,
    env_kwargs=env_kwargs,
    agent_kwargs=agent_kwargs,
    trainer_kwargs=trainer_kwargs,
)

### 3.a. Plot Training Logs

In [None]:
from timeseries_agent.utils.extras import plot_training_metrics
# Hand the trained agent to the project's plotting helper to visualize its training logs
plot_training_metrics(agent)

## 4. Evaluate Agent
The agent predicts market direction (Up/Down/Same) based on recent data

In [None]:
from timeseries_agent.api import load_agent

In [None]:
# Directory used by the logger attached to the agent's trainer
log_dir = agent.trainer.logger.log_dir
# Build the path <log_dir>/checkpoints/last.ckpt
# NOTE(review): assumes training wrote a 'last.ckpt' file in that folder — confirm it exists before loading
checkpoint_path = os.path.join(log_dir, 'checkpoints', 'last.ckpt')
print(checkpoint_path)

In [None]:
# Load the trained agent from the checkpoint, pointing it at the held-out test CSV
# and reusing the feature/target columns that were used for training
loaded_agent = load_agent(
    checkpoint_path=checkpoint_path,
    csv_path=test_csv_path,
    feature_cols=feature_cols,
    target_col=target_column,
    agent_type=agent_type,              # Specify the agent type
    **env_kwargs                        # Pass the same environment configuration used during training
)

In [None]:
def get_profit_loss(current_val, next_val, true_action, pred_action, pip_value=0.0001):
    """
    Calculate the profit or loss of one step, in price units, charging
    ``pip_value`` as the cost of every trade that is actually taken.

    Args:
        current_val: Price at the current step.
        next_val: Price at the next step.
        true_action: Realized direction (0 = up, 1 = down, 2 = unchanged).
        pred_action: Predicted direction (0 = up, 1 = down, 2 = no trade).
        pip_value: Cost charged per trade; defaults to 0.0001.

    Returns:
        0.0 when no trade is taken (``pred_action == 2``);
        ``abs(move) - pip_value`` for a correct directional call;
        ``-abs(move) - pip_value`` for a wrong directional call, so that
        losing trades reduce the total instead of counting as zero.
    """
    if pred_action == 2:
        # No action taken, nothing gained or lost
        return 0.0

    move = abs(next_val - current_val)

    if true_action == pred_action:
        # Correct prediction: earn the size of the move minus the trade cost.
        # A zero move keeps returning 0.0, as before.
        return move - pip_value if move > 0 else 0.0

    # Wrong prediction: the move goes against the position and the trade cost is still paid
    return -move - pip_value

In [None]:
def get_true_action(current_val, next_val):
    """Return the realized direction: 0 if the value rose, 1 if it fell, 2 otherwise."""
    if next_val > current_val:
        return 0
    if next_val < current_val:
        return 1
    return 2

# Batch predictions on historical data
def get_batch_predictions(df, lookback_size):
    """Run the loaded agent over ``df`` step by step and score each prediction.

    For every step ``i`` from ``lookback_size`` up to the second-to-last row,
    the ``lookback_size`` rows before ``i`` of the feature columns are passed
    to ``loaded_agent.act``. The predicted action is compared with the realized
    move of the target column from row ``i`` to row ``i + 1``. One line per
    step is printed.

    Relies on the notebook-level names ``feature_cols``, ``target_column``,
    ``loaded_agent``, ``get_true_action`` and ``get_profit_loss``.

    Returns:
        Tuple ``(true_actions, predictions, total_profit)``.
    """
    predictions, true_actions = [], []
    total_profit = 0.0

    # Stop one row early: the last row has no following value to score against
    for step in range(lookback_size, len(df) - 1):
        window = df[feature_cols].iloc[step - lookback_size:step].values.astype(np.float32)
        current_target = df[target_column].iloc[step]

        pred_action, probs = loaded_agent.act(window, return_probs=True)
        predictions.append(pred_action)

        next_target = df[target_column].iloc[step + 1]
        true_action = get_true_action(current_target, next_target)
        pl = get_profit_loss(current_target, next_target, true_action, pred_action)
        total_profit += pl

        if pl > 0:
            message = f'Profit={pl:.4f}'
        else:
            message = f'Loss={pl:.4f}'
        print(f'True={true_action} -- Pred={pred_action} -- Prob={probs} -- {message}')
        true_actions.append(true_action)

    return true_actions, predictions, total_profit

# Get predictions on the held-out test CSV
df = pd.read_csv(test_csv_path)
y_true, y_pred, total_profit = get_batch_predictions(df, lookback)

print(f'\n true dist == {Counter(y_true)}, pred dist == {Counter(y_pred)}')
# total_profit is accumulated from price differences, so it is in price units;
# divide by the pip size (0.0001, the same value get_profit_loss charges per trade) to report pips
PIP_SIZE = 0.0001
print(f'Total Profit/Loss: {total_profit:.4f} ({total_profit / PIP_SIZE:.1f} pips)')

### 4.a. Visualize Agent's Evaluation Metrics

In [None]:
from timeseries_agent.utils.extras import plot_prediction_density, plot_confusion_matrix_n_metrics

In [None]:
# Pass the true and predicted action lists to the project's prediction-density plotting helper
plot_prediction_density(y_true, y_pred)

In [None]:
# Pass the true and predicted action lists to the project's confusion-matrix / metrics plotting helper
plot_confusion_matrix_n_metrics(y_true, y_pred)