In [1]:
import pandas as pd
import os

In [2]:
data_url = "/Users/benjaminzaidel/Desktop/Kaggle/Forex_Pairs"

In [3]:
import os
import pandas as pd

def create_wide_dataframe(data_folder: str, limit_rows: int = None) -> pd.DataFrame:
    """
    Reads all .txt files in `data_folder`, each containing
    <DTYYYYMMDD>, <TIME>, <CLOSE>, etc. The resulting DataFrame
    has:
      - A 'time_step' column (int) formed by concatenating date/time (YYYYMMDDHHMMSS).
      - One column per currency pair (derived from filename).
      - Each cell is the <CLOSE> value for that (time_step, currency_pair).
    
    Only reads up to `limit_rows` from each file (if provided).

    Then sorts rows by the first 8 digits of time_step (date),
    and then by the remaining digits (time).
    """

    dfs_for_merge = []

    for filename in os.listdir(data_folder):
        if filename.endswith(".txt"):
            filepath = os.path.join(data_folder, filename)
            
            # 1) Optionally limit rows
            if limit_rows is not None:
                df = pd.read_csv(filepath, nrows=limit_rows)
            else:
                df = pd.read_csv(filepath)

            required_cols = {"<DTYYYYMMDD>", "<TIME>", "<CLOSE>"}
            if not required_cols.issubset(df.columns):
                print(f"Warning: Missing required columns in '{filename}'. Skipping.")
                continue

            df["time_step"] = (
                df["<DTYYYYMMDD>"].astype(str) + df["<TIME>"].astype(str)
            ).astype(int)
            
            currency_pair = os.path.splitext(filename)[0]  # e.g. 'AUDJPY.txt' -> 'AUDJPY'

            mini_df = df[["time_step", "<CLOSE>"]].copy()
            mini_df.rename(columns={"<CLOSE>": currency_pair}, inplace=True)
            mini_df.set_index("time_step", inplace=True)

            dfs_for_merge.append(mini_df)

    if not dfs_for_merge:
        print("No valid data found or missing required columns.")
        return pd.DataFrame(columns=["time_step"])

    # 2) Merge all mini DataFrames side-by-side on time_step
    df_wide = pd.concat(dfs_for_merge, axis=1)

    df_wide.reset_index(inplace=True)

    # 3) Sort by date/time
    time_str = df_wide["time_step"].astype(str)
    date_part = time_str.str[:8].astype(int)
    time_part = time_str.str[8:].astype(int)

    df_wide["date_part"] = date_part
    df_wide["time_part"] = time_part

    df_wide.sort_values(by=["date_part", "time_part"], inplace=True, ascending=[True, True])
    df_wide.drop(["date_part", "time_part"], axis=1, inplace=True)

    return df_wide


In [6]:
from typing import List, Tuple
import numpy as np
def build_rate_matrices(df_wide: pd.DataFrame) -> Tuple[List[np.ndarray], List[int]]:
    """
    Convert each row of the wide df into an adjacency matrix.
    Returns a list of matrices (one per row) and
    the corresponding list of time_steps for reference.
    """
    # 1) Identify all currency pairs
    all_pairs = [col for col in df_wide.columns if col != "time_step"]
    
    # 2) Extract unique currencies
    currency_set = set()
    for pair in all_pairs:
        base = pair[:3]
        quote = pair[3:]
        currency_set.add(base)
        currency_set.add(quote)
    currency_list = sorted(list(currency_set))
    currency_to_idx = {cur: i for i, cur in enumerate(currency_list)}
    
    # 3) We'll build a list of adjacency matrices, one per row
    rate_matrices = []
    time_steps = []

    for _, row in df_wide.iterrows():
        # Initialize adjacency
        n_c = len(currency_list)
        mat = np.zeros((n_c, n_c), dtype=np.float32)
        # set diagonal to 1.0
        np.fill_diagonal(mat, 1.0)
        
        for pair in all_pairs:
            rate = row[pair]
            if pd.isna(rate):
                continue
            base = pair[:3]
            quote = pair[3:]
            
            i = currency_to_idx[base]
            j = currency_to_idx[quote]
            
            mat[i, j] = rate
            
            # if you also want to fill the reciprocal:
            if rate != 0:
                mat[j, i] = 1.0 / rate
        
        rate_matrices.append(mat)
        time_steps.append(row["time_step"])
    
    return rate_matrices, time_steps, currency_list


In [13]:
import gym
import numpy as np
from gym import spaces

class ForexTradingEnv(gym.Env):
    def __init__(self, rate_matrices, currency_list, base_currency='USD'):
        """
        :param rate_matrices: list of shape [n_c, n_c] adjacency matrices
        :param currency_list: list of currency codes, e.g. ['AUD','CAD','CHF','EUR','GBP','JPY','NZD','USD',...]
        :param base_currency: which currency we measure final PnL in
        """
        super(ForexTradingEnv, self).__init__()
        
        self.rate_matrices = rate_matrices
        self.currency_list = currency_list
        self.n_c = len(currency_list)
        
        # Index of the base currency in currency_list
        self.base_idx = currency_list.index(base_currency)
        
        self.num_steps = len(rate_matrices)
        self.current_step = 0
        
        # The agent's holdings in each currency
        self.portfolio = np.zeros(self.n_c, dtype=np.float32)
        self.portfolio[self.base_idx] = 1.0  # start with 1 unit of base currency only
        
        # Action space: choose from among n_c*n_c possible conversions i->j
        self.action_space = spaces.Discrete(self.n_c * self.n_c)
        
        # Observation space: (n_c, n_c) matrix of exchange rates
        # If you want the portfolio included, you can flatten or handle it differently:
        self.observation_space = spaces.Box(
            low=0, high=np.inf,
            shape=(self.n_c, self.n_c),
            dtype=np.float32
        )
    
    def reset(self):
        self.current_step = 0
        self.portfolio[:] = 0
        self.portfolio[self.base_idx] = 1.0  # start with 1 in base
        return self._get_obs()
    
    def _get_obs(self):
        # Possibly just return the adjacency matrix.  If you also want to include
        # the portfolio in the observation, you can flatten them together, e.g.:
        # matrix_flat = self.rate_matrices[self.current_step].flatten()
        # obs = np.concatenate([matrix_flat, self.portfolio], axis=0)
        # But for now, we'll just return the matrix:
        return self.rate_matrices[self.current_step]
    
def step(self, action):
    i = action // self.n_c
    j = action % self.n_c

    current_matrix = self.rate_matrices[self.current_step]
    old_val_base = self._value_in_base(current_matrix)

    # Perform conversion
    if i != j and current_matrix[i, j] > 0:
        amount_i = self.portfolio[i]
        if amount_i > 0:
            self.portfolio[i] = 0.0
            self.portfolio[j] += amount_i * current_matrix[i, j]

    self.current_step += 1

    # Check if we are at the end
    done = (self.current_step >= self.num_steps)
    reward = 0.0

    if done:
        # Final reward = final portfolio value - initial
        final_val_base = self._value_in_base(current_matrix)
        reward = final_val_base - 1.0
        # Return a valid observation (e.g. the last one) so SB3 doesn't crash
        return self.rate_matrices[-1], reward, done, {}
    else:
        # Compute incremental reward
        new_matrix = self.rate_matrices[self.current_step]
        new_val_base = self._value_in_base(new_matrix)
        reward = new_val_base - old_val_base
        return self.rate_matrices[self.current_step], reward, done, {}
    
        # inside ForexTradingEnv.step():
    info = {"portfolio": self.portfolio.copy()}
    return obs, reward, done, info


    def _value_in_base(self, matrix):
        """
        Convert the entire portfolio to base currency using the given adjacency matrix.
        If rate=0 or NaN, treat it as unconvertible (skipped).
        """
        total_base = 0.0
        for c_idx, amt in enumerate(self.portfolio):
            if c_idx == self.base_idx:
                total_base += amt
            else:
                rate = matrix[c_idx, self.base_idx]
                if rate > 0:
                    total_base += amt * rate
                else:
                    # missing or invalid, treat as 0
                    pass
        return total_base


In [9]:
if __name__ == "__main__":
    # 1) Build your wide DataFrame
    df_wide = create_wide_dataframe(data_url, limit_rows=10000)
    
    # 2) Convert df_wide into adjacency matrices
    rate_matrices, time_steps, currency_list = build_rate_matrices(df_wide)

    # 3) Create environment
    env = ForexTradingEnv(rate_matrices, currency_list, base_currency='USD')
    
    # 4) Quick random-policy run
    obs = env.reset()
    done = False
    total_reward = 0.0
    
    while not done:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        total_reward += reward
    
    print("Random policy total reward:", total_reward)


Random policy total reward: -2.0


In [None]:
import pandas as pd
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# 1) Create the wide DataFrame
df_wide = create_wide_dataframe(data_url)

# Optionally limit to the first 1000 rows for faster experiments:
df_wide = df_wide.head(1000).copy()

# 2) Convert df_wide to adjacency matrices
rate_matrices, time_steps, currency_list = build_rate_matrices(df_wide)

# 3) Create an instance of your environment
env_instance = ForexTradingEnv(rate_matrices, currency_list, base_currency='USD')

# 4) Wrap the environment in a DummyVecEnv so SB3 can handle it
env = DummyVecEnv([lambda: env_instance])

# 5) Instantiate a DQN model
model = DQN(
    policy="MlpPolicy",    # Use a simple MLP
    env=env,
    learning_rate=1e-3,
    buffer_size=50000,
    batch_size=64,
    exploration_fraction=0.1,
    exploration_final_eps=0.02,
    verbose=1,
    tensorboard_log="./dqn_fx_log"  # optional, for TensorBoard
)

# 6) Train the model
model.learn(total_timesteps=10000)  # adjust as needed

# 7) Evaluate or test the trained model
obs = env.reset()
done = False
total_reward = 0.0

while True:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    total_reward += reward[0]  # stable-baselines uses vectorized env, so reward is array-like
    if done:
        break

print("Total reward (deterministic) after training:", total_reward)


In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Suppose 'model' is your trained DQN and 'env' is your DummyVecEnv
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5, render=False)

print(f"Evaluation over 5 episodes: mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def run_eval_episodes(model, env, num_episodes=5):
    """
    Runs `num_episodes` episodes in the given vec-env (env),
    always acting deterministically. Returns a list of total rewards.
    """
    ep_rewards = []
    
    for e in range(num_episodes):
        obs = env.reset()
        done = False
        total_r = 0.0
        
        while not done:
            # model.predict() returns (action, state), we only need 'action'
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            
            # because this is a vectorized env, reward is typically an array
            total_r += reward[0]  
        
        ep_rewards.append(total_r)
    
    return ep_rewards


# After model is trained:
episode_rewards = run_eval_episodes(model, env, num_episodes=10)

plt.figure(figsize=(8, 5))
plt.plot(episode_rewards, marker='o')
plt.title("Deterministic Policy Rewards over 10 Episodes")
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.grid(True)
plt.show()


In [None]:
def run_eval_with_portfolio(model, env, num_episodes=1):
    obs = env.reset()
    done = False
    portfolio_history = [env.envs[0].portfolio.copy()]  # for vectorized env
    rewards_history = []
    
    total_r = 0.0
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_r += reward[0]
        # Grab the updated portfolio from info:
        portfolio_history.append(info[0]["portfolio"].copy())
        rewards_history.append(reward[0])
    
    return np.array(portfolio_history), rewards_history, total_r

# Example usage, for 1 episode
portfolio_hist, rewards_hist, final_r = run_eval_with_portfolio(model, env, num_episodes=1)

plt.figure(figsize=(10,4))
for c_idx, cur_name in enumerate(env.envs[0].currency_list):
    plt.plot(portfolio_hist[:, c_idx], label=cur_name)
plt.title(f"Portfolio Holdings Over 1 Episode (Final Reward = {final_r:.2f})")
plt.xlabel("Time Step")
plt.ylabel("Amount Held")
plt.legend()
plt.show()
