<a href="https://colab.research.google.com/github/gogog01-29-2021/241102-FBAQuant-1st-Capstone/blob/main/20241220.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
!pip install QuantLib

  and should_run_async(code)




In [26]:
import QuantLib as ql

In [27]:
def option_pricing(today, expiry):
    date = today
    ql.Settings.instance().evaluationDate = date
    calendar = ql.SouthKorea()
    dayCount = ql.ActualActual(ql.ActualActual.ISDA)

    # SimpleQuote Objects
    underlying_qt = ql.SimpleQuote(350) #기초자산 초기 가격
    dividend_qt = ql.SimpleQuote(0.0) # 배당
    riskfreerate_qt = ql.SimpleQuote(0.01)  #무위험 이자율
    volatility_qt = ql.SimpleQuote(0.2)

    # Quote Handle Objects
    u_qhd = ql.QuoteHandle(underlying_qt)
    d_qhd = ql.QuoteHandle(dividend_qt)
    r_qhd = ql.QuoteHandle(riskfreerate_qt)
    v_qhd = ql.QuoteHandle(volatility_qt)

    # Term Structure Objects
    r_ts = ql.FlatForward(date, r_qhd, dayCount) # 무위험 이자율 flatforward
    d_ts = ql.FlatForward(date, d_qhd, dayCount) # 배당률 flatforward
    v_ts = ql.BlackConstantVol(date, calendar, v_qhd, dayCount) # 변동성 ??

    # Term structure Handle Objects
    r_thd = ql.YieldTermStructureHandle(r_ts)
    d_thd = ql.YieldTermStructureHandle(d_ts)
    v_thd = ql.BlackVolTermStructureHandle(v_ts)

    # Process & Engine
    process = ql.BlackScholesMertonProcess(u_qhd, d_thd, r_thd, v_thd)
    engine = ql.AnalyticEuropeanEngine(process)

    # Option Objects
    option_type = ql.Option.Put
    K = 360
    expiry_date = expiry
    exercise = ql.EuropeanExercise(expiry_date)
    payoff = ql.PlainVanillaPayoff(option_type, K)
    option = ql.VanillaOption(payoff, exercise)

    option.setPricingEngine(engine)

    return option.NPV()

In [28]:
option_pricing(ql.Date(22,5,2022), ql.Date(22,11,2022))

24.402475043536924

In [29]:
#Underlying Path
import QuantLib as ql
import numpy as np

# 설정할 날짜
date = ql.Date(22, 10, 2024)
ql.Settings.instance().evaluationDate = date
calendar = ql.SouthKorea()
dayCount = ql.ActualActual(ql.ActualActual.ISDA)

# 초기 파라미터 설정
S0 = 350  # 초기 주가
mu = 0.05  # 기대 수익률
sigma = 0.2  # 변동성
r = 0.01  # 무위험 이자율
dividend = 0.0  # 배당률
T = 1.0  # 만기 시간 (1년)
dt = 1 / 252  # 일일 간격 (252 거래일 기준)
N = int(T / dt)  # 총 시뮬레이션 스텝 수
M = 100  # 시뮬레이션 횟수

# 초기 주가와 무위험 이자율을 직접 사용하여 GeometricBrownianMotionProcess 설정
process = ql.GeometricBrownianMotionProcess(S0, r, sigma)

# 난수 생성기
rng = ql.UniformRandomGenerator(seed=0)
uniform_rng = ql.UniformRandomSequenceGenerator(N, rng)
gaussian_rng = ql.GaussianRandomSequenceGenerator(uniform_rng)

# 경로 생성기
path_generator = ql.GaussianPathGenerator(process, T, N, gaussian_rng, False)

# 시뮬레이션 저장
simulations = []

for _ in range(M):
    sample_path = path_generator.next()
    path = sample_path.value()
    # path는 float 값의 리스트이므로, 직접 추가
    simulations.append([path[i] for i in range(len(path))])

In [30]:
simulations = np.array(simulations)
# Check the dimensions and a sample of the generated data
print("Simulations data shape:", simulations.shape)
print("Sample path (first simulation):", simulations[0][:10])  # Display first 10 values of the first simulation path


Simulations data shape: (100, 253)
Sample path (first simulation): [350.         347.61805758 350.44179516 349.54165088 349.82416241
 343.19535271 349.99342123 349.01581316 344.76272204 345.16878391]


In [31]:
from scipy.stats import norm
import QuantLib as ql
import numpy as np

# Simulation parameters for QuantLib
date = ql.Date(22, 10, 2024)
ql.Settings.instance().evaluationDate = date
calendar = ql.SouthKorea()
dayCount = ql.ActualActual(ql.ActualActual.ISDA)

S0 = 350  # Initial stock price
mu = 0.05  # Expected return
sigma = 0.2  # Volatility
r = 0.01  # Risk-free rate
T = 1.0  # Maturity in years
dt = 1 / 252  # Daily intervals (252 trading days in a year)
N = int(T / dt)  # Total steps
M = 100  # Number of simulation paths

# Initialize the GBM process for the underlying price
process = ql.GeometricBrownianMotionProcess(S0, r, sigma)
rng = ql.UniformRandomGenerator(seed=0)
uniform_rng = ql.UniformRandomSequenceGenerator(N, rng)
gaussian_rng = ql.GaussianRandomSequenceGenerator(uniform_rng)
path_generator = ql.GaussianPathGenerator(process, T, N, gaussian_rng, False)

# Run simulations for underlying price paths
simulations = []
for _ in range(M):
    sample_path = path_generator.next()
    path = sample_path.value()
    simulations.append([path[i] for i in range(len(path))])
simulations = np.array(simulations)

# Function to calculate option price using Black-Scholes model
def calculate_option_price(S, K=350, T=1.0, r=0.01, sigma=0.2):
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    option_price = S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    return option_price

# Initialize state for each path in the simulations
def initialize_state_from_simulation(simulations, option_weight=0.5):
    initial_states = []
    for path in simulations:
        underlying_price = path[0]  # Use first price in path as initial Underlying Price
        option_price = calculate_option_price(underlying_price)  # Compute initial Option Price
        underlying_weight = 1 - option_weight  # Set Underlying Weight as complement to Option Weight
        state = [option_weight, underlying_weight, option_price, underlying_price]
        initial_states.append(state)
    return initial_states

# Generate initial states
initial_states = initialize_state_from_simulation(simulations)

# Check the structure of initial states
print("Initial States Sample:", initial_states[:5])


Initial States Sample: [[0.5, 0.5, 29.516615415383598, 350.0], [0.5, 0.5, 29.516615415383598, 350.0], [0.5, 0.5, 29.516615415383598, 350.0], [0.5, 0.5, 29.516615415383598, 350.0], [0.5, 0.5, 29.516615415383598, 350.0]]


In [32]:
import gym
import numpy as np

class DeltaHedgeEnv(gym.Env):
    def __init__(self, simulations, option_weight=0.5):
        super(DeltaHedgeEnv, self).__init__()

        # Store simulations and initial weights
        self.simulations = simulations
        self.option_weight = option_weight  # Initial option weight
        self.underlying_weight = 1 - option_weight  # Complementary underlying weight

        # Action space for DDPG: continuous range [-0.1, 0.1]
        self.action_space = gym.spaces.Box(low=-0.1, high=0.1, shape=(1,), dtype=np.float32)

        # Observation space for state: [Option Weight, Underlying Weight, Option Price, Underlying Price]
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32)

        # Initialize state variables
        self.day_index = 0
        self.sim_index = 0
        self.P0 = None  # Initial portfolio value for the episode

    def reset(self):
        # Reset at start of episode: Randomly choose a simulation path and reset day index
        self.sim_index = np.random.randint(len(self.simulations))
        self.day_index = 0
        underlying_price = self.simulations[self.sim_index][self.day_index]
        option_price = calculate_option_price(underlying_price)

        # Initialize ef: [Option Weight, Underlying Weight, Option Price, Underlying Price]
        self.num_options=100
        self.portfolio_value=self.num_options*option_price
        underlying_position=0 #Assume Starting with no underyling asset?
        self.state = [underlying_position,self.num_options,self.portfolio_value,option_price,underlying_price]

        return np.array(self.state)

    def step(self, action):
        # Apply the action to adjust the option weight
        hedge_adjustment = action[0]
        underlying_position=self.state[0]+hedge_adjustment
        self.state[0]=underlying_position
        # Advance to the next time step in the selected simulation path
        self.day_index += 1
        done = self.day_index >= len(self.simulations[self.sim_index])  # Check if episode is done

        # Get next day's underlying price and calculate option price
        if not done:
            underlying_price_next = self.simulations[self.sim_index][self.day_index]
            option_price_next = calculate_option_price(underlying_price_next)

            portfolio_value_next=(underlying_position*underlying_price_next+self.num_options*option_price_next)

            reward = portfolio_value_next-self.state[2] #self.calculate_reward(self.state, option_price_next, underlying_price_next)
            self.state = [underlying_position,self.num_options,portfolio_value_next,option_price_next,underlying_price_next]
        else:
            reward = 0  # No reward if episode is done

        return np.array(self.state), reward, done, {}

    def calculate_reward(self, portfolio_value_next, portfolio_value_t):#self, state, option_price_next, underlying_price_next):
        stability_reward=-abs(portfolio_value_next-portfolio_value_t)
        if portfolio_value_next>0:
            growth_reward=np.log(portfolio_value_next/portfolio_value_t)
        else:
            growth_reward=-np.inf
        return 0.5*stability_reward+0.5*growth_reward
        # Calculate stability reward
        #balance_reward=-abs(self.option_weight-0.5)
    # Calculate portfolio value
    #     portfolio_value_t = self.state[2] * self.option_weight + self.state[3] * self.underlying_weight
    #     portfolio_value_next = option_price_next * self.option_weight + underlying_price_next * self.underlying_weight

    # # Stability term (as before)
    #     delta_portfolio = portfolio_value_next - portfolio_value_t
    #     delta_underlying = underlying_price_next - self.state[3]
    #     if abs(delta_underlying) > 1e-6:
    #         stability_reward = -abs(delta_portfolio / delta_underlying)
    #     else:
    #         stability_reward = 0

    # # Return term based on log portfolio growth (Kelly criterion approximation)
    #     if portfolio_value_next > 0 and portfolio_value_t > 0:
    #         return_reward = np.log(portfolio_value_next / portfolio_value_t)
    #     else:
    #         return_reward = -np.inf  # Penalize if portfolio value is non-positive

    # # Balance reward as in the previous section
    #     balance_reward = -abs(self.option_weight - 0.5)

    # # Combine all terms with respective weights
    #     reward = stability_reward + 0.1 * balance_reward + 0.2 * return_reward
    #     return reward



In [33]:
import torch
import torch.nn as nn
import torch.optim as optim

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()
        self.l1 = nn.Linear(state_dim, 256)
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, action_dim)
        self.max_action = max_action

    def forward(self, state):
        x = torch.relu(self.l1(state))
        x = torch.relu(self.l2(x))
        action = self.max_action * torch.tanh(self.l3(x))
        return action

class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()
        self.l1 = nn.Linear(state_dim + action_dim, 256)
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, 1)

    def forward(self, state, action):
        x = torch.relu(self.l1(torch.cat([state, action], 1)))
        x = torch.relu(self.l2(x))
        return self.l3(x)


In [34]:
from collections import deque
import random

class ReplayBuffer:
    def __init__(self, max_size=100000):
        self.buffer = deque(maxlen=max_size)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        return np.array(states), np.array(actions), np.array(rewards), np.array(next_states), np.array(dones)

class DDPGAgent:
    def __init__(self, state_dim, action_dim, max_action):
        self.actor = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=1e-4)
        self.critic = Critic(state_dim, action_dim).to(device)
        self.critic_target = Critic(state_dim, action_dim).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=1e-3)
        self.critic_target.load_state_dict(self.critic.state_dict())

        self.replay_buffer = ReplayBuffer()
        self.max_action = max_action
        self.discount = 0.99
        self.tau = 0.005

    def select_action(self, state):
        state = torch.FloatTensor(state.reshape(1, -1)).to(device)
        return self.actor(state).cpu().data.numpy().flatten()

    def train(self, batch_size=64):
        states, actions, rewards, next_states, dones = self.replay_buffer.sample(batch_size)

        # Convert to torch tensors
        states = torch.FloatTensor(states).to(device)
        actions = torch.FloatTensor(actions).to(device)
        rewards = torch.FloatTensor(rewards).reshape(-1, 1).to(device)
        next_states = torch.FloatTensor(next_states).to(device)
        dones = torch.FloatTensor(dones).reshape(-1, 1).to(device)

        # Critic training
        with torch.no_grad():
            next_actions = self.actor_target(next_states)
            target_q = self.critic_target(next_states, next_actions)
            target_q = rewards + (1 - dones) * self.discount * target_q

        current_q = self.critic(states, actions)
        critic_loss = nn.MSELoss()(current_q, target_q)

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Actor training
        actor_loss = -self.critic(states, self.actor(states)).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Update target networks
        for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)

        for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
            target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data)


In [35]:
import os

def train_ddpg(env, agent, episodes=1000, batch_size=64, save_interval=100, save_path='./ddpg_model'):
    for episode in range(episodes):
        state = env.reset()
        episode_reward = 0
        done = False

        while not done:
            action = agent.select_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.replay_buffer.add(state, action, reward, next_state, done)
            state = next_state
            episode_reward += reward

            if len(agent.replay_buffer.buffer) > batch_size:
                agent.train(batch_size)

        print(f"Episode {episode + 1}, Reward: {episode_reward}")

        # Save model at intervals
        if (episode + 1) % save_interval == 0:
            save_model(agent, save_path, episode + 1)

def save_model(agent, path, episode):
    if not os.path.exists(path):
        os.makedirs(path)
    torch.save(agent.actor.state_dict(), os.path.join(path, f'actor_{episode}.pth'))
    torch.save(agent.critic.state_dict(), os.path.join(path, f'critic_{episode}.pth'))


In [None]:
import torch
import os
from google.colab import files
# Set device for computation (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = DeltaHedgeEnv(simulations)

# Define state and action dimensions based on the environment's spaces
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = env.action_space.high[0]

# Instantiate the DDPG agent
agent = DDPGAgent(state_dim, action_dim, max_action)

# Filepath for saving and downloading checkpoints
checkpoint_path = '/content/ddpg_checkpoint.pth'
final_model_path = '/content/ddpg_final_model.pth'

# Function to save and download the model checkpoint
def save_checkpoint(agent, episode, filepath=checkpoint_path):
    torch.save({
        'actor_state_dict': agent.actor.state_dict(),
        'critic_state_dict': agent.critic.state_dict(),
        'target_actor_state_dict': agent.actor_target.state_dict(),
        'target_critic_state_dict': agent.critic_target.state_dict(),
        'optimizer_actor_state_dict': agent.actor_optimizer.state_dict(),
        'optimizer_critic_state_dict': agent.critic_optimizer.state_dict(),
        'episode': episode,
    }, filepath)
    print(f"Checkpoint saved at episode {episode}.")
    # Download checkpoint file
    files.download(filepath)
    print("Checkpoint downloaded.")

# Function to load the model checkpoint if it exists
def load_checkpoint(agent, filepath=checkpoint_path):
    if os.path.exists(filepath):
        checkpoint = torch.load(filepath)
        agent.actor.load_state_dict(checkpoint['actor_state_dict'])
        agent.critic.load_state_dict(checkpoint['critic_state_dict'])
        agent.target_actor.load_state_dict(checkpoint['actor_target_state_dict'])
        agent.target_critic.load_state_dict(checkpoint['critic_target_state_dict'])
        agent.actor_optimizer.load_state_dict(checkpoint['optimizer_actor_state_dict'])
        agent.critic_optimizer.load_state_dict(checkpoint['optimizer_critic_state_dict'])
        episode = checkpoint['episode']
        print(f"Checkpoint loaded, resuming from episode {episode}")
        return episode
    else:
        print("No checkpoint found, starting from episode 0")
        return 0

# Load from checkpoint if available
start_episode = load_checkpoint(agent)

# Training loop with periodic checkpoint saving and interruption handling
try:
    for episode in range(start_episode, 1000):  # Run for the specified number of episodes
        train_ddpg(env, agent, episodes=1)  # Train for one episode

        # Save checkpoint every 100 episodes
        if (episode + 1) % 100 == 0:
            save_checkpoint(agent, episode + 1)

except KeyboardInterrupt:
    # Save checkpoint if training is interrupted
    print("Training interrupted. Saving current progress...")
    save_checkpoint(agent, episode)

finally:
    # Save the final model at the end of training and download it
    torch.save(agent.actor.state_dict(), final_model_path)
    print(f"Final model saved at {final_model_path}")
    files.download(final_model_path)
    print("Final model downloaded.")


No checkpoint found, starting from episode 0
Episode 1, Reward: -7504.677837247262
Episode 1, Reward: -8419.243935016302
Episode 1, Reward: -3987.7548608606808
Episode 1, Reward: -9425.911062634012
Episode 1, Reward: -8656.202772107066
Episode 1, Reward: -8740.508576579876
Episode 1, Reward: -5282.507448661553
Episode 1, Reward: -8041.462699458167
Episode 1, Reward: -6815.183636082178
Episode 1, Reward: -9546.25015677069
Episode 1, Reward: -5452.451117881712
Episode 1, Reward: -8933.875518508767
Episode 1, Reward: -9607.632337850082
Episode 1, Reward: -9573.082571576891
Episode 1, Reward: -456.7306380369482
Episode 1, Reward: -9625.371758696536
Episode 1, Reward: -5282.507448661553
Episode 1, Reward: -1924.3263063489858
Episode 1, Reward: -9242.100411474486
Episode 1, Reward: -9625.371758696536
Episode 1, Reward: -9523.362735591138
Episode 1, Reward: -7900.337909496468
Episode 1, Reward: -8806.69759563691
Episode 1, Reward: -8388.413405043935
Episode 1, Reward: -5452.451117881712
Episo

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Checkpoint downloaded.
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -9568.339676272963
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -8753.739274280979
Episode 1, Reward: -9561.059338297171
Episode 1, Reward: -7172.06310620106
Episode 1, Reward: -9624.961650875688
Episode 1, Reward: -9551.360558140066
Episode 1, Reward: -9546.25015677069
Episode 1, Reward: -7504.677837247262
Episode 1, Reward: -9607.632337850082
Episode 1, Reward: -9621.949982704582
Episode 1, Reward: -9625.371758696536
Episode 1, Reward: -9473.400897957024
Episode 1, Reward: -5701.129927771363
Episode 1, Reward: -2179.731923034248
Episode 1, Reward: -8806.69759563691
Episode 1, Reward: -9553.265194014584
Episode 1, Reward: -9338.542391332276
Episode 1, Reward: -9315.850157550716
Episode 1, Reward: -9605.654061218764
Episode 1, Reward: -8419.243935016302
Episode 1, Reward: -8766.909289728528
Episode 1, Reward: -8231.446891001942
Episode 1, Reward: -9384.84882013982
Episode 1, Reward: -8947.063839

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Checkpoint downloaded.
Episode 1, Reward: -8504.188154609514
Episode 1, Reward: -9596.763848005843
Episode 1, Reward: -537.423802847064
Episode 1, Reward: -6665.575437670759
Episode 1, Reward: -8096.421567297562
Episode 1, Reward: -9511.768415837909
Episode 1, Reward: -9242.100411474486
Episode 1, Reward: -9473.400897957024
Episode 1, Reward: -9546.25015677069
Episode 1, Reward: -9568.339676272963
Episode 1, Reward: -9561.059338297171
Episode 1, Reward: -9391.361205013043
Episode 1, Reward: -9611.62919145043
Episode 1, Reward: -9539.956439176085
Episode 1, Reward: -9611.62919145043
Episode 1, Reward: -1924.3263063489858
Episode 1, Reward: -8231.446891001942
Episode 1, Reward: -8538.127410473477
Episode 1, Reward: -8799.996746136938
Episode 1, Reward: -9338.0961993151
Episode 1, Reward: 2935.088773348659
Episode 1, Reward: -8740.508576579876
Episode 1, Reward: -9185.319257319006
Episode 1, Reward: -8947.063839563529
Episode 1, Reward: -537.423802847064
Episode 1, Reward: -5452.451117881

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Checkpoint downloaded.
Episode 1, Reward: -9624.961650875688
Episode 1, Reward: -8536.731747156358
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -9597.597171789705
Episode 1, Reward: -7504.677837247262
Episode 1, Reward: -456.7306380369482
Episode 1, Reward: -8933.875518508767
Episode 1, Reward: -9546.25015677069
Episode 1, Reward: -9625.613829679169
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -6182.184462287189
Episode 1, Reward: -9625.991083594125
Episode 1, Reward: -9573.082571576891
Episode 1, Reward: -8041.462699458167
Episode 1, Reward: -5701.129927771363
Episode 1, Reward: -456.7306380369482
Episode 1, Reward: -8766.909289728528
Episode 1, Reward: -8096.421567297562
Episode 1, Reward: -9185.319257319006
Episode 1, Reward: -8022.824932526093
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -1924.3263063489858
Episode 1, Reward: -8349.985985176643
Episode 1, Reward: -8323.255305307826
Episode 1, Reward: -8740.508576579876
Episode 1, Reward: -9566.45

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Checkpoint downloaded.
Episode 1, Reward: -7719.934177210235
Episode 1, Reward: -9596.763848005843
Episode 1, Reward: -9624.961650875688
Episode 1, Reward: -9212.215779347436
Episode 1, Reward: -9511.768415837909
Episode 1, Reward: -9566.451891670606
Episode 1, Reward: -9052.888382956215
Episode 1, Reward: -9338.542391332276
Episode 1, Reward: -8880.237066764861
Episode 1, Reward: -456.7306380369482
Episode 1, Reward: -9127.565227367097
Episode 1, Reward: -9425.911062634012
Episode 1, Reward: -9338.0961993151
Episode 1, Reward: -7172.655785126776
Episode 1, Reward: -8933.875518508767
Episode 1, Reward: -5968.889307365417
Episode 1, Reward: -9625.613829679169
Episode 1, Reward: -9625.371758696536
Episode 1, Reward: -9391.361205013043
Episode 1, Reward: -7985.530830538722
Episode 1, Reward: -9341.443626842422
Episode 1, Reward: -6182.184462287189
Episode 1, Reward: 812.6623583184264
Episode 1, Reward: -5968.889307365417
Episode 1, Reward: -9425.911062634012
Episode 1, Reward: -9597.61704

In [None]:
import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Parameters for synthetic data generation
num_days = 100  # Number of days in the simulation
initial_underlying_price = 100  # Starting price of the underlying asset
volatility = 0.02  # Daily volatility for underlying asset
option_sensitivity = 0.5  # Sensitivity of option price to changes in underlying
initial_option_price = 10  # Starting price of the option

# Generate synthetic underlying prices with random daily changes
underlying_prices = [initial_underlying_price]
for i in range(1, num_days):
    daily_return = np.random.normal(0, volatility)
    new_price = underlying_prices[-1] * (1 + daily_return)
    underlying_prices.append(new_price)

# Generate synthetic option prices with some correlation to underlying prices
option_prices = [initial_option_price]
for i in range(1, num_days):
    # Calculate option price based on the underlying with added random noise
    option_price = option_prices[-1] * (1 + option_sensitivity * (underlying_prices[i] - underlying_prices[i - 1]) / underlying_prices[i - 1])
    option_price += np.random.normal(0, 0.5)  # Adding some noise
    option_prices.append(option_price)

# Combine into a DataFrame
synthetic_data = pd.DataFrame({
    'Day': np.arange(num_days),
    'Underlying_Price': underlying_prices,
    'Option_Price': option_prices
})

# Display the first few rows to check the synthetic dataset
print(synthetic_data.head())

# Save to CSV for later use
synthetic_data.to_csv('synthetic_test_data.csv', index=False)


In [None]:
import torch
import numpy as np

# Load the trained model weights from the final saved model file
def load_final_model(agent, filepath=final_model_path):
    if os.path.exists(filepath):
        agent.actor.load_state_dict(torch.load(filepath))
        print(f"Final model loaded from {filepath}")
    else:
        print("No saved model found.")

# Testing function to run the model on a test dataset
def test_ddpg(env, agent, test_simulations, num_episodes=10):
    agent.actor.eval()  # Set the model to evaluation mode
    rewards = []

    for episode in range(num_episodes):
        state = env.reset()  # Reset environment for each test episode
        done = False
        total_reward = 0

        while not done:
            # Convert state to tensor and predict action
            state_tensor = torch.FloatTensor(state).to(device)
            with torch.no_grad():
                action = agent.actor(state_tensor).cpu().numpy()

            # Step in the environment using the selected action
            next_state, reward, done, _ = env.step(action)
            total_reward += reward

            # Update state
            state = next_state

        rewards.append(total_reward)
        print(f"Episode {episode + 1}: Total Reward = {total_reward}")

    # Summary of test results
    avg_reward = np.mean(rewards)
    print(f"Average Reward over {num_episodes} test episodes: {avg_reward}")
    return avg_reward

# Load the final model
load_final_model(agent)

# Define or load your test simulations data
# Here, `test_simulations` should be defined similar to `simulations` but used for testing
test_simulations = synthetic_data[['Underlying_Price', 'Option_Price']].values
  # Replace with actual test data

# Create a test environment with the test simulations
test_env = DeltaHedgeEnv(test_simulations)

# Run the test on the loaded model
average_test_reward = test_ddpg(test_env, agent, test_simulations, num_episodes=10)
print(f"Average Test Reward: {average_test_reward}")


In [None]:
"""import pandas as pd

# Load historical price data for the underlying asset
underlying_data = pd.read_csv('underlying_prices.csv', parse_dates=['Date'])

# Load historical options data
options_data = pd.read_csv('options_prices.csv', parse_dates=['Date'])

# Merge datasets on the Date column
merged_data = pd.merge(underlying_data, options_data, on='Date', how='inner')

# Select relevant columns
# Assume 'Underlying_Price' and 'Option_Price' are columns in the merged dataset
data = merged_data[['Date', 'Underlying_Price', 'Option_Price']]

# Save the prepared dataset
data.to_csv('prepared_test_data.csv', index=False)
"""