In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import time

In [67]:
r=0
d=2
s_0=100
delta=0.05
rho=0.6
sigma=0.2
mean=np.zeros(shape=(d,))
cov=np.ones(shape=(d,d))*rho
for i in range(d):
    cov[i,i]=1
F=100
B=70
K=100
c=7/12
T=1
T_i=1/2
N=12
N_i=6
lr=5e-4
delta_t=T/N
sqrt_delta_t=np.sqrt(delta_t)
batch_size=1024
M=10000

In [98]:
def BSmodel(batch_size):
    S=np.ones(shape=(batch_size,d,N+1))*s_0
    for i in range(0,N):
        brown=np.random.multivariate_normal(mean=mean,cov=cov,size=(batch_size,))*sqrt_delta_t
        S[:,:,i+1]=S[:,:,i]*np.exp((r-0.5*sigma**2)*delta_t+sigma*brown)
    for i in range(N_i,N+1):
        S[:,:,i]*=(1-delta)
    return S

In [99]:
def makesample(S):
    bs=S.shape[0]
    X=np.zeros(shape=(bs,d+2,N+1))
    X[:,:d,:]=S
    for i in range(1,N+1):
        X[:,d,i]=X[:,d,i-1]+(np.amin(S[:,:,i],axis=1)<B).astype(np.int)
    X[:,d,:]=(X[:,d,:]>0).astype(np.int)
    X[:,d+1,0]=F
    for i in range(1,N+1):
        X[:,d+1,i]=np.exp(-1*r*delta_t)*X[:,d+1,i-1]+np.exp(-1*r*delta_t)*c
    price=np.amin(S[:,:,N],axis=1)
    X[:,d+1,N]+=np.exp(-1*r*T)*(X[:,d,N]==1)*(price<K)*(price-F)
    return X

In [100]:
def Bermudian(x):
    return torch.nn.functional.relu(torch.max(x,dim=1)[0]-K)

In [101]:
class one_time_net(torch.nn.Module):
    def __init__(self,d):
        super(one_time_net,self).__init__()
        self.n_neuron=[d,d+40,d+40,1]
        self.norm=torch.nn.BatchNorm1d(self.n_neuron[0])
        #self.layer1=self._one_layer(self.n_neuron[0],self.n_neuron[1],torch.nn.ReLU())
        #self.layer2=self._one_layer(self.n_neuron[1],self.n_neuron[2],torch.nn.ReLU())
        #self.layer3=self._one_layer(self.n_neuron[2],self.n_neuron[3],None)
        self.layers=torch.nn.ModuleList([self._one_layer(self.n_neuron[0],self.n_neuron[1],torch.nn.ReLU()),self._one_layer(self.n_neuron[1],self.n_neuron[2],torch.nn.ReLU()),
                                        self._one_layer(self.n_neuron[2],self.n_neuron[3],torch.nn.Sigmoid())])
    def _one_layer(self,input_dim,output_dim,activation_fn=torch.nn.ReLU()):
        one_layer=torch.nn.Sequential()
        one_layer.add_module('Linear',torch.nn.Linear(input_dim,output_dim))
        one_layer.add_module('Norm',torch.nn.BatchNorm1d(output_dim))
        if activation_fn != None:
            one_layer.add_module('activation',activation_fn)
        return one_layer
    def forward(self,x):
        norm=self.norm(x)
        l1=self.layers[0](x)
        l2=self.layers[1](l1)
        out=self.layers[2](l2)
        return out      

In [102]:
stopping_rule={}
for i in range(1,N):
    stopping_rule['t{}'.format(i)]=one_time_net(d+2)
optimizers={}
for i in range(1,N):
    optimizers['t{}'.format(i)]=torch.optim.Adam(stopping_rule['t{}'.format(i)].parameters(),lr=lr)
Y0=torch.nn.Parameter(torch.tensor(95,dtype=torch.float32))
Yoptimizer=torch.optim.Adam(list([Y0]),lr=lr)
for i in list(stopping_rule):
    for name,para in stopping_rule[i].named_parameters():
        if 'Linear' in name and 'weight' in name:
            torch.nn.init.xavier_normal_(para)

In [103]:
since=time.time()
for i in range(M):
    S=BSmodel(batch_size)
    X=makesample(S)
    X=torch.tensor(X,dtype=torch.float32,requires_grad=False)
    continue_value=X[:,-1,N]
    for k in reversed(range(1,N)):
        stopping_value=X[:,-1,k]
        f=stopping_rule['t{}'.format(k)](X[:,:,k])
        loss=(stopping_value*f.squeeze()+continue_value*(1-f.squeeze())).mean()
        optimizer=optimizers['t{}'.format(k)]
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        newF=stopping_rule['t{}'.format(k)](X[:,:,k])
        index=(newF>0.5).reshape((-1,))
        continue_value[index]=stopping_value[index]
    loss=(Y0-continue_value.mean()).pow(2)
    if (i+1)%500==0:
        print('Episode {} loss:{},Y0:{}'.format(i+1,loss.item(),Y0.item()))
        #print(loss.item())
        #print(Y0.item())
    Yoptimizer.zero_grad()
    loss.backward()
    Yoptimizer.step()
print('Training Finish. Use {}s'.format(time.time()-since))

Episode 500 loss:14.373245239257812,Y0:95.22394561767578
Episode 1000 loss:11.951170921325684,Y0:95.44950103759766
Episode 1500 loss:13.368833541870117,Y0:95.6738510131836
Episode 2000 loss:8.339753150939941,Y0:95.896484375
Episode 2500 loss:7.7689995765686035,Y0:96.11275482177734
Episode 3000 loss:8.199078559875488,Y0:96.32814025878906
Episode 3500 loss:6.265344619750977,Y0:96.53997802734375
Episode 4000 loss:4.849088191986084,Y0:96.75102996826172
Episode 4500 loss:4.56123161315918,Y0:96.9554443359375
Episode 5000 loss:2.4713234901428223,Y0:97.15675354003906
Episode 5500 loss:2.1380627155303955,Y0:97.35161590576172
Episode 6000 loss:1.975286602973938,Y0:97.54026794433594
Episode 6500 loss:1.4469773769378662,Y0:97.72142791748047
Episode 7000 loss:0.576478898525238,Y0:97.89400482177734
Episode 7500 loss:1.1899888515472412,Y0:98.0608139038086
Episode 8000 loss:0.08156228810548782,Y0:98.21061706542969
Episode 8500 loss:0.32949063181877136,Y0:98.3551254272461
Episode 9000 loss:0.0765222683

In [104]:
S=BSmodel(4096*100)
X=makesample(S)
X=torch.tensor(X,dtype=torch.float32,requires_grad=False)
continue_value=X[:,-1,N]
for k in reversed(range(1,N)):
    stopping_value=X[:,-1,k]
    newF=stopping_rule['t{}'.format(k)](X[:,:,k])
    index=(newF>0.5).reshape((-1,))
    continue_value[index]=stopping_value[index]

In [105]:
print(continue_value.mean().item())

98.8381118774414
