<a href="https://colab.research.google.com/github/frtrigg5/A-new-signature-model/blob/Code/DatasetGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **TIME SERIES OBTAINED BY STOCHASTIC PROCESSES**

In [None]:
def MB_sample(begin,end,number):
  """Sample one Brownian motion trajectory on a uniform grid over [begin, end].

  The starting value is drawn as W_begin ~ N(0, begin) (exactly 0 when
  begin == 0); every following value adds an independent N(0, step)
  increment, where step = (end - begin) / (number - 1).

  Args:
    begin: left end of the time interval, must be >= 0.
    end: right end of the time interval, must be >= begin.
    number: number of grid points, must be >= 1.

  Returns:
    A pair (sample, timesteps) of 1-D float arrays of length `number`:
    the sampled path and the time grid it lives on.

  Raises:
    ValueError: if number < 1, or (from math.sqrt) if begin < 0 or end < begin.
  """
  if number<1:
    raise ValueError("number must be at least 1")
  timesteps=np.linspace(begin,end,number)
  # A single grid point has no increments, so the step size is irrelevant there.
  step=(end-begin)/(number-1) if number>1 else 0.0
  # One draw for the whole path: entry 0 drives the starting value, the others
  # drive the increments. Drawing a 1-D vector and indexing it avoids storing a
  # size-1 array into a scalar slot, which recent NumPy versions deprecate.
  noise=np.random.randn(number)
  increments=math.sqrt(step)*noise
  increments[0]=math.sqrt(begin)*noise[0]
  sample=np.cumsum(increments)

  return sample,timesteps

In [None]:
#Geometric Brownian motion sampling procedure
# GBM: dX_t=mu X_t dt + sigma X_t dW_t
def GMB_sample(begin,end,InitCond, mu,sigma,number):
  """Sample one geometric Brownian motion trajectory on a uniform grid.

  Uses the closed form X_t = InitCond * exp((mu - sigma^2/2) * t + sigma * W_t)
  evaluated on a Brownian path W sampled on the same grid.

  Args:
    begin: left end of the time interval, must be >= 0.
    end: right end of the time interval, must be >= begin.
    InitCond: value stored at the first grid point.
    mu: drift coefficient.
    sigma: diffusion coefficient.
    number: number of grid points, must be >= 1.

  Returns:
    A pair (sample, timesteps) of 1-D float arrays of length `number`.

  Raises:
    ValueError: if number < 1, or (from math.sqrt) if begin < 0 or end < begin.
  """
  if number<1:
    raise ValueError("number must be at least 1")
  timesteps=np.linspace(begin,end,number)
  # A single grid point has no increments, so the step size is irrelevant there.
  step=(end-begin)/(number-1) if number>1 else 0.0
  drift=mu-(sigma**2)/2

  # Brownian path: W_begin ~ N(0, begin), then independent N(0, step) increments.
  # A 1-D draw indexed element-wise avoids the deprecated assignment of a
  # size-1 array into a scalar slot.
  noise=np.random.randn(number)
  increments=math.sqrt(step)*noise
  increments[0]=math.sqrt(begin)*noise[0]
  MB=np.cumsum(increments)

  sample=InitCond*np.exp(drift*timesteps+sigma*MB)
  # NOTE(review): the first point is pinned to InitCond while the later ones use
  # absolute time and W_t, so the path is only consistent when begin == 0.
  sample[0]=InitCond

  return sample,timesteps

In [None]:
#sampling procedure from dX_t= (sqrt(1+X_t^2)+X_t/2) dt + sqrt(1+X_t^2) dW_t
def Sinh_sample(begin,end,InitCond,number):
  """Sample one trajectory of X_t = sinh(asinh(InitCond) + t + W_t) on a uniform grid.

  Args:
    begin: left end of the time interval, must be >= 0.
    end: right end of the time interval, must be >= begin.
    InitCond: value stored at the first grid point.
    number: number of grid points, must be >= 1.

  Returns:
    A pair (sample, timesteps) of 1-D float arrays of length `number`.

  Raises:
    ValueError: if number < 1, or (from math.sqrt) if begin < 0 or end < begin.
  """
  if number<1:
    raise ValueError("number must be at least 1")
  timesteps=np.linspace(begin,end,number)
  # A single grid point has no increments, so the step size is irrelevant there.
  step=(end-begin)/(number-1) if number>1 else 0.0
  # asinh(x) equals log(sqrt(1+x^2)+x) but does not cancel to log(0) when x is
  # large and negative.
  offset=math.asinh(InitCond)

  # Brownian path: W_begin ~ N(0, begin), then independent N(0, step) increments.
  # A 1-D draw indexed element-wise avoids the deprecated assignment of a
  # size-1 array into a scalar slot.
  noise=np.random.randn(number)
  increments=math.sqrt(step)*noise
  increments[0]=math.sqrt(begin)*noise[0]
  MB=np.cumsum(increments)

  sample=np.sinh(offset+timesteps+MB)
  # NOTE(review): the first point is pinned to InitCond while the later ones use
  # absolute time and W_t, so the path is only consistent when begin == 0.
  sample[0]=InitCond

  return sample,timesteps


In [None]:
#Ornstein-Uhlenbeck : dX_t=alpha(gamma-X_t)dt+beta dW_t
def OU_sample(begin,end,InitCond,alpha,gamma,beta,number):
  """Sample one Ornstein-Uhlenbeck trajectory on a uniform grid.

  Evaluates
    X_t = InitCond*exp(-alpha*t) + gamma*(1 - exp(-alpha*t)) + beta*exp(-alpha*t)*I_t
  where I_t approximates the stochastic integral of exp(alpha*u) dW_u by an
  elementary process: the Brownian increment over [t_{i-1}, t_i] is weighted by
  the mean of exp(alpha*u) over the previous interval [t_{i-2}, t_{i-1}]. The
  very first increment therefore gets weight 0.

  Args:
    begin: left end of the time interval, must be >= 0.
    end: right end of the time interval, must be >= begin.
    InitCond: value stored at the first grid point.
    alpha: mean-reversion speed; alpha == 0 uses the limiting weight 1.
    gamma: long-run mean.
    beta: diffusion coefficient.
    number: number of grid points, must be >= 1.

  Returns:
    A pair (sample, timesteps) of 1-D float arrays of length `number`.

  Raises:
    ValueError: if number < 1, or (from math.sqrt) if begin < 0 or end < begin.
  """
  if number<1:
    raise ValueError("number must be at least 1")
  timesteps=np.linspace(begin,end,number)
  # A single grid point has no increments, so the step size is irrelevant there.
  step=(end-begin)/(number-1) if number>1 else 0.0

  # Brownian path: W_begin ~ N(0, begin), then independent N(0, step) increments.
  # A 1-D draw indexed element-wise avoids the deprecated assignment of a
  # size-1 array into a scalar slot.
  noise=np.random.randn(number)
  increments=math.sqrt(step)*noise
  increments[0]=math.sqrt(begin)*noise[0]
  MB=np.cumsum(increments)

  # Running value of the approximated stochastic integral; entries 0 and 1 stay 0.
  StochIntApprox=np.zeros(number)
  if number>2:
    dW=np.diff(MB)[1:] # increments over [t_{i-1}, t_i] for i >= 2
    scale=alpha*step
    if scale==0:
      # alpha == 0: the mean of exp(alpha*u) is 1. step == 0: every increment is
      # zero, so the weight does not matter. Either way no division is needed.
      terms=dW
    else:
      growth=np.diff(np.exp(alpha*timesteps))[:-1] # over [t_{i-2}, t_{i-1}]
      terms=growth*dW/scale
    StochIntApprox[2:]=np.cumsum(terms)

  decay=np.exp(-alpha*timesteps)
  sample=InitCond*decay+gamma*(1-decay)+beta*decay*StochIntApprox
  # NOTE(review): the first point is pinned to InitCond while the later ones use
  # absolute time, so the path is only consistent when begin == 0.
  sample[0]=InitCond

  return sample,timesteps

To sample from fractional Brownian motion, we use the `fbm` package.

# **EXAMPLE OF DATASET: FBM PROBLEM**

In [None]:
import fbm

In [None]:
#construction of the dataset
d=1 #dimension of the time series
begin=0 #first timestep
end=1 #last timestep
number=100 #length of each time series
trShape,vlShape,testShape=1000,400,600 #number of series in the train / validation / test sets
H=0.26 # Hurst exponent
#n=number-1 so that f.fbm() fills a row of length `number`; `length` keeps the
#FBM horizon equal to the span used for the Brownian samples
f=fbm.FBM(number-1,H,length=end-begin)

Known_times=np.linspace(begin,end,number) #known time instants
div=1 #how many new time instants to take between two known ones
New_times=np.zeros(div*(number-1))
for i in range(0,(number-1)):
  #interior points of each known interval (both endpoints are dropped)
  New_times[(div*i):(div*(i+1))]=np.linspace(Known_times[i],Known_times[i+1],(div+2))[1:(1+div)]

L1=Known_times.size
L2=New_times.size

timestamps=np.concatenate((Known_times,New_times),axis=0)

def _sample_series(n_series):
  """Return an (n_series, number) array: first half Brownian paths, the rest FBM paths."""
  half=n_series//2
  values=np.zeros(shape=[n_series,number])
  #Brownian and FBM draws are interleaved, one of each per iteration
  for k in range(0,half):
    values[k]=MB_sample(begin,end,number)[0]
    values[k+half]=f.fbm()
  #odd sizes: the leftover row is FBM so that it agrees with its label
  for k in range(2*half,n_series):
    values[k]=f.fbm()
  return values

def _make_labels(n_series):
  """Return an (n_series, 1) uint8 column: 0 for Brownian motion, 1 for FBM."""
  labels=np.zeros((n_series,1),dtype='uint8')
  labels[n_series//2:]=1
  return labels

#time series train
dataset_value=_sample_series(trShape)

#time series validation
dataset_value2=_sample_series(vlShape)

#time series test
dataset_value3=_sample_series(testShape)

# adding known and unknown time stamps: every series shares the same row of times
time_data=np.tile(timestamps,(trShape,1))
time_data2=np.tile(timestamps,(vlShape,1))
time_data3=np.tile(timestamps,(testShape,1))

#full dataset train: each row is [known times, new times, series values]
dataset=np.concatenate((time_data,dataset_value),axis=-1)
dataset=dataset.astype('float32')

#full dataset validation
dataset2=np.concatenate((time_data2,dataset_value2),axis=-1)
dataset2=dataset2.astype('float32')

#full dataset test
dataset3=np.concatenate((time_data3,dataset_value3),axis=-1)
dataset3=dataset3.astype('float32')

#label construction: label 0 for Brownian Motion, 1 for FBM
y=_make_labels(trShape)

#validation labels
y2=_make_labels(vlShape)

#test labels
y3=_make_labels(testShape)

In [None]:
batch=60 #mini-batch size passed to the training DataLoader
class MyDataset(torch.utils.data.Dataset):
  """Dataset that pairs item `index` of `dataset` with item `index` of `labels`."""

  def __init__(self,labels,dataset):
    # Only references are stored; nothing is copied or converted here.
    self.dataset=dataset
    self.labels=labels

  def __len__(self):
    # The number of items is taken from the label container.
    return len(self.labels)

  def __getitem__(self,index):
    # Items come back in (sample, label) order.
    return self.dataset[index], self.labels[index]
  
# Training data is served in shuffled mini-batches of size `batch`.
training_data=MyDataset(y,dataset)
train_dataloader=torch.utils.data.DataLoader(training_data,batch_size=batch,shuffle=True)
# Validation data is served unshuffled, as one batch holding all `vlShape` series.
validation_data=MyDataset(y2,dataset2)
validation_dataloader=torch.utils.data.DataLoader(validation_data,batch_size=vlShape,shuffle=False)