In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import sympy as sp

In [200]:
# Time indices used for training (they parameterise the loss function).
# t_i is the column read from each dataset when building y_train;
# T_i is passed to the loss as the maturity, i.e. the number of GBM steps simulated.

t_i, T_i = 0, 3

In [201]:
# Import the underlying (security) price dataset
csv_file = 'Security_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Drop the first column and keep the first 28800 rows for training
x_train = df.iloc[:28800, 1:]

# Underlying price at column t_i: first component of the training targets
y_train = x_train.iloc[:, t_i].to_numpy()

# Print the first few rows of the DataFrame
print(x_train.head())

      0     1     2     3     4     5     6     7     8     9  ...  42  43  \
0   598   606   619   636   649   655   648   644   652   638  ...   0   0   
1   681   685   694   696   704     0     0     0     0     0  ...   0   0   
2   704   707   700   724   731   743   755   745   740   731  ...   0   0   
3  1101  1108  1101  1119  1130  1094  1104  1118  1119  1092  ...   0   0   
4  1107  1132  1148  1166  1174  1164  1125  1081  1084  1098  ...   0   0   

   44  45  46  47  48  49  50  51  
0   0   0   0   0   0   0   0   0  
1   0   0   0   0   0   0   0   0  
2   0   0   0   0   0   0   0   0  
3   0   0   0   0   0   0   0   0  
4   0   0   0   0   0   0   0   0  

[5 rows x 52 columns]


In [202]:
# Import the strike dataset
csv_file = 'Strike_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Drop the first column and keep the first 28800 rows for training
Strike_train = df.iloc[:28800, 1:]

# Rebuild y_train from its sources rather than stacking onto its previous value,
# so re-running this cell does not append the strike row a second time.
# Row 0: underlying price at t_i; row 1: strike at t_i (used to compute the
# intrinsic value of the option).
y_train = np.vstack([
    x_train.iloc[:, t_i].to_numpy(),
    Strike_train.iloc[:, t_i].to_numpy(),
])

# Print the first few rows of the DataFrame
print(Strike_train.head())

      0     1     2     3     4     5     6     7     8     9  ...  42  43  \
0   600   600   600   600   600   600   600   600   600   600  ...   0   0   
1   630   630   630   630   630     0     0     0     0     0  ...   0   0   
2   620   620   620   620   620   620   620   620   620   620  ...   0   0   
3  1005  1005  1005  1005  1005  1005  1005  1005  1005  1005  ...   0   0   
4   850   850   850   850   850   850   850   850   850   850  ...   0   0   

   44  45  46  47  48  49  50  51  
0   0   0   0   0   0   0   0   0  
1   0   0   0   0   0   0   0   0  
2   0   0   0   0   0   0   0   0  
3   0   0   0   0   0   0   0   0  
4   0   0   0   0   0   0   0   0  

[5 rows x 52 columns]


In [203]:
# Import the option price dataset
csv_file = 'Price_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Drop the first column and keep the first 28800 rows for training
Price_train = df.iloc[:28800, 1:]

# Assemble the targets in a single step from the three source frames, so this cell
# can be re-run safely (stacking onto the previous y_train and transposing it again
# would fail with a shape mismatch on a second run).
# Result: one row per sample, columns = (underlying, strike, option price) at t_i.
y_train = np.column_stack([
    x_train.iloc[:, t_i].to_numpy(),
    Strike_train.iloc[:, t_i].to_numpy(),
    Price_train.iloc[:, t_i].to_numpy(),
])

# Print the first few rows of the DataFrame
print(Price_train.head())

     0    1    2    3    4    5    6    7    8    9  ...  42  43  44  45  46  \
0   14   16   25   38   52   56   51   44   51   37  ...   0   0   0   0   0   
1   54   57   65   66   74    0    0    0    0    0  ...   0   0   0   0   0   
2   89   92   84  109  113  125  135  127  119  111  ...   0   0   0   0   0   
3  175  182  178  189  194  168  176  187  186  166  ...   0   0   0   0   0   
4  267  290  300  318  327  316  274  234  237  249  ...   0   0   0   0   0   

   47  48  49  50  51  
0   0   0   0   0   0  
1   0   0   0   0   0  
2   0   0   0   0   0  
3   0   0   0   0   0  
4   0   0   0   0   0  

[5 rows x 52 columns]


In [204]:
# Inspect the assembled targets: one row per sample, columns = (underlying, strike, option price) at t_i
y_train

array([[ 598,  600,   14],
       [ 681,  630,   54],
       [ 704,  620,   89],
       ...,
       [1325, 1430,    5],
       [1408,  400, 1000],
       [1391, 1675,    3]], dtype=int64)

In [472]:
# Define the loss function. It requires:
#   - simulating the GBM trajectories;
#   - simulating the Gaussian (martingale) variables.

mini_batch = 64   # batch size shared by model.fit and the reshapes inside the loss
TSlenght = 52     # number of input columns; also passed to Martingale as the horizon

frequencies = np.arange(1,30,1)  # sine frequencies 1..29 (TODO: add the cosine frequencies)


# First implementation without k: each t_i has a single T_i so far,
# and only one evaluation time is used.

times = [t_i]             # the t_i
maturities = [T_i]        # the T_i

'''
def quad_var_calc(freq, eval_time, horizon):
    # Function that evaluates the integral of sin^2(...), that is our single frequency variance
    result = eval_time/2 - (horizon*np.sin((4*freq*np.pi*eval_time)/horizon))/(8*freq*np.pi)

    return result
'''

def quad_var_calc(weights, t, T, frequencies):
    """Approximate the integral over [0, t] of (sum_k w_k * sin(2*pi*f_k*s / T))**2 ds.

    The integral is evaluated with a right-endpoint Riemann sum on 100 equal steps.
    The whole grid is evaluated at once with tensor ops instead of a Python loop, so
    only a handful of ops are added to a traced graph.

    Args:
        weights: tensor whose last axis has one weight per frequency, shape (..., N).
            A 1-D tensor of shape (N,) gives a scalar result.
        t: upper integration limit.
        T: horizon used to scale the sine arguments (period of the base frequency).
        frequencies: array-like of N frequencies f_k. The values themselves are used,
            not only their count.

    Returns:
        float32 tensor of shape weights.shape[:-1] with the approximated integral.
    """
    num_steps = 100  # Number of steps for numerical integration
    delta_s = t / num_steps  # Step size

    weights = tf.cast(weights, tf.float32)
    freqs = tf.cast(tf.convert_to_tensor(frequencies), tf.float32)          # (N,)

    # Right-endpoint grid s_i = i * delta_s for i = 1..num_steps
    grid = tf.range(1, num_steps + 1, dtype=tf.float32) * delta_s            # (num_steps,)

    # sin(2*pi*f_k*s_i / T) for every grid point and frequency
    basis = tf.sin((2 * np.pi / T) * grid[:, tf.newaxis] * freqs[tf.newaxis, :])   # (num_steps, N)

    # Weighted sum over frequencies at each grid point
    integrand = tf.reduce_sum(weights[..., tf.newaxis, :] * basis, axis=-1)  # (..., num_steps)

    integral_approx = tf.reduce_sum(tf.square(integrand), axis=-1) * delta_s

    return integral_approx


def GBM(mu = 0.05, sigma = 0.2, n = 50, dt = 0.001, x0 = 100, batch_size = 64):
    """Simulate the terminal value of a geometric Brownian motion for each sample.

    Each path takes n steps of size dt with log-increment
    (mu - sigma**2 / 2) * dt + sigma * dW, where dW ~ N(0, dt). Only the last point
    of every path is returned.

    The noise is drawn with tf.random.normal, so new shocks are sampled on every
    execution, including when this function is called from code traced by
    tf.function (for example a Keras loss). NumPy random calls would be evaluated
    once at trace time and then reused as constants.

    Args:
        mu: drift.
        sigma: volatility.
        n: number of time steps per path.
        dt: step size.
        x0: starting value(s); a scalar, or a tensor/array with batch_size elements
            (shape (batch_size,) or (batch_size, 1)).
        batch_size: number of independent paths.

    Returns:
        float32 tensor of shape (batch_size, 1) with the terminal values.
    """
    # One starting value per path, as a column; a scalar x0 is broadcast to all paths.
    x0 = tf.reshape(tf.cast(x0, tf.float32), (-1, 1))
    x0 = tf.broadcast_to(x0, (batch_size, 1))

    # Brownian increments dW ~ N(0, dt) for every path and step
    shocks = tf.random.normal((batch_size, n), mean=0.0, stddev=np.sqrt(dt), dtype=tf.float32)

    # The product of the per-step growth factors equals exp(sum of the log-increments)
    log_increments = (mu - sigma ** 2 / 2) * dt + sigma * shocks
    log_growth = tf.reduce_sum(log_increments, axis=1, keepdims=True)

    result = x0 * tf.exp(log_growth)

    return result

'''
def Martingale(frequencies, weights, eval_time=1, horizon=10, batch_size=64):

    # Calculate the integral of the variance function m(t) in order to obtain the variance
    quad_var = tf.zeros([batch_size, len(frequencies)])

    # We calculate separately the variances and multiply them by the weights tensor
    for i in range(len(frequencies)):
        for j in range(batch_size):
            quad_var = tf.tensor_scatter_nd_update(quad_var, [[j, i]], [quad_var_calc(frequencies[i], eval_time, horizon)])

    variance = tf.multiply(weights, quad_var)

    # Define a Normal distribution with TensorFlow
    random_value = tf.random.normal([batch_size, len(frequencies)], mean=0.0, stddev=tf.sqrt(variance), dtype=tf.dtypes.float32)

    return random_value
'''

def Martingale(frequencies, weights, eval_time=1, horizon=10, batch_size=64):
    """Draw one zero-mean Gaussian value per sample.

    For each row j < batch_size of `weights`, the variance is
    quad_var_calc(weights[j], eval_time, horizon, frequencies); the returned sample
    for that row is normal with mean 0 and that variance.

    Args:
        frequencies: frequencies forwarded to quad_var_calc.
        weights: per-sample weights, indexed along the first axis.
        eval_time: upper integration limit forwarded to quad_var_calc.
        horizon: horizon forwarded to quad_var_calc.
        batch_size: number of rows of `weights` to use and number of samples drawn.

    Returns:
        float32 tensor of shape (batch_size,).
    """
    # Per-sample variance, one quad_var_calc evaluation per row
    per_sample_variance = tf.stack([
        quad_var_calc(weights[row], eval_time, horizon, frequencies)
        for row in range(batch_size)
    ])

    # tf.random.normal takes a standard deviation, hence the square root
    std_dev = tf.sqrt(per_sample_variance)

    return tf.random.normal([batch_size], mean=0.0, stddev=std_dev, dtype=tf.float32)


class Q_Loss(tf.keras.losses.Loss):
    """Squared error between a simulated option payoff and the observed option price.

    `y_true` carries three columns per sample: underlying price, strike and option
    price. `y_pred` carries the per-frequency weights produced by the network. The
    underlying is propagated with GBM, perturbed in log-space by a Martingale draw
    whose variance depends on `y_pred`, and the resulting call payoff
    max(S - strike, 0) is compared with the option price.

    Relies on the module-level names `mini_batch`, `frequencies`, `TSlenght`, `GBM`
    and `Martingale`. Only the first maturity (`maturity[0]`) is used so far.

    Args:
        times: list of evaluation times t_i (stored; not used in `call` yet).
        maturity: list of maturities T_i; `maturity[0]` is the number of GBM steps
            and the evaluation time of the martingale.
        lambd: regularisation weight (stored; the regularisation term is not
            implemented yet).
        name: loss name forwarded to the Keras base class.
    """

    def __init__(self, times, maturity, lambd = 0, name="Q_loss", **kwargs):
        super().__init__(name=name, **kwargs)
        self.times = times
        self.maturity = maturity
        # Stored under the constructor argument's name so get_config can read it.
        self.lambd = lambd
        # Alias kept for any code that reads the attribute under its former name.
        self.lamb = lambd

    def call(self, y_true, y_pred):
        """Return the mean squared payoff error over the batch.

        Args:
            y_true: tensor of shape (mini_batch, 3) with columns
                (underlying, strike, option price).
            y_pred: tensor of shape (mini_batch, len(frequencies)) with the weights.
        """
        targets = tf.cast(y_true, tf.dtypes.float64)

        # Column 0 stays a column, (mini_batch, 1), because GBM indexes x0 per sample.
        x = targets[:, 0:1]
        # Strike and option price as flat vectors of shape (mini_batch,)
        strike = tf.reshape(targets[:, 1], (mini_batch,))
        g = tf.reshape(targets[:, 2], (mini_batch,))

        # TODO: loop over self.times to handle more than one t_i.

        # Log of the underlying simulated through GBM, flattened to (mini_batch,)
        x_star = tf.math.log(GBM(mu = 0.05, sigma = 0.2, n = self.maturity[0], dt = 0.001, x0 = x, batch_size = mini_batch))
        x_star = tf.squeeze(tf.cast(x_star, tf.dtypes.float64))

        # Martingale perturbation whose variance is driven by the predicted weights
        mart = Martingale(frequencies, y_pred, self.maturity[0], TSlenght, mini_batch)
        mart = tf.cast(mart, tf.dtypes.float64)

        # G_i,k is taken to be the option's intrinsic value max(S - strike, 0).
        # TODO: add the regularisation term weighted by self.lambd.
        payoff = tf.math.maximum(tf.exp(x_star + mart) - strike, tf.constant(0.0, dtype=tf.float64))
        loss = tf.square(payoff - g)
        return tf.math.reduce_mean(loss)

    def get_config(self):
        """Return the constructor arguments so the loss can be re-created.

        Keys match the `__init__` parameter names, so `from_config` can pass them
        back as keyword arguments.
        """
        config = {
            'times': self.times,
            'maturity': self.maturity,
            'lambd': self.lambd
        }
        base_config = super().get_config()
        return {**base_config, **config}

In [473]:
#Define the model, there will be 3 steps:
#1) Only Layer of ReLU
#2) Layer of ReLU + sin
#3) Try the fancy activation functions

def custom_activation(x):
    """Clip the input element-wise to the interval [-0.8, 0.8]."""
    floored = tf.maximum(x, -0.8)    # lower bound
    return tf.minimum(floored, 0.8)  # upper bound

# Create a simple feed-forward model: TSlenght inputs, four sigmoid hidden layers,
# and one output per frequency
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation='sigmoid', input_shape=(TSlenght,)),
    tf.keras.layers.Dense(64, activation='sigmoid'),
    tf.keras.layers.Dense(64, activation='sigmoid'),
    tf.keras.layers.Dense(32, activation='sigmoid'),
    tf.keras.layers.Dense(len(frequencies), activation = custom_activation)  # One weight per frequency, clipped to [-0.8, 0.8] (not a softmax)
])

In [474]:
# Train the model on the dataset and test it.

# Instantiate the custom loss with the configured evaluation times and maturities
loss_fn = Q_Loss(times, maturities)

# Compile the model with the custom loss function

model.compile(optimizer='adam', loss=loss_fn)

# Train the model with the custom loss function.
# Q_Loss.call reshapes to (mini_batch,), so the batch size used here must be mini_batch.
model.fit(x_train, y_train, epochs=30, batch_size=mini_batch)

Epoch 1/30
 88/450 [====>.........................] - ETA: 4:04 - loss: 866559.0625

In [None]:
# Run the trained network on the training inputs
predictions = model.predict(x_train)

# Keep the first sample's predicted weights as a (1, number of frequencies) tensor,
# i.e. a batch of one, which is the layout Martingale indexes by row
tensor_pred = tf.reshape(tf.convert_to_tensor(predictions[0]), (1, len(frequencies)))
tensor_pred



<tf.Tensor: shape=(1, 29), dtype=float32, numpy=
array([[ 0.8       , -0.49280283,  0.4969293 ,  0.8       , -0.3966284 ,
         0.60507387, -0.8       , -0.33343518, -0.03114109, -0.12552714,
        -0.7483919 ,  0.8       , -0.18543218,  0.8       , -0.06704858,
         0.0066129 , -0.09024712, -0.08749251,  0.15876706, -0.02691003,
        -0.8       , -0.22111495,  0.43936592,  0.39044264,  0.45710498,
        -0.38128135, -0.1811505 , -0.12903973,  0.13731556]],
      dtype=float32)>

In [None]:
# Code for testing the model: Monte Carlo estimate (1000 draws) of the mean squared
# error against the observed underlying of the first sample, with and without the
# learned martingale term.

average, average_mart = 0, 0

# Starting value: underlying of the first sample at column 0
x_0 = tf.convert_to_tensor([x_train.iloc[0,0]], dtype = tf.float64)

for i in range(1000):

    # delta between predicted underlying with GBM and actual value
    # NOTE(review): sigma is 0.02 here but 0.2 in Q_Loss.call -- confirm which is intended.
    # NOTE(review): the hard-coded 3 (GBM steps, target column, martingale time) equals
    # the current T_i; confirm whether these should follow T_i.

    x_star = tf.math.log(GBM(mu = 0.05, sigma = 0.02, n = 3, dt = 0.001, x0 = x_0, batch_size = 1))

    average += (np.exp(x_star[0].numpy())- x_train.iloc[0,3])**2

    # One martingale draw using the first sample's predicted weights
    mart = Martingale(frequencies, tensor_pred, 3, TSlenght, 1)

    # delta between predicted underlying with GBM + martingale and actual value

    average_mart += (np.exp(x_star[0].numpy() + mart.numpy()) - x_train.iloc[0,3])**2

# Turn the accumulated squared errors into means over the 1000 draws
average = average/1000
average_mart = average_mart/1000
print(average)
print(average_mart)

[1438.0201]
[1459.542]
