In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import sympy as sp

In [2]:
# Time grid used for training (these are the times that enter the loss function).
# t_i: column positions selected from the interpolated series when building y_train.
# T_i: one entry per t_i; later aliased to `maturities` and handed to Q_Loss, which
#      uses each entry as the GBM step count and as the martingale evaluation time.

t_i = [0,20,30]
T_i = [3,3,3]

In [3]:
# Linear interpolation of the time series (TS):
# number of points inserted between every pair of consecutive original columns.
num_elements = 13

def interpolate_dataframe(df, num_elements):
    """Insert linearly interpolated columns between consecutive columns of ``df``.

    Column 0 of ``df`` is skipped. For every pair of adjacent columns
    ``(c, c + 1)`` with ``c >= 1`` the output contains the left column followed
    by ``num_elements`` evenly spaced intermediate columns. The right column of
    the last pair is not emitted on its own, so the final original column never
    appears in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; columns from position 1 onwards must support arithmetic.
    num_elements : int
        Number of interpolated columns to insert between each pair.

    Returns
    -------
    pandas.DataFrame
        Frame with ``(len(df.columns) - 2) * (num_elements + 1)`` columns, or
        an empty frame when ``df`` has fewer than three columns.
    """
    # Fractional distance between two neighbouring output columns (loop invariant).
    step_size = 1 / (num_elements + 1)

    # Collect one small block per column pair and concatenate once at the end;
    # re-concatenating the growing result on every iteration copies it each time.
    blocks = []
    for col_idx in range(1, len(df.columns) - 1):
        col1 = df.iloc[:, col_idx]      # left endpoint of the pair
        col2 = df.iloc[:, col_idx + 1]  # right endpoint of the pair

        interpolated_values = [col1 + i * (col2 - col1) * step_size for i in range(1, num_elements + 1)]

        # Per-pair concat keeps the column labels exactly as before:
        # the original label followed by 0..num_elements-1 for the inserted columns.
        blocks.append(pd.concat([col1] + interpolated_values, axis=1))

    if not blocks:
        # Nothing to interpolate (fewer than three columns): pd.concat rejects an empty list.
        return pd.DataFrame()

    return pd.concat(blocks, axis=1)

In [4]:
# Import the underlying (security) price dataset
# NOTE(review): absolute Colab-style path; presumably the repo is cloned under /content — confirm.
csv_file = '/content/Neural_martingale/Security_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Interpolate the df (interpolate_dataframe starts at column 1, so the CSV's first column is dropped here)
df_interp = interpolate_dataframe(df, num_elements)

# Keep the first 28800 rows and the first 65 interpolated columns as training inputs
x_train = df_interp.iloc[:28800, :65]

# First group of target columns: the underlying sampled at the column positions in t_i
y_train = x_train.iloc[:, t_i].to_numpy()

# Print the first few rows of the DataFrame
#print(x_train.head())

In [5]:
# Import the strike dataset
csv_file = '/content/Neural_martingale/Strike_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Interpolate the df (interpolate_dataframe starts at column 1, so the CSV's first column is dropped here)
df_interp = interpolate_dataframe(df, num_elements)

# Keep the first 28800 rows and the first 65 interpolated columns for training
Strike_train = df_interp.iloc[:28800, :65]

# Targets so far: [underlying | strike] at the column positions in t_i.
# The strike columns are used to compute the intrinsic value of the option.
# y_train is rebuilt from its sources rather than appended to, so re-running
# this cell cannot stack duplicate strike columns onto an already-extended array.
y_train = np.hstack([
    x_train.iloc[:, t_i].to_numpy(),
    Strike_train.iloc[:, t_i].to_numpy(),
])

# Print the first few rows of the DataFrame
#print(Strike_train.head())

In [6]:
# Import the option price dataset
csv_file = '/content/Neural_martingale/Price_adj.csv'

# Use pandas to read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Interpolate the df (interpolate_dataframe starts at column 1, so the CSV's first column is dropped here)
df_interp = interpolate_dataframe(df, num_elements)

# Keep the first 28800 rows and the first 65 interpolated columns for training
Price_train = df_interp.iloc[:28800, :65]

# Final targets: [underlying | strike | price], each sampled at the column
# positions in t_i. Built from the three source frames instead of appending to
# the current y_train, so re-running this cell always yields the same array.
y_train = np.hstack([
    x_train.iloc[:, t_i].to_numpy(),
    Strike_train.iloc[:, t_i].to_numpy(),
    Price_train.iloc[:, t_i].to_numpy(),
])

# Print the first few rows of the DataFrame
#print(Price_train.head())

In [7]:
# Inspect the assembled targets: underlying, strike and price column groups, each sampled at t_i.
y_train

array([[ 598.        ,  611.57142857,  621.42857143, ...,   14.        ,
          19.85714286,   26.85714286],
       [ 681.        ,  688.85714286,  694.28571429, ...,   54.        ,
          60.42857143,   65.14285714],
       [ 704.        ,  704.        ,  703.42857143, ...,   89.        ,
          88.57142857,   87.57142857],
       ...,
       [1325.        , 1285.14285714, 1240.        , ...,    5.        ,
           1.71428571,    0.        ],
       [1408.        , 1404.28571429, 1388.42857143, ..., 1000.        ,
         998.57142857,  984.14285714],
       [1391.        , 1409.        , 1411.71428571, ...,    3.        ,
           3.42857143,    4.14285714]])

In [8]:
# Set-up for the loss function. Evaluating the loss requires:
#   - simulating the GBM trajectories, and
#   - simulating the Gaussian (martingale) variables.

mini_batch = 16   # batch size; also the batch_size passed to GBM/Martingale inside the loss
TSlenght = 65     # time-series length: network input width and the martingale horizon

frequencies = np.arange(10,30,1)  # sine frequencies 10..29 -- TODO: also add the cosine frequencies


# First implementation with no k: each t_i currently has a single T_i,
# and only one evaluation time per t_i is tried.

times = t_i             # the t_i
maturities = T_i        # the T_i


def quad_var_calc(weights, freq, t, T):
    """Closed-form integral over [0, t] of the squared weighted sine sum.

    Evaluates

        integral_0^t ( sum_j weights[j] * sin(2*pi*freq[j]*s / T) )**2 ds

    by summing the analytic integrals of ``sin^2`` (diagonal terms) and of
    ``2*sin*sin`` (each off-diagonal pair counted twice). This is the variance
    associated with the given frequency weights.

    Parameters
    ----------
    weights : indexable of scalars / 1-D tensor
        One weight per frequency.
    freq : sequence of numbers
        Frequencies; they must be non-zero and pairwise distinct, because the
        closed forms divide by ``freq[j]``, ``freq[j] - freq[k]`` and
        ``freq[j] + freq[k]``.
    t : number
        Upper integration limit.
    T : number
        Period scale appearing in the sine arguments.

    Returns
    -------
    tf.Tensor
        Scalar float32 tensor holding the integral.
    """
    N = len(freq)

    # Accumulate directly over the lower triangle (j <= k). The previous
    # version staged every term in an N*N TensorArray, stacked it and looped a
    # second time; the terms and their summation order are unchanged here.
    result = 0.0

    for k in range(N):
        for j in range(k + 1):
            if j < k:
                # Off-diagonal pair: integral of w_j*w_k*sin(.)sin(.), counted twice for (j,k) and (k,j)
                cross = tf.cast(weights[j]*weights[k],tf.float64)*(T*(tf.sin((2*np.pi*t*(freq[j] - freq[k]))/T)/(freq[j] - freq[k]) - tf.sin((2*np.pi*t*(freq[j] + freq[k]))/T)/(freq[j] + freq[k])))/(4*np.pi)
                result += 2*cross
            else:
                # Diagonal term: integral of w_j^2 * sin^2(.)
                result += tf.cast((weights[j]**2),tf.float64)*(t/2 - (T*tf.sin((4*freq[j]*np.pi*t)/T))/(8*freq[j]*np.pi))

    return tf.cast(result, tf.float32)


def GBM(mu = 0.05, sigma = 0.02, n = 50, dt = 0.001, x0 = 100, batch_size = 64):
    """Sample the terminal value of a geometric Brownian motion per batch entry.

    Each path starts at ``x0`` and takes ``n`` log-normal steps of size ``dt``;
    only the last point of every path is returned.

    The Gaussian shocks are drawn with ``tf.random.normal``. Drawing them with
    NumPy would run only while a ``tf.function`` is being traced (as happens
    when this is called from a Keras loss during ``model.fit``), freezing a
    single noise sample into the graph for every training step.

    Parameters
    ----------
    mu, sigma : float
        Drift and volatility of the process.
    n : int
        Number of time steps.
    dt : float
        Step size.
    x0 : scalar or tensor-like
        Start value(s). A scalar is shared by all paths; otherwise the first
        ``batch_size`` entries along axis 0 are used, one per path.
    batch_size : int
        Number of independent paths.

    Returns
    -------
    tf.Tensor
        float32 tensor of shape ``(batch_size, 1)`` with the terminal values.
    """
    x0 = tf.cast(x0, tf.float32)
    if x0.shape.rank == 0:
        # Scalar start value (e.g. the default): share it across the batch.
        x0 = tf.fill([batch_size], x0)
    else:
        x0 = x0[:batch_size]
    x0 = tf.reshape(x0, [batch_size, 1])

    # One N(0, dt) shock per step and per path, generated in float64.
    shocks = tf.random.normal([batch_size, n], mean=0.0, stddev=np.sqrt(dt), dtype=tf.float64)
    log_steps = (mu - sigma ** 2 / 2) * dt + sigma * shocks

    # The product of the per-step factors equals exp(sum of log-steps);
    # only the terminal point is needed, so the full path is never built.
    growth = tf.exp(tf.reduce_sum(log_steps, axis=1, keepdims=True))

    return x0 * tf.cast(growth, tf.float32)


def Martingale(frequencies, weights, eval_time=1, horizon=10, batch_size=64):
    """Draw one zero-mean Gaussian sample per batch entry.

    For every row ``weights[j]`` (``j < batch_size``) the variance is obtained
    from ``quad_var_calc(weights[j], frequencies, eval_time, horizon)``; the
    sample for that row is then drawn from a normal distribution with that
    variance.

    Returns a float32 tensor of shape ``(batch_size,)``.
    """
    # Integrated variance for each sample in the batch
    per_sample_variance = tf.stack([
        quad_var_calc(weights[row], frequencies, eval_time, horizon)
        for row in range(batch_size)
    ])

    # Standard deviation is the square root of the integrated variance
    return tf.random.normal(
        [batch_size],
        mean=0.0,
        stddev=tf.sqrt(per_sample_variance),
        dtype=tf.float32,
    )


class Q_Loss(tf.keras.losses.Loss):
    """Worst-case (max over times) squared pricing error.

    ``y_true`` is expected to hold three groups of ``N = len(times)`` columns:
    ``[x | strike | g]`` (underlying, strike, observed price). For every time
    index ``i`` the underlying is propagated with ``GBM``, perturbed in
    log-space by a ``Martingale`` draw whose variance is driven by ``y_pred``,
    and the intrinsic value ``max(exp(x* + M) - strike_i, 0)`` is compared with
    ``g_i``. The loss is the largest of the N batch-mean squared errors.

    Relies on the module-level ``mini_batch``, ``frequencies``, ``TSlenght``,
    ``GBM`` and ``Martingale``; ``mini_batch`` rows are processed per call.

    Parameters
    ----------
    times : sequence
        The t_i; only its length is used here.
    maturity : sequence
        One entry per time; used as GBM step count and martingale evaluation time.
    lambd : number
        Regularisation weight (stored, not yet used in ``call``).
    name : str
        Loss name forwarded to Keras.
    """

    def __init__(self, times, maturity, lambd = 0, name="Q_loss", **kwargs):
        super().__init__(name=name, **kwargs)
        self.times = times
        self.maturity = maturity
        # get_config reads `self.lambd`; it was previously stored only as
        # `self.lamb`, which made get_config raise AttributeError.
        self.lambd = lambd
        self.lamb = lambd  # legacy alias kept for existing readers of `.lamb`

    def call(self, y_true, y_pred):
        """Return the maximum over times of the batch-mean squared error."""

        N = len(self.times)

        # Split y_true into its three column groups (each of width N) in float64
        y_true = tf.cast(y_true, tf.dtypes.float64)
        x = y_true[:, :N]
        strike = y_true[:, N:2*N]
        g = y_true[:, 2*N:3*N]

        losses = []  # one batch-mean loss per time index

        for i in range(N):

            # Log of the underlying's terminal value simulated through GBM
            x_star = tf.math.log(GBM(mu = 1.2, sigma = 0.02, n = self.maturity[i], dt = 0.001, x0 = x[:,i], batch_size = mini_batch))
            x_star = tf.cast(x_star, tf.dtypes.float64)

            # (batch, 1) -> (batch,). An explicit reshape keeps the batch axis
            # even when batch == 1; g is deliberately left 2-D so that g[:, i]
            # stays valid when N == 1.
            x_star = tf.reshape(x_star, [-1])

            # Martingale perturbation driven by the predicted weights
            mart = Martingale(frequencies, y_pred, self.maturity[i], TSlenght, mini_batch)
            mart = tf.cast(mart, tf.dtypes.float64)

            # G_i is taken to be the option's intrinsic value.
            # TODO: add the lambd-weighted regularisation term.
            intrinsic = tf.math.maximum(tf.exp(x_star + mart) - strike[:,i], tf.constant(0.0, dtype=tf.float64))
            losses.append(tf.math.reduce_mean(tf.square(intrinsic - g[:,i])))

        # Worst case across the time indices
        return tf.reduce_max(losses)

    def get_config(self):
        """Return the constructor arguments merged into the base Keras config."""
        config = {
            'times': self.times,
            'maturity': self.maturity,
            # Key must match the constructor argument name so that
            # from_config(cls(**config)) can rebuild the loss.
            'lambd': self.lambd
        }
        base_config = super().get_config()
        return {**base_config, **config}

In [None]:
#Define the model, there will be 3 steps:
#1) Only Layer of ReLU
#2) Layer of ReLU + sin
#3) Try the fancy activation functions

def custom_activation(x):
    """Clamp ``x`` element-wise to the interval [-0.8, 0.8]."""
    lower_bound, upper_bound = -0.8, 0.8
    floored = tf.maximum(x, lower_bound)
    return tf.minimum(floored, upper_bound)

# Create a simple model: sigmoid Dense layers of width 32-64-64-32, each followed
# by batch normalisation, then one output per frequency passed through custom_activation.
hidden_stack = [
    tf.keras.layers.Dense(32, activation='sigmoid', input_shape=(TSlenght,)),
    tf.keras.layers.BatchNormalization(),
]
for width in (64, 64, 32):
    hidden_stack.append(tf.keras.layers.Dense(width, activation='sigmoid'))
    hidden_stack.append(tf.keras.layers.BatchNormalization())

output_layer = tf.keras.layers.Dense(len(frequencies), activation = custom_activation)

model = tf.keras.models.Sequential(hidden_stack + [output_layer])

In [None]:
# Fit the network on the training set with the custom Q_Loss objective.

loss_fn = Q_Loss(times=times, maturity=maturities)

# Attach the custom loss and the Adam optimizer
model.compile(loss=loss_fn, optimizer='adam')

# Run the training loop; the batch size must match the one the loss assumes
model.fit(
    x=x_train,
    y=y_train,
    batch_size=mini_batch,
    epochs=150,
)

In [None]:
# Predict the frequency weights for every training series, then keep the
# first series' weights as a (1, len(frequencies)) tensor.

predictions = model.predict(x_train)

tensor_pred = tf.reshape(
    tf.convert_to_tensor(predictions[0]),
    (1,len(frequencies)),
)
tensor_pred

In [None]:
# Monte-Carlo test on the first training series: mean squared gap between the
# observed value at column t and (a) the plain GBM forecast, (b) the GBM
# forecast perturbed by the learned martingale.

average, average_mart = 0, 0

t = 3
N = 1000

x_0 = tf.convert_to_tensor([x_train.iloc[0,0]], dtype = tf.float64)

# Progress bar over the N simulations
import tqdm
it = tqdm.tqdm(range(N))

for i in it:

    # Log terminal value of one simulated GBM path
    x_star = tf.math.log(GBM(mu = 1.2, sigma = 0.02, n = t, dt = 0.001, x0 = x_0, batch_size = 1))

    # Squared gap: GBM only
    average += (np.exp(x_star[0].numpy())- x_train.iloc[0,t])**2

    mart = Martingale(frequencies, tensor_pred, t, TSlenght, 1)

    # Squared gap: GBM + martingale
    average_mart += (np.exp(x_star[0].numpy() + mart.numpy()) - x_train.iloc[0,t])**2

it.close()

# Turn the accumulated sums into means
average = average/N
average_mart = average_mart/N
print(average)
print(average_mart)

In [None]:
# Spot check for the first series: start value, one GBM draw after 3 steps,
# one martingale draw at evaluation time 3, and the observed value at column 3.
tf.print(x_0)
tf.print(GBM(mu = 1.2, sigma = 0.02, n = 3, dt = 0.001, x0 = x_0, batch_size = 1))
tf.print(Martingale(frequencies, tensor_pred, 3, TSlenght, 1))
print(x_train.iloc[0,3])

[598]
[[600.815125]]
[-0.355288118]
599.7142857142857


In [None]:
# Same Monte-Carlo test as before on the first training series, but with the
# GBM drift set to mu = 0.05 (1000 simulations, 3 steps, compared with column 3).

average, average_mart = 0, 0

x_0 = tf.convert_to_tensor([x_train.iloc[0,0]], dtype = tf.float64)

for i in range(1000):

    # Log terminal value of one simulated GBM path
    x_star = tf.math.log(
        GBM(mu = 0.05, sigma = 0.02, n = 3, dt = 0.001, x0 = x_0, batch_size = 1)
    )

    # Squared gap: GBM only
    average += (np.exp(x_star[0].numpy())- x_train.iloc[0,3])**2

    mart = Martingale(frequencies, tensor_pred, 3, TSlenght, 1)

    # Squared gap: GBM + martingale
    average_mart += (np.exp(x_star[0].numpy() + mart.numpy()) - x_train.iloc[0,3])**2

# Turn the accumulated sums into means
average = average/1000
average_mart = average_mart/1000
print(average)
print(average_mart)


[1438.0952]
[1436.9066]
