In [153]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import sympy as sp

In [106]:
# Load the underlying-security dataset from its CSV file into a DataFrame.
csv_file = 'Security_adj.csv'
df = pd.read_csv(csv_file)

# Training inputs: drop the leading CSV column, then keep the first 80000 rows.
x_train = df.iloc[:, 1:].iloc[:80000]

# First target component: the value at column position 1 of every training row.
y_train = x_train.iloc[:, 1].to_numpy()

# Preview the first few rows of the training inputs.
print(x_train.head())

     0    1    2    3    4    5    6    7    8    9  ...  86  87  88  89  90  \
0  598  606  619  636  649  655  648  644  652  638  ...   0   0   0   0   0   
1  655  648  644  652  638    0    0    0    0    0  ...   0   0   0   0   0   
2  649  648  655  633  641  650  654  644  665  678  ...   0   0   0   0   0   
3  672  656  634  626  639  664  662  665  664  655  ...   0   0   0   0   0   
4  681  685  694  696  704    0    0    0    0    0  ...   0   0   0   0   0   

   91  92  93  94  95  
0   0   0   0   0   0  
1   0   0   0   0   0  
2   0   0   0   0   0  
3   0   0   0   0   0  
4   0   0   0   0   0  

[5 rows x 96 columns]


In [107]:
# Load the price dataset from its CSV file into a DataFrame.
csv_file = 'Price_adj.csv'
df = pd.read_csv(csv_file)

# Keep the first 80000 rows and drop the leading CSV column.
Price_train = df.iloc[:80000, 1:]

# Build the targets with one row per sample: column 0 is the underlying value,
# column 1 is the price. Keras pairs x and y along axis 0 and Q_Loss picks the
# two columns of y_true, so the array has to be (n_samples, 2); np.vstack gave
# (2, n_samples). Rebuilding from x_train (instead of stacking onto the previous
# y_train) also keeps this cell idempotent when it is re-run.
y_train = np.column_stack([
    x_train.iloc[:, 1].to_numpy(),
    Price_train.iloc[:, 1].to_numpy(),
])

# Preview the first few rows of the price DataFrame.
# (The original printed `Price`, a name that is never defined in this notebook.)
print(Price_train.head())

    0   1   2   3   4   5   6   7   8   9  ...  86  87  88  89  90  91  92  \
0  14  16  25  38  52  56  51  44  51  37  ...   0   0   0   0   0   0   0   
1   9   9  14   8  17   0   0   0   0   0  ...   0   0   0   0   0   0   0   
2   9  10   8  11   9   8   8   9   6   5  ...   0   0   0   0   0   0   0   
3  37  49  71  78  67  44  46  44  44  53  ...   0   0   0   0   0   0   0   
4  54  57  65  66  74   0   0   0   0   0  ...   0   0   0   0   0   0   0   

   93  94  95  
0   0   0   0  
1   0   0   0  
2   0   0   0  
3   0   0   0  
4   0   0   0  

[5 rows x 96 columns]


In [556]:
# Define the loss function; this requires:
#   - simulating the trajectories of the GBM
#   - simulating the Gaussian variables

# Sine frequencies (remember to add the cosine frequencies later).
frequencies = list(range(1, 6))

# First implementation with no k: each t_i has a single T_i for now,
# and only one time is being tried.
times = [1]         # the t_i
maturities = [3]    # the T_i


def quad_var_calc(freq, eval_time, horizon):
    """Closed-form single-frequency variance term.

    Evaluates
        eval_time / 2 - horizon * sin(4*pi*freq*eval_time / horizon) / (8*pi*freq)
    which the notebook uses as the integral of sin^2(...) for one frequency.

    Args:
        freq: frequency of the sine term (must be non-zero, it is a divisor).
        eval_time: upper limit at which the integral is evaluated.
        horizon: period scale of the sine term (must be non-zero).

    Returns:
        The value of the expression above.
    """
    angular = 4 * freq * np.pi
    linear_part = eval_time / 2
    oscillating_part = (horizon * np.sin((angular * eval_time) / horizon)) / (2 * angular)
    return linear_part - oscillating_part


def GBM(mu = 1, sigma = 1, n = 50, dt = 0.01, x0 = 100):
    """Simulate the terminal value of one geometric Brownian motion per start value.

    Each path takes ``n`` steps of size ``dt``; every step multiplies the value by
    ``exp((mu - sigma**2 / 2) * dt + sigma * Z)`` with ``Z ~ N(0, sqrt(dt))``.
    Only the last point of each path is returned.

    The number of paths is taken from ``x0`` instead of being fixed at 32, so a
    scalar start value (including the default) or a batch of any size works.

    Args:
        mu: drift coefficient.
        sigma: volatility coefficient.
        n: number of time steps per path.
        dt: size of one time step.
        x0: start value(s): a scalar, a NumPy array, or an eager TensorFlow
            tensor. Any shape is flattened, one path per element (the loss
            passes a ``(batch, 1)`` tensor).

    Returns:
        A float64 tensor of shape ``(number of start values,)`` holding the
        terminal value of each path.

    Note:
        Randomness comes from NumPy's global generator; call ``np.random.seed``
        beforehand for reproducible paths. The result is built from NumPy
        values, so no gradient flows back to ``x0``.
    """
    # Eager tensors expose .numpy(); plain scalars and arrays are used directly.
    start = x0.numpy() if hasattr(x0, "numpy") else x0
    start = np.asarray(start, dtype=np.float64).reshape(-1)

    # One row of Gaussian increments per path.
    shocks = np.random.normal(0, np.sqrt(dt), size=(start.shape[0], n))

    # The product of the per-step growth factors equals exp(sum of the exponents).
    log_growth = ((mu - sigma ** 2 / 2) * dt + sigma * shocks).sum(axis=1)

    return tf.convert_to_tensor(start * np.exp(log_growth))

def Martingale(frequencies, weights, eval_time = 1, horizon = 10):
    """Draw one zero-mean Gaussian value per (sample, frequency) pair.

    The variance of each draw is ``weights[sample, k]`` times
    ``quad_var_calc(frequencies[k], eval_time, horizon)``. The per-frequency
    values are left unsummed; the caller reduces over the frequency axis.

    The batch size is read from ``weights`` instead of being fixed at 32, and
    the per-frequency term is computed once and broadcast over the batch.

    Args:
        frequencies: sequence of frequencies, one per column of ``weights``.
        weights: tensor of shape ``(batch, len(frequencies))`` with the weight
            applied to each frequency's variance term.
        eval_time: time at which the variance integral is evaluated.
        horizon: horizon passed through to ``quad_var_calc``.

    Returns:
        A float32 tensor with the same shape as ``weights``.
    """
    # The variance term depends only on the frequency, not on the sample.
    per_frequency = np.array(
        [quad_var_calc(freq, eval_time, horizon) for freq in frequencies],
        dtype=np.float32,
    )

    # Shape (F,) broadcasts against (batch, F); a plain tensor product replaces
    # the Keras Multiply layer that was re-created on every call.
    variance = tf.cast(weights, tf.dtypes.float32) * tf.constant(per_frequency)

    # Scaling the standard-normal draw by stddev keeps it differentiable w.r.t. weights.
    random_value = tf.random.normal(
        tf.shape(variance),
        mean=0.0,
        stddev=tf.sqrt(variance),
        dtype=tf.dtypes.float32,
    )
    return random_value


class Q_Loss(tf.keras.losses.Loss):
    """Mean squared error between a simulated price and the target price.

    ``y_true`` carries two columns per sample: column 0 is the underlying value
    used as the GBM starting point, column 1 is the target price. ``y_pred``
    holds the per-frequency weights produced by the model.

    Args:
        times: list of the t_i (stored; not used by ``call`` yet).
        maturity: list of the T_i; only ``maturity[0]`` is used for now.
        lambd: regularisation weight (stored; the regularisation term is not
            implemented yet).
        name: name of the loss.
        **kwargs: forwarded to ``tf.keras.losses.Loss``.
    """

    def __init__(self, times, maturity, lambd = 0, name="Q_loss", **kwargs):
        super().__init__(name=name, **kwargs)
        self.times = times
        self.maturity = maturity
        self.lambd = lambd
        # Legacy alias: the attribute used to be stored under this misspelled name.
        self.lamb = lambd

    def call(self, y_true, y_pred):
        """Return the mean squared difference between simulated and target prices."""
        y_true = tf.cast(y_true, tf.dtypes.float64)

        # x keeps shape (batch, 1) because GBM indexes it per sample.
        x = y_true[:, 0:1]
        # g must be (batch,): a (batch, 1) target would broadcast against the
        # (batch,) simulated price into a (batch, batch) matrix and compare
        # every sample with every target.
        g = y_true[:, 1]

        # TODO: loop over self.times once more than one t_i is supported.

        # Log of the simulated terminal value of the underlying.
        x_star = tf.math.log(GBM(mu = 1, sigma = 1, n = self.maturity[0], dt = 0.01, x0 = x))
        x_star = tf.cast(x_star, tf.dtypes.float64)

        # Sum the per-frequency Gaussian draws into one value per sample.
        # NOTE(review): 96 is the horizon handed to Martingale; it presumably
        # matches the 96 input columns -- confirm.
        mart = Martingale(frequencies, y_pred, self.maturity[0], 96)
        mart = tf.math.reduce_sum(mart, 1)
        mart = tf.cast(mart, tf.dtypes.float64)

        # TODO: add the regularisation term weighted by self.lambd.
        loss = tf.square(tf.exp(x_star + mart) - g)
        return tf.math.reduce_mean(loss)

    def get_config(self):
        """Return the constructor arguments so the loss can be re-created."""
        config = {
            'times': self.times,
            'maturity': self.maturity,
            # The key matches the constructor argument so from_config() works.
            'lambd': self.lambd
        }
        base_config = super().get_config()
        return {**base_config, **config}

In [557]:
# Define the model; there will be 3 steps:
# 1) ReLU layers only
# 2) ReLU + sin layers
# 3) Try the fancy activation functions
# NOTE(review): the hidden layers below currently use sigmoid, not ReLU.

# Build a simple fully connected network, adding one layer at a time.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(256, activation='sigmoid', input_shape=(96,)))
model.add(tf.keras.layers.Dense(512, activation='sigmoid'))
model.add(tf.keras.layers.Dense(512, activation='sigmoid'))
model.add(tf.keras.layers.Dense(256, activation='sigmoid'))
# Softmax output: the 5 values form a probability distribution.
model.add(tf.keras.layers.Dense(5, activation='softmax'))

In [None]:
# Train the model on the dataset and test it.

# Custom loss built from the configured t_i and T_i.
loss_fn = Q_Loss(times=times, maturity=maturities)

# Compile with the custom loss.
# run_eagerly is needed only because GBM converts x0 to NumPy and back; once the
# loss works, GBM should be rewritten with tensors only and run_eagerly removed,
# which will speed up the calculations considerably.
model.compile(
    optimizer='adam',
    loss=loss_fn,
    run_eagerly=True,
)

# Fit the model using the custom loss.
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
)