## Data preparation

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import StandardScaler
from scipy.stats import skew, kurtosis

# GLOBAL VARIABLES
# These module-level names are read directly by the functions defined below.

# time horizon in years
T = 20  

# total number of time steps (252 * 7 steps for each of the T years)
N = int(T * 252 * 7)  

# length of each regime interval, in time steps (half a year at 252 * 7 steps per year)
l_regime = int(0.5  * 252 * 7)

# time interval between two consecutive steps, in years
dt = T / N

# GBM parameters: one (mu, sigma) row per regime, indexed by the 0/1 regime label
gbm_par = np.array(
    [[0.02, 0.2], #mu,sigma bull-regime
    [-0.02, 0.3]]) #mu,sigma bear-regime

# array of all the timesteps
# NOTE(review): np.linspace includes both end points, so the spacing of this grid is
# T / (N - 1) rather than dt = T / N -- confirm the small mismatch is acceptable
timestep = np.linspace(0, T, N)

In [None]:
def data_par(h_1, h_2):
    '''
    Given the hyper parameters h_1 and h_2 it returns the effective number of log-returns
    N_prime that are involved in the analysis and the number of sub-sequences M.

    Sub-sequence i covers the log-return indices (h_1 - h_2) * i ... (h_1 - h_2) * i + h_1,
    and only sub-sequences whose last index does not exceed N - 2 are counted
    (N prices give N - 1 log-returns, so N - 2 is the last valid log-return index).

    Parameters:
    h_1 (int): offset between the first and the last index of a sub-sequence
    h_2 (int): overlap parameter; consecutive sub-sequences start h_1 - h_2 indices apart

    Returns:
    tuple: (N_prime, M)

    Raises:
    ValueError: if h_1 <= h_2, because the sub-sequences would never advance
    
    '''
    step = h_1 - h_2
    if step <= 0:
        # the original counting loop would never terminate in this case
        raise ValueError(f"h_1 must be greater than h_2 (got h_1={h_1}, h_2={h_2})")

    # last valid log-return index
    last_index = N - 2

    # number of i >= 0 such that step * i + h_1 <= last_index
    if h_1 <= last_index:
        M = (last_index - h_1) // step + 1
    else:
        M = 0

    # number of log-returns covered by the M sub-sequences
    N_prime = step * (M - 1) + h_1 + 1

    return N_prime, M

# hyper parameters of the sub-sequences (see data_par)
h_1 = 35
h_2 = 28

# effective number of log-returns and number of sub-sequences
N_prime, M = data_par(h_1, h_2)
# time grid restricted to the N_prime + 1 prices that are actually used
t = timestep[: N_prime + 1]

print(f"price values not included in the analysis = {len(timestep) - len(t)}")

In [None]:
def generate_regimes(N_prime, num_subsequences=10, subseq_length=None, random_state=17):
    '''
    It randomly places non-overlapping index intervals of the same length on the price grid.

    Candidate start indices are drawn uniformly and rejected whenever the interval, extended
    by a one-index buffer, would touch an interval that was already accepted.

    Parameters:
    N_prime (int): number of log-returns; prices are indexed 0 ... N_prime
    num_subsequences (int): number of intervals to place (default 10)
    subseq_length (int or None): number of prices in each interval; None uses the global l_regime
    random_state (int): seed passed to np.random.seed before drawing (default 17)

    Returns:
    tuple: (subsequences, B, C) where subsequences is a 2-D array with one row of consecutive
    price indices per interval (rows sorted by start index), B is the 0/1 label array of
    length N_prime for the log-returns and C the 0/1 label array of length N_prime + 1
    for the prices.

    Raises:
    ValueError: if the requested intervals cannot fit, which would otherwise loop forever
    
    '''
    if subseq_length is None:
        subseq_length = l_regime

    # every interval reserves subseq_length + 1 indices (itself plus the buffer)
    # inside 0 ... N_prime - 1, so more than N_prime reserved indices can never fit
    if num_subsequences * (subseq_length + 1) > N_prime:
        raise ValueError(
            f"cannot place {num_subsequences} intervals of length {subseq_length} "
            f"in {N_prime + 1} price indices")

    # price indices
    A = np.arange(0, N_prime + 1)

    # seed once: reseeding before every draw only replays the same stream from the
    # beginning, which yields the same accepted starts with many more draws
    # (note that this resets the global NumPy random state, as the original code did)
    np.random.seed(random_state)

    # rejection sampling of the start indices
    starts = []
    while len(starts) < num_subsequences:
        candidate = np.random.randint(0, len(A) - subseq_length - 1)
        # two buffered intervals are disjoint iff their starts differ by more than the length
        if all(abs(candidate - start) > subseq_length for start in starts):
            starts.append(candidate)

    # one row of consecutive indices per interval, ordered by start index
    starts = np.sort(np.array(starts, dtype=int))
    subsequences = starts[:, None] + np.arange(subseq_length)

    # label for the log-returns
    B = np.zeros(N_prime, dtype=int)
    # label for prices
    C = np.zeros(N_prime + 1, dtype=int)
    for sub in subsequences:
        # the slice excludes sub[-1], so one log-return fewer than prices is labelled
        B[sub[0]: sub[-1]] = 1
        C[sub] = 1

    return subsequences, B, C

subsequences, theo_labels, labels_prices = generate_regimes(N_prime)

# shade the time span of each regime interval
plt.figure(figsize=(10, 6))
for regime in subsequences[:10]:
    first_idx, last_idx = regime[0], regime[-1]
    plt.axvspan(timestep[first_idx], timestep[last_idx], color='red', alpha=0.3)
plt.show()

In [None]:
def gbm(S0, mu, sigma, n, dt):
    """
    Simulates a Geometric Brownian Motion (GBM) on a uniform time grid.

    Parameters:
    S0 (float): Initial stock price (first value of the returned path)
    mu (float): Drift coefficient
    sigma (float): Volatility coefficient
    n (int): Number of points of the path, the initial one included
    dt (float): Time increment between two consecutive points

    Returns:
    np.ndarray: Array of length n with the simulated stock prices

    """
    # n - 1 standard normal shocks, accumulated and rescaled into a Brownian motion
    shocks = np.random.standard_normal(size=n - 1)
    brownian = np.cumsum(shocks) * np.sqrt(dt)

    # time elapsed at each simulated point (the initial point is excluded)
    elapsed = np.arange(1, n) * dt

    # exponent of the closed-form GBM solution
    exponent = (mu - 0.5 * sigma**2) * elapsed + sigma * brownian

    return np.concatenate(([S0], S0 * np.exp(exponent)))

def gbm_path(N_prime, C, t):
    '''
    It simulates the entire path of a GBM with regime switches.

    The path is built segment by segment: a segment is closed whenever the price label
    changes between index k and k + 1 (or at the last index), it is simulated with the
    parameters gbm_par[C[k]], and its last price becomes the initial price of the next one.

    Parameters:
    N_prime (int): number of log-returns; the path has N_prime + 1 prices
    C (array of int): 0/1 regime label of each price, used as row index of gbm_par
    t (array): time grid; only the length of its slices is used

    Returns:
    np.ndarray: simulated prices, starting from 1
    
    '''
    # array of prices, with the initial stock price set to 1
    s = np.zeros(N_prime + 1)
    s[0] = 1
    s_0 = s[0]

    # inclusive bounds of the segment currently being extended
    start_index, stop_index = 0, 1

    for k in range(1, N_prime + 1):
        is_last = (k == N_prime)

        # same regime at the next price: just extend the current segment
        if not is_last and C[k] == C[k + 1]:
            stop_index = k + 1
            continue

        # close the segment and simulate it with the parameters of the regime at k
        mu, sigma = gbm_par[C[k]]
        segment = slice(start_index, stop_index + 1)
        s[segment] = gbm(s_0, mu, sigma, len(t[segment]), dt)

        if not is_last:
            # the next segment starts from the price just simulated at index k
            start_index, stop_index = k, k + 1
            s_0 = s[k]

    return s

# to ensure reproducibility
seed_path = 15
np.random.seed(seed_path)

# relevant time series
prices = gbm_path(N_prime, labels_prices, t)  
log_returns = np.diff(np.log(prices))

print(f'mean_path = {np.mean(prices)} \nstd_path = {np.std(prices)}')

# plot price path
plt.figure(figsize=(10, 6))
plt.plot(t, prices)

# shade every regime interval (however many were generated); only the first one
# gets a label so that the legend shows a single entry
for i, regime in enumerate(subsequences):
    legend_entry = {'label': 'regime switch'} if i == 0 else {}
    plt.axvspan(t[regime[0]], t[regime[-1]], color='red', alpha=0.3, **legend_entry)

#plt.title("Geometric Brownian Motion Simulation")
plt.xlabel("time (years)")
plt.ylabel("stock price")
plt.grid()
plt.legend()
# a single show is enough: the figure is already consumed after the first call
plt.show()

In [None]:
def lift_function(h_1, h_2, log_returns, M):
    '''
    It returns a matrix whose rows are the sub-sequences of log-returns, together with a
    copy in which every row is sorted in ascending order.

    Row j holds the h_1 + 1 consecutive log-returns starting at index (h_1 - h_2) * j.

    Parameters:
    h_1 (int): each sub-sequence has h_1 + 1 elements
    h_2 (int): consecutive sub-sequences start h_1 - h_2 elements apart
    log_returns (array): series the sub-sequences are taken from
    M (int): number of sub-sequences

    Returns:
    tuple: (lift_matrix, sorted_lift_matrix), both of shape (M, h_1 + 1)
    
    '''
    stride = h_1 - h_2
    width = h_1 + 1

    # one slice of the series per sub-sequence
    windows = [log_returns[stride * j : stride * j + width] for j in range(M)]
    lift_matrix = np.array(windows, dtype=float).reshape(M, width)

    # np.sort works along the last axis, i.e. inside each row
    return lift_matrix, np.sort(lift_matrix)

# one row per sub-sequence of log-returns (plain and row-wise sorted versions)
lift_matrix, sorted_lift_matrix = lift_function(h_1, h_2, log_returns, M)
print(f'number of sub sequences = {M}')

## MK-means

In [None]:
class MKMeans:
    """
    k-means-style clustering with Euclidean distance and randomly chosen initial centres.

    Parameters:
    max_iter (int): maximum number of assignment/update iterations
    tol (float): the iterations stop when the summed displacement of the centres is below tol
    n_clusters (int): number of clusters (default 2)
    random_state (int or None): seed passed to np.random.seed at the beginning of fit

    Attributes set by fit:
    cluster_centers_: array with one row per cluster centre
    labels_: index of the closest centre for every row of the fitted data
    """

    def __init__(self, max_iter, tol, n_clusters=2, random_state=None):
        
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

    def _updated_centers(self, X, labels):
        """Return the mean of each cluster; a cluster without members keeps its current centre."""
        centers = np.array(self.cluster_centers_, dtype=float)
        for j in range(self.n_clusters):
            members = X[labels == j]
            # the mean of an empty selection is NaN and would poison every later distance
            if len(members):
                centers[j] = members.mean(axis=0)
        return centers

    def fit(self, X):
        """
        Cluster the rows of X and store cluster_centers_ and labels_.

        Note that this reseeds the global NumPy random state with random_state.

        Returns:
        MKMeans: the fitted instance itself

        Raises:
        ValueError: if max_iter is smaller than 1 (no assignment would ever be computed)
        """
        if self.max_iter < 1:
            raise ValueError("max_iter must be at least 1")

        np.random.seed(self.random_state)

        n_samples = X.shape[0]

        # Initialize cluster centers with distinct rows of X
        indices = np.random.choice(n_samples, self.n_clusters, replace=False)
        self.cluster_centers_ = X[indices]

        for _ in range(self.max_iter):

            # Compute distances and assign clusters
            distances = pairwise_distances(X, self.cluster_centers_, metric='euclidean')
            labels = np.argmin(distances, axis=1)

            # Compute new cluster centers
            new_centers = self._updated_centers(X, labels)

            # Check for convergence: total displacement of the centres
            loss = sum(np.linalg.norm(old - new)
                       for old, new in zip(self.cluster_centers_, new_centers))

            # on convergence the current centres are kept, so they match the labels above
            if loss < self.tol:
                break

            self.cluster_centers_ = new_centers

        self.labels_ = labels
        return self

    def predict(self, X):
        """Return, for every row of X, the index of the closest fitted centre."""
        distances = pairwise_distances(X, self.cluster_centers_, metric='euclidean')
        return np.argmin(distances, axis=1)


In [None]:
# construction of a suitable dataset

# from each empirical cdf we take the first q moments (a vector of dim. q for each empirical cdf)
q = 4

def raw_moment_nd(values, k, axis=None):
    """Return the k-th raw moment of values, i.e. the mean of values**k, along axis.

    With axis=None the moment is computed over the entire array.
    """
    powered = values ** k
    return np.mean(powered, axis=axis)


# one column per raw moment of order 1 ... q, one row per sub-sequence
moment_rows = [raw_moment_nd(lift_matrix, order, axis=1) for order in range(1, q + 1)]
X_moments = np.array(moment_rows).T

# standardize every moment column
scaler = StandardScaler()
standardized_X_moments = scaler.fit_transform(X_moments)

# sanity check: column-wise mean and standard deviation of the standardized data
for column_summary in (np.mean, np.std):
    print(column_summary(standardized_X_moments, axis=0))

In [None]:
# Fit the  MK-means
max_iter = 600
tol = 1e-8
seed_clustering = 1

mkmeans = MKMeans(max_iter=max_iter, tol=tol, random_state=seed_clustering)
mkmeans.fit(standardized_X_moments)

# centroids in the real (non-standardized) space of raw moments
centroids = scaler.inverse_transform(mkmeans.cluster_centers_) 

# off-regime -> cluster with the higher number of elements
off_regime_index = 0 
# on-regime -> cluster with the lower number of elements
on_regime_index = 1 
# swap the two indices when cluster 0 is the smaller one
if (mkmeans.labels_ == 0).sum() < (mkmeans.labels_ == 1).sum():
    off_regime_index = 1
    on_regime_index = 0

# scatter plot of the sub-sequences in the (std, mean) plane, coloured by cluster
point_size = 4
for regime_index, regime_color, regime_alpha in (
        (off_regime_index, 'green', 0.3),
        (on_regime_index, 'orange', 0.4)):
    members = lift_matrix[mkmeans.labels_ == regime_index]
    plt.scatter(
        np.std(members, axis=1),
        np.mean(members, axis=1),
        marker='.', color=regime_color, alpha=regime_alpha, s=point_size)

# scatter plot of centroids: std recovered from the first two raw moments
for regime_index, centroid_color, centroid_label in (
        (off_regime_index, 'blue', 'centroid 0'),
        (on_regime_index, 'red', 'centroid 1')):
    first_moment = centroids[regime_index][0]
    second_moment = centroids[regime_index][1]
    plt.scatter(np.sqrt(second_moment - first_moment**2),
                first_moment,
                color=centroid_color, marker='x', label=centroid_label)

# raw strings: '\s' and '\m' are invalid escape sequences in a normal string literal
plt.xlabel(r'$\sigma$', size=13)
plt.ylabel(r'$\mu$', size=13)
plt.title(f'M k-means with p={q}')
plt.legend()
# plt.savefig(f'figures/{q}_M_means_{seed_clustering}_h_{h_1}_{h_2}_GBM_{seed_path}_ite_{max_iter}_tol_{tol}_mu_std.pdf', bbox_inches='tight')
plt.show()

In [None]:
def skewness_and_kurtosis(M):
    """
    Calculate skewness and excess kurtosis from the first four raw moments.

    Parameters:
    M: indexable with at least four entries; M[0] ... M[3] are the raw moments
       of order 1 ... 4 (M[0] is the mean)

    Returns:
    skewness, excess kurtosis
    """
    M1, M2, M3, M4 = M[0], M[1], M[2], M[3]

    # central moments expressed through the raw ones
    variance = M2 - M1**2
    third_central = M3 - 3 * M1 * M2 + 2 * M1**3
    fourth_central = M4 - 4 * M1 * M3 + 6 * M1**2 * M2 - 3 * M1**4

    # standardized third moment, and standardized fourth moment minus 3
    skewness = third_central / variance**(3/2)
    excess_kurtosis = fourth_central / variance**2 - 3

    return skewness, excess_kurtosis


In [None]:
# WARNING: RUN THIS IF q IS AT LEAST 4

# scatter plot of the sub-sequences in the (skewness, excess kurtosis) plane
point_size = 4
for regime_index, regime_color, regime_alpha in (
        (off_regime_index, 'green', 0.3),
        (on_regime_index, 'orange', 0.4)):
    members = lift_matrix[mkmeans.labels_ == regime_index]
    plt.scatter(
        skew(members, axis=1),
        kurtosis(members, axis=1),
        marker='.', color=regime_color, alpha=regime_alpha, s=point_size)

# centroids, converted from raw moments to skewness and excess kurtosis
skewness_0, excess_kurtosis_0 = skewness_and_kurtosis(centroids[off_regime_index])
skewness_1, excess_kurtosis_1 = skewness_and_kurtosis(centroids[on_regime_index])
plt.scatter(skewness_0, excess_kurtosis_0, color='blue', marker='x', label='centroid 0')
plt.scatter(skewness_1, excess_kurtosis_1, color='red', marker='x', label='centroid 1')

plt.xlabel('skew', size=13)
plt.ylabel('excess kurtosis', size=13)
plt.title(f'M k-means p={q}')
plt.legend()
# plt.savefig(f'figures/{q}_M_means_{seed_clustering}_h_{h_1}_{h_2}_GBM_{seed_path}_ite_{max_iter}_tol_{tol}_kurt_skew.pdf', bbox_inches='tight')
plt.show()

## Accuracy scores

In [None]:
def opt_counter(kmeans, n, M, h_1, h_2):
    """
    Count, for every time index, how many sub-sequences of each cluster cover it.

    Sub-sequence j covers the time indices (h_1 - h_2) * j ... (h_1 - h_2) * j + h_1,
    restricted to 0 ... n - 1. The counts are obtained with a difference array
    (+1 where a sub-sequence starts, -1 right after it ends, then a cumulative sum),
    which needs O(n + M) memory instead of dense n x M intermediates.

    Parameters:
    kmeans: fitted clustering object exposing labels_ (one 0/1 label per sub-sequence)
    n (int): number of time indices
    M (int): number of sub-sequences
    h_1 (int): each sub-sequence covers h_1 + 1 indices
    h_2 (int): consecutive sub-sequences start h_1 - h_2 indices apart

    Returns:
    tuple: (r_counter, s_counter). r_counter has shape (n, 2): column c is the number of
    sub-sequences labelled c that cover each index. s_counter has shape (n + 1, 2): its
    first and last rows copy the first and last rows of r_counter, every other row is the
    sum of two consecutive rows of r_counter.
    """
    labels = np.asarray(kmeans.labels_)

    # first covered index and one-past-the-last covered index of every sub-sequence,
    # clipped to the valid range 0 ... n
    first = (h_1 - h_2) * np.arange(M)
    lo = np.clip(first, 0, n)
    hi = np.clip(first + h_1 + 1, 0, n)

    def coverage(label):
        # difference array of the sub-sequences carrying this label
        chosen = labels == label
        delta = (np.bincount(lo[chosen], minlength=n + 1)
                 - np.bincount(hi[chosen], minlength=n + 1))
        return np.cumsum(delta[:n])

    # Combine the counts into a single array
    r_counter = np.stack((coverage(0), coverage(1)), axis=1)

    # the first and the last rows have a single neighbour, all the others sum two rows
    s_counter = np.zeros((n + 1, 2))
    s_counter[0] = r_counter[0]
    s_counter[-1] = r_counter[-1]
    s_counter[1:-1] = r_counter[:-1] + r_counter[1:]

    return r_counter, s_counter


In [None]:
%%time
r_counter, s_counter = opt_counter(mkmeans, len(log_returns), M, h_1, h_2)

dec = 2

# rows of the counter belonging to the theoretical off / on regime
off_rows = r_counter[theo_labels == 0]
on_rows = r_counter[theo_labels == 1]

# counts assigned to the matching cluster inside each theoretical regime
off_hits = np.sum(off_rows[:, off_regime_index])
on_hits = np.sum(on_rows[:, on_regime_index])

# regime-off accuracy score (ROFS)
ROFS = off_hits/np.sum(off_rows)
print(f'ROFS = {round(ROFS, dec)}')

# regime-on accuracy score (RONS)
RONS = on_hits/np.sum(on_rows)
print(f'RONS = {round(RONS, dec)}')

# total accuracy (TA)
TA = (off_hits + on_hits)/np.sum(r_counter)
print(f'TA = {round(TA, dec)}')

## log-returns

In [None]:
# two important functions to allow a correct way to plot data
def compare_columns(A):
    """
    Return, for every row of A, a code telling which of the two columns is larger.

    The code is 0 when column 0 is larger, 1 when column 1 is larger and 2 otherwise;
    when the global off_regime_index equals 1 the codes 0 and 1 are swapped.
    """
    first, second = A[:, 0], A[:, 1]
    B = np.select([first > second, first < second], [0, 1], default=2)

    if off_regime_index == 1:
        # lookup table that exchanges 0 and 1 and leaves 2 untouched
        B = np.array([1, 0, 2])[B]
    return B

In [None]:
b = compare_columns(r_counter)
color = ['green', 'red', 'blue']
m_size = 1

print('ambiguos clustering' if 2 in b else 'no ambiguos clustering')
    
plt.figure(figsize=(10, 6))

# split the series into maximal runs of equal code and draw each run in its own colour
n_points = len(log_returns)
run_ends = np.append(np.flatnonzero(b[:n_points - 1] != b[1:n_points]), n_points - 1)
run_starts = np.append(0, run_ends[:-1] + 1)
for start_j, end_j in zip(run_starts, run_ends):
    plt.plot(t[start_j: end_j + 1], log_returns[start_j: end_j + 1],
             color=color[b[end_j]], marker='.', linewidth=m_size, markersize=m_size)

# shade the regime intervals; only the first one gets a legend entry
for i, regime in enumerate(subsequences[:10]):
    legend_entry = {'label': 'regime switch'} if i == 0 else {}
    plt.axvspan(t[regime[0]], t[regime[-1]], color='red', alpha=0.3, **legend_entry)

plt.legend()  
plt.ylabel('log-returns')
plt.xlabel('time (years)')
plt.show()    

## price path

In [None]:
b = compare_columns(r_counter)
color = ['green', 'red', 'blue']
m_size = 0.5

print('ambiguos clustering' if 2 in b else 'no ambiguos clustering')
    
plt.figure(figsize=(10, 6))

# maximal runs of equal code among the log-returns; the run over the log-returns
# first ... last is drawn on the prices first + 1 ... last + 1
n_points = len(log_returns)
run_ends = np.append(np.flatnonzero(b[:n_points - 1] != b[1:n_points]), n_points - 1)
run_starts = np.append(0, run_ends[:-1] + 1)
for first, last in zip(run_starts, run_ends):
    start_j, end_j = first + 1, last + 1
    plt.plot(t[start_j: end_j + 1], prices[start_j: end_j + 1],
             color=color[b[last]], marker='.', linewidth=m_size, markersize=m_size)

# shade the regime intervals; only the first one gets a legend entry
for i, regime in enumerate(subsequences[:10]):
    legend_entry = {'label': 'regime switch'} if i == 0 else {}
    plt.axvspan(t[regime[0]], t[regime[-1]], color='red', alpha=0.3, **legend_entry)

plt.legend()  
plt.ylabel('price')
plt.xlabel('time (years)')
plt.show()    

# CLUSTERING VALIDATION

In [None]:
def clustering_validation(h_1, h_2, q, max_iter, tol, n_runs, random_state=None):
    '''
    It repeats the whole pipeline (path simulation, lift, moments, clustering, scores)
    on n_runs simulated paths and collects the accuracy scores and the run times.

    The regimes are generated once; run j simulates its path after np.random.seed(j).
    The timed part of a run starts after the path simulation.

    Parameters:
    h_1, h_2 (int): hyper parameters of the sub-sequences (see data_par)
    q (int): number of raw moments used as features
    max_iter (int), tol (float): parameters of MKMeans
    n_runs (int): number of simulated paths
    random_state (int or None): base seed of the clustering; run j is initialized with
        random_state + j. None (default) leaves the initialization unseeded, so the
        scores are not reproducible from one call to the next.

    Returns:
    tuple: (rofs, rons, ta, iteration_times), four arrays of length n_runs
    
    '''
    
    rofs = np.zeros(n_runs)
    rons = np.zeros(n_runs)
    ta = np.zeros(n_runs)
    iteration_times = np.zeros(n_runs)
    
    N_prime, M = data_par(h_1, h_2)
    t = timestep[: N_prime + 1]
    _, theo_labels, price_labels = generate_regimes(N_prime)

    # the theoretical regimes are the same for every run
    off_mask = theo_labels == 0
    on_mask = theo_labels == 1
    
    for j in range(n_runs): 
        
        # data preparation
        np.random.seed(j)
        log_returns = np.diff(np.log(gbm_path(N_prime, price_labels, t)))
        # start timing
        start = time.time()
        lift_matrix = lift_function(h_1, h_2, log_returns, M)[0]
        
        X_moments = np.array([raw_moment_nd(lift_matrix, k, axis=1) for k in range(1, q+1)]).T
        # standardize every moment column
        standardized_X_moments = StandardScaler().fit_transform(X_moments)

        # clustering
        clustering_seed = None if random_state is None else random_state + j
        mkmeans = MKMeans(max_iter=max_iter, tol=tol, random_state=clustering_seed)
        mkmeans.fit(standardized_X_moments)

        # off-regime-> higher number of elements
        off_regime_index = 0 
        # on-regime-> lower number of elements
        on_regime_index = 1 
        # check regime
        if (mkmeans.labels_ == 0).sum() < (mkmeans.labels_ == 1).sum():
            off_regime_index = 1
            on_regime_index = 0
            
        # counter    
        r_counter = opt_counter(mkmeans, len(log_returns), M, h_1, h_2)[0]

        # counts assigned to the matching cluster inside each theoretical regime
        off_hits = np.sum(r_counter[off_mask].T[off_regime_index])
        on_hits = np.sum(r_counter[on_mask].T[on_regime_index])

        # regime-off accuracy score (ROFS)
        rofs[j] = off_hits/np.sum(r_counter[off_mask])

        # regime-on accuracy score (RONS)
        rons[j] = on_hits/np.sum(r_counter[on_mask])

        # total accuracy (TA)
        ta[j] = (off_hits + on_hits)/np.sum(r_counter)
        
        iteration_times[j] = time.time() - start

    return rofs, rons, ta, iteration_times

In [None]:
%%time
# validation parameters
Q = 4
max_iter = 600
tol = 1e-8
n_runs = 50

rofs, rons, ta, iteration_times = clustering_validation(h_1, h_2, Q, max_iter, tol, n_runs)

# mean and standard deviation of every collected quantity
dec = 4
for score_name, score_values in (
        ("ROFS", rofs), ("RONS", rons), ("TA", ta), ("RUN TIME", iteration_times)):
    print(f"{score_name} = {round(np.mean(score_values), dec)} -+ {round(np.std(score_values), dec)}")

In [None]:
# save the results as a comma-separated txt file
import os

df = pd.DataFrame({
    'ROFS': rofs,
    'RONS': rons,
    'TA': ta,
    'RUNTIME': iteration_times
})

# to_csv does not create missing folders, so make sure the target one exists
os.makedirs('numerical_results', exist_ok=True)

df.to_csv(f'numerical_results/{Q}_M_means_h_{h_1}_{h_2}_GBM_n_{n_runs}_ite_{max_iter}_tol_{tol}.txt', index=False)

In [None]:
# read the results
# the bare folder 'numerical_results/' is not a readable file: point read_csv at the file
# written by the saving cell for the current parameters (edit the name to load another run)
df = pd.read_csv(f'numerical_results/{Q}_M_means_h_{h_1}_{h_2}_GBM_n_{n_runs}_ite_{max_iter}_tol_{tol}.txt')

rofs = df['ROFS'].values
rons = df['RONS'].values
ta = df['TA'].values
iteration_times = df['RUNTIME'].values

# mean and standard deviation of every stored quantity
dec = 4
print(f"ROFS = {round(np.mean(rofs), dec)} -+ {round(np.std(rofs), dec)}")
print(f"RONS = {round(np.mean(rons), dec)} -+ {round(np.std(rons), dec)}")
print(f"TA = {round(np.mean(ta), dec)} -+ {round(np.std(ta), dec)}")
print(f"RUN TIME = {round(np.mean(iteration_times), dec)} -+ {round(np.std(iteration_times), dec)}")

# dependence on the seed

In [None]:
# WARNING: run again if you change something in the section Data preparation !!!

def mk_means_function(X, max_iter, tol, random_seed):
    """
    Fit MK-means on X with the given seed and return the accuracy scores (ROFS, RONS, TA).

    The scores are computed against the globals log_returns, M, h_1, h_2 and theo_labels,
    so they refer to the path prepared in the section Data preparation.
    """
    # Fit the  MK-means
    mkmeans = MKMeans(max_iter=max_iter, tol=tol, random_state=random_seed)
    mkmeans.fit(X)

    # off-regime -> larger cluster, on-regime -> smaller cluster
    zero_is_smaller = (mkmeans.labels_ == 0).sum() < (mkmeans.labels_ == 1).sum()
    off_regime_index, on_regime_index = (1, 0) if zero_is_smaller else (0, 1)

    r_counter, _ = opt_counter(mkmeans, len(log_returns), M, h_1, h_2)

    # rows of the counter belonging to the theoretical off / on regime
    off_rows = r_counter[theo_labels == 0]
    on_rows = r_counter[theo_labels == 1]

    # counts assigned to the matching cluster inside each theoretical regime
    off_hits = np.sum(off_rows[:, off_regime_index])
    on_hits = np.sum(on_rows[:, on_regime_index])

    # regime-off accuracy score (ROFS)
    ROFS = off_hits/np.sum(off_rows)

    # regime-on accuracy score (RONS)
    RONS = on_hits/np.sum(on_rows)

    # total accuracy (TA)
    TA = (off_hits + on_hits)/np.sum(r_counter)

    return ROFS, RONS, TA


def convert_seconds(seconds):
    """Format a duration in seconds as '<minutes> min <seconds> seconds' (both truncated to integers)."""
    whole_minutes, leftover = divmod(seconds, 60)
    return f"{int(whole_minutes)} min {int(leftover)} seconds"

In [None]:
n_trials = 50

# clustering parameters
Q = 4
max_iter = 600
tol = 1e-8

# one column per raw moment of order 1 ... Q, one row per sub-sequence
moment_rows = [raw_moment_nd(lift_matrix, order, axis=1) for order in range(1, Q + 1)]
X_moments = np.array(moment_rows).T

# standardize every moment column
scaler = StandardScaler()
standardized_X_moments = scaler.fit_transform(X_moments)

# stability analysis: same data, clustering seeds 1 ... n_trials
rofs = np.zeros(n_trials)
rons = np.zeros(n_trials)
ta = np.zeros(n_trials)
iteration_times = np.zeros(n_trials)

start_time_tot = time.time()
for trial, trial_seed in enumerate(range(1, n_trials + 1)):
    tic = time.time()
    # real computation
    trial_scores = mk_means_function(standardized_X_moments, max_iter, tol, trial_seed)
    # time spent in the computation only
    iteration_times[trial] = time.time() - tic
    rofs[trial], rons[trial], ta[trial] = trial_scores
end_time_tot = time.time()

input_seconds = float(end_time_tot - start_time_tot)
print(f'time to complete all the iterations = {convert_seconds(input_seconds)}')

# mean and standard deviation of every collected quantity
dec = 4
for score_name, score_values in (
        ("ROFS", rofs), ("RONS", rons), ("TA", ta), ("RUN TIME", iteration_times)):
    print(f"{score_name} = {round(np.mean(score_values), dec)} -+ {round(np.std(score_values), dec)}")

In [None]:
# save the results as a comma-separated txt file
import os

df = pd.DataFrame({
    'ROFS': rofs,
    'RONS': rons,
    'TA': ta,
    'RUNTIME': iteration_times
})

# to_csv does not create missing folders, so make sure the target one exists
os.makedirs('numerical_results_stability', exist_ok=True)

df.to_csv(f'numerical_results_stability/{Q}_M_means_h_{h_1}_{h_2}_GBM_{seed_path}_n_{n_trials}_ite_{max_iter}_tol_{tol}.txt', index=False)

In [None]:
# read the results
# the bare folder 'numerical_results_stability/' is not a readable file: point read_csv at
# the file written by the saving cell for the current parameters (edit the name to load another run)
df = pd.read_csv(f'numerical_results_stability/{Q}_M_means_h_{h_1}_{h_2}_GBM_{seed_path}_n_{n_trials}_ite_{max_iter}_tol_{tol}.txt')

rofs = df['ROFS'].values
rons = df['RONS'].values
ta = df['TA'].values
iteration_times = df['RUNTIME'].values

# number of trials actually stored in the file
n_trials = len(ta)

# mean and standard deviation of every stored quantity
dec = 4
print(f"ROFS = {round(np.mean(rofs), dec)} -+ {round(np.std(rofs), dec)}")
print(f"RONS = {round(np.mean(rons), dec)} -+ {round(np.std(rons), dec)}")
print(f"TA = {round(np.mean(ta), dec)} -+ {round(np.std(ta), dec)}")
print(f"RUN TIME = {round(np.mean(iteration_times), dec)} -+ {round(np.std(iteration_times), dec)}")

In [None]:
# square-root rule for the number of bins
nn_bins = int(np.sqrt(n_trials))
# nn_bins = n_trials

# one normalized histogram per quantity, each in its own numbered figure
histogram_specs = (
    (rofs, 'ROFS'),
    (rons, 'RONS'),
    (ta, 'TA'),
    (iteration_times, 'RUN TIME (seconds)'),
)
for figure_number, (samples, axis_label) in enumerate(histogram_specs, start=1):
    plt.figure(figure_number)
    plt.hist(samples, bins=nn_bins, density=True)
    plt.xlabel(axis_label)

plt.show()

# (normalized) histograms

In [None]:
# formulas from the theory: mean and variance of one log-return of a GBM over dt
(mu_bull, sigma_bull), (mu_bear, sigma_bear) = gbm_par

theo_mean_bull = (mu_bull - (sigma_bull**2)/2)*dt
theo_mean_bear = (mu_bear - (sigma_bear**2)/2)*dt

theo_variance_bull = (sigma_bull**2)*dt
theo_variance_bear = (sigma_bear**2)*dt

theo_std_bull = np.sqrt(theo_variance_bull)
theo_std_bear = np.sqrt(theo_variance_bear)

# print values: theoretical quantities next to the ones recovered from the centroids
comparisons = (
    ("bull", 0, off_regime_index, theo_mean_bull, theo_variance_bull),
    ("bear", 1, on_regime_index, theo_mean_bear, theo_variance_bear),
)
for position, (regime_name, centroid_number, regime_index, mean_value, variance_value) in enumerate(comparisons):
    # every block but the very first one is preceded by an empty line
    lead = "" if position == 0 else "\n"
    first_moment = centroids[regime_index][0]
    second_moment = centroids[regime_index][1]
    print(f"{lead}mean {regime_name} = {mean_value}")
    print(f"mean centroid {centroid_number} = {first_moment}")
    print(f"\nvariance {regime_name} = {variance_value}")
    print(f"variance centroid {centroid_number} = {second_moment - first_moment**2}")

### (normalized) histogram of the mean

In [None]:
# sample mean of every sub-sequence
data = np.mean(lift_matrix, axis=1)
n_bins = int(np.sqrt(M))

# normalized histogram of the sample means
plt.hist(data, bins=n_bins, alpha=0.6, color='b', density=True) 

# vertical lines: theoretical means (solid) and centroid means (dashed)
mean_markers = (
    (theo_mean_bull, 'green', '-', 'theo_bull'),
    (theo_mean_bear, 'red', '-', 'theo_bear'),
    (centroids[off_regime_index][0], 'green', '--', 'centroid_0'),
    (centroids[on_regime_index][0], 'red', '--', 'centroid_1'),
)
for position, line_color, line_style, line_label in mean_markers:
    plt.axvline(x=position, color=line_color, linestyle=line_style, label=line_label)

# labels and legend
# plt.title('Distribution')
plt.xlabel('μ')
# plt.ylabel('f(x)')
plt.legend()

plt.show()

### (normalized) histogram of the std

In [None]:
# sample standard deviation of every sub-sequence
data = np.std(lift_matrix, axis=1)
n_bins = int(np.sqrt(M))

# normalized histogram of the sample standard deviations
plt.hist(data, bins=n_bins, alpha=0.6, color='b', density=True)  

# vertical lines: theoretical values (solid) and values recovered from the first two
# raw moments of the centroids (dashed)
plt.axvline(x=theo_std_bull, color='green', linestyle='-', label='theo_bull')
plt.axvline(x=theo_std_bear, color='red', linestyle='-', label='theo_bear')
plt.axvline(x=np.sqrt(centroids[off_regime_index][1] - (centroids[off_regime_index][0])**2), color='green', linestyle='--', label='centroid_0')
plt.axvline(x=np.sqrt(centroids[on_regime_index][1] - (centroids[on_regime_index][0])**2), color='red', linestyle='--', label='centroid_1')

# labels and legend
# raw string: '\s' is an invalid escape sequence in a normal string literal
plt.xlabel(r'$\sigma$')
# plt.ylabel('f(x)')
plt.legend()

# Show the plot
plt.show()