## Project: Does data augmentation with the method proposed in 'Finding Order in Chaos: A Novel Data Augmentation Method for Time Series in Contrastive Learning' lead to better 1-day-ahead prediction results?



In [1]:
import numpy as np
import tensorflow as tf
import random
import os

# Single seed value shared by every random number generator seeded in this cell.
seed_value= 42

# 1. Export `PYTHONHASHSEED` with the fixed seed.
# NOTE(review): hash randomisation is presumably fixed when the interpreter starts,
# so this assignment likely only affects child processes — confirm.
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Seed Python's built-in `random` module.
random.seed(seed_value)

# 3. Seed NumPy's global (legacy) pseudo-random generator.
np.random.seed(seed_value)

# 4. Seed TensorFlow's global pseudo-random generator.
tf.random.set_seed(seed_value)


In [2]:
import tensorflow as tf
import pandas as pd
import yfinance as yf
import seaborn as sb
from tensorflow.keras import layers, Model
import numpy as np
import torch
import matplotlib.pyplot as plt
from scipy.fft import rfft, rfftfreq, irfft
from IPython.display import display, HTML
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# from tcn import TCN  # If you have the tcn package installed
from sklearn.metrics import mean_squared_error

import optuna
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from pykalman import KalmanFilter
from sklearn.manifold import TSNE

# Inject CSS so elements with the `.container` class use the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
# Re-seed NumPy's global RNG; cut_mix and binary_mix below draw from np.random.
np.random.seed(42)  # Set the random seed for reproducibility

def get_stock_data(ticker, start_date, end_date):
    """Download price history for one ticker via ``yf.download``.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start_date: Forwarded as the ``start`` argument.
        end_date: Forwarded as the ``end`` argument.

    Returns:
        The object returned by ``yf.download`` for that request.
    """
    return yf.download(ticker, start=start_date, end=end_date)

def z_score_normalize(series):
    """Standardise ``series`` by subtracting its mean and dividing by its std.

    The mean and standard deviation come from the object's own ``.mean()`` and
    ``.std()`` methods, so their conventions are those of the input type.

    Args:
        series: Object exposing ``.mean()``, ``.std()`` and arithmetic operators.

    Returns:
        ``(series - mean) / std``.
    """
    centre = series.mean()
    spread = series.std()
    centred = series - centre
    return centred / spread

def denormalize_z_score(normalized_series, original_mean, original_std):
    """Invert a z-score transform.

    Args:
        normalized_series: Standardised values.
        original_mean: Mean that was subtracted during normalisation.
        original_std: Standard deviation that was divided out.

    Returns:
        ``normalized_series * original_std + original_mean``.
    """
    rescaled = normalized_series * original_std
    return rescaled + original_mean

def create_sequences(data, returns, sequence_length=20):
    """Build sliding windows of features and the return that follows each window.

    Window ``i`` covers rows ``i .. i + sequence_length - 1`` of ``data`` and is
    paired with ``returns`` at position ``i + sequence_length``.

    Args:
        data: Row-indexed feature container (array, list of rows or DataFrame).
        returns: Targets aligned positionally with ``data`` (array, list or
            pandas Series).
        sequence_length: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and their targets.
        Both are empty when ``data`` has no more than ``sequence_length`` rows.

    Raises:
        IndexError: If ``returns`` is shorter than ``data``.
    """
    # Work on plain NumPy views so every lookup is positional. Indexing a pandas
    # Series with an int is label based (or a deprecated positional fallback),
    # which can pick the wrong target or raise KeyError.
    features = np.asarray(data)
    targets = np.asarray(returns)

    n_windows = len(features) - sequence_length
    X = [features[start:start + sequence_length] for start in range(n_windows)]
    y = [targets[start + sequence_length] for start in range(n_windows)]
    return np.array(X), np.array(y)

In [4]:
# def cut_mix(data1, data2, alpha=0.2):
#     assert len(data1) == len(data2)
#     size = len(data1)
#     cut_point = np.random.randint(0, size)
#     cut_length = int(size * alpha)
    
#     mixed_data = np.copy(data1)
#     mixed_data[cut_point:cut_point+cut_length] = data2[cut_point:cut_point+cut_length]
    
#     return mixed_data

def cut_mix(df1, df2, alpha=0.2):
    """Replace one contiguous block of rows in ``df1`` with the same rows of ``df2``.

    The block is ``int(len(df1) * alpha)`` rows long (clamped to the frame
    length). Its start is drawn from ``np.random`` so that the whole block
    always fits inside the frame; the replaced fraction therefore no longer
    shrinks when the cut point lands near the end.

    Args:
        df1: Base DataFrame; it is not modified.
        df2: DataFrame of the same shape that supplies the replacement rows.
        alpha: Fraction of rows to replace.

    Returns:
        A copy of ``df1`` with the selected rows overwritten positionally.

    Raises:
        AssertionError: If the two frames differ in shape.
    """
    assert df1.shape == df2.shape
    size = len(df1)
    mixed_df = df1.copy()

    cut_length = min(max(int(size * alpha), 0), size)
    if cut_length == 0:
        # Nothing to replace (empty frame or alpha too small for one row).
        return mixed_df

    # Upper bound is exclusive, so the last admissible start is size - cut_length.
    start = np.random.randint(0, size - cut_length + 1)
    stop = start + cut_length
    # to_numpy() makes the assignment explicitly positional.
    mixed_df.iloc[start:stop] = df2.iloc[start:stop].to_numpy()

    return mixed_df

def binary_mix(data1, data2, alpha=0.2):
    """Mix two equally shaped inputs element-wise with a random Bernoulli mask.

    Each element is taken from ``data1`` with probability ``alpha`` and from
    ``data2`` otherwise. The mask is drawn from ``np.random``.

    Args:
        data1: First input; must expose ``.shape`` (DataFrame or ndarray).
        data2: Second input with the same length.
        alpha: Probability of keeping an element of ``data1``.

    Returns:
        DataFrame with a fresh default index. Column labels are copied from
        ``data1`` when it has them; a six-column array gets the legacy OHLCV
        names used previously.

    Raises:
        AssertionError: If the inputs differ in length.
    """
    assert len(data1) == len(data2)
    mask = np.random.binomial(1, alpha, size=data1.shape).astype(bool)

    mixed_data = np.where(mask, data1, data2)

    # Take labels from the input instead of assuming exactly six OHLCV columns.
    columns = getattr(data1, 'columns', None)
    if columns is None and mixed_data.ndim == 2 and mixed_data.shape[1] == 6:
        columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    return pd.DataFrame(mixed_data, columns=columns)

def linear_mix(data1, data2, alpha=0.2):
    """Return the convex combination ``alpha * data1 + (1 - alpha) * data2``.

    Args:
        data1: First input, weighted by ``alpha``.
        data2: Second input, weighted by ``1 - alpha``; same length as ``data1``.
        alpha: Weight given to ``data1``.

    Returns:
        The element-wise weighted sum, in whatever type the operands produce.

    Raises:
        AssertionError: If the inputs differ in length.
    """
    assert len(data1) == len(data2)

    first_part = alpha * data1
    second_part = (1 - alpha) * data2
    return first_part + second_part

def geometric_mix(data1, data2, alpha=0.2):
    """Return the weighted geometric mean ``data1**alpha * data2**(1 - alpha)``.

    Args:
        data1: First input, raised to the power ``alpha``.
        data2: Second input, raised to ``1 - alpha``; same length as ``data1``.
        alpha: Exponent applied to ``data1``.

    Returns:
        The element-wise product of the two powers.

    Raises:
        AssertionError: If the inputs differ in length.
    """
    assert len(data1) == len(data2)

    first_factor = data1**alpha
    second_factor = data2**(1 - alpha)
    return first_factor * second_factor

def amplitude_mix(data1, data2, alpha=0.2):
    """Mix the Fourier magnitudes of two series while keeping the phase of ``data1``.

    The transform runs along axis 0 (the row / time axis), one spectrum per
    column. The previous code used the default last axis and so transformed
    across the feature columns of each row.

    Args:
        data1: First input (DataFrame or array, rows are time steps).
        data2: Second input with the same length.
        alpha: Weight of ``data1``'s magnitude; ``data2`` gets ``1 - alpha``.

    Returns:
        DataFrame with as many rows as the inputs and a fresh default index.
        Column labels are copied from ``data1`` when it has them; a six-column
        array gets the legacy OHLCV names used previously.

    Raises:
        AssertionError: If the inputs differ in length.
    """
    assert len(data1) == len(data2)

    values1 = np.asarray(data1, dtype=float)
    values2 = np.asarray(data2, dtype=float)
    n_samples = values1.shape[0]

    fft1 = np.fft.rfft(values1, axis=0)
    fft2 = np.fft.rfft(values2, axis=0)

    # Mix the magnitudes
    mixed_magnitude = alpha * np.abs(fft1) + (1 - alpha) * np.abs(fft2)

    # Keep the phase of the first data
    mixed_fft = mixed_magnitude * np.exp(1j * np.angle(fft1))

    # Pass n explicitly: without it irfft returns an even number of samples,
    # which drops one row for odd-length input.
    mixed_data = np.fft.irfft(mixed_fft, n=n_samples, axis=0)

    columns = getattr(data1, 'columns', None)
    if columns is None and mixed_data.ndim == 2 and mixed_data.shape[1] == 6:
        columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

    return pd.DataFrame(mixed_data, columns=columns)

### PROPOSED TECHNIQUE BELOW

def proposed_mixup(df1, df2, threshold=0.1, alpha=0.5):
    """Frequency-domain mixup of two frames, applied column by column.

    For every column the real FFT of both series is taken. At the bins where
    the first series' amplitude exceeds ``threshold`` times its maximum
    amplitude, amplitude and phase are blended with the matching bin of the
    second series. All other bins keep the first series' spectrum. The result
    is transformed back to the time domain.

    ``alpha`` is the weight of ``df1`` for both amplitude and phase, so
    ``alpha=1`` reproduces ``df1`` and ``alpha=0`` takes the selected bins
    entirely from ``df2``.

    Args:
        df1: Anchor DataFrame; its columns define the output columns.
        df2: DataFrame providing the second series for every column of ``df1``.
        threshold: Significance cut-off relative to the maximum amplitude.
        alpha: Weight of ``df1`` in the blend.

    Returns:
        DataFrame with the columns of ``df1``, a fresh default index and as
        many rows as ``df1``.
    """

    def proposed_mixup_feature(data1, data2, threshold, alpha):
        """Blend one pair of 1-D series in the frequency domain."""
        series1 = np.asarray(data1, dtype=float)
        series2 = np.asarray(data2, dtype=float)
        n1, n2 = series1.size, series2.size

        spectrum1 = rfft(series1)
        spectrum2 = rfft(series2)
        amplitude1 = np.abs(spectrum1)
        amplitude2 = np.abs(spectrum2)

        # Bins of the first spectrum whose amplitude is significant. Indices
        # are kept as spectrum positions so amplitude and phase are always
        # read from the same bin.
        bins1 = np.flatnonzero(amplitude1 > amplitude1.max() * threshold)

        # Bin k of series 1 is frequency k / n1; the nearest bin of series 2
        # is k * n2 / n1. For equal lengths this is the same bin.
        if n1 == n2:
            bins2 = bins1
        else:
            bins2 = np.clip(
                np.rint(bins1 * (n2 / n1)).astype(int), 0, spectrum2.size - 1
            )

        phase1 = np.angle(spectrum1[bins1])
        phase2 = np.angle(spectrum2[bins2])

        # Shortest signed angular distance from phase1 to phase2, in (-pi, pi].
        phase_diff = (phase2 - phase1) % (2 * np.pi)
        phase_diff = np.where(phase_diff > np.pi, phase_diff - 2 * np.pi, phase_diff)

        new_amplitude = alpha * amplitude1[bins1] + (1 - alpha) * amplitude2[bins2]
        # Move towards the second series by (1 - alpha), matching the amplitude
        # weighting above.
        new_phase = phase1 + (1 - alpha) * phase_diff

        mixed_spectrum = spectrum1.copy()
        mixed_spectrum[bins1] = new_amplitude * np.exp(1j * new_phase)

        # n keeps the original length; without it odd-length input loses a sample.
        return irfft(mixed_spectrum, n=n1)

    mixed_columns = {
        feature: proposed_mixup_feature(
            df1[feature].values, df2[feature].values, threshold, alpha
        )
        for feature in df1.columns
    }

    return pd.DataFrame(mixed_columns)

# def proposed_mixup(data1, data2, threshold=0.1, alpha=0.5):
#     def get_significant_frequencies(data, threshold=0.1, axis=0):
#         """
#         Perform Fourier Transform on data along the specified axis and identify frequencies 
#         with significant amplitude for each feature.

#         Args:
#         - data: Time series data (can be multidimensional).
#         - threshold: Threshold for significance, relative to the max amplitude.
#         - axis: Axis along which the Fourier Transform is applied.

#         Returns:
#         - significant_freq: Frequencies with significant amplitude for each feature.
#         - significant_ampl: Amplitude of the significant frequencies for each feature.
#         - full_spectrum: Full Fourier spectrum for all frequencies and features.
#         """
#         # Perform Fourier Transform along the specified axis
#         spectrum = rfft(data, axis=axis)
#         frequencies = rfftfreq(data.shape[axis], d=1)  # Assuming unit time interval between samples

#         # Find significant amplitudes for each feature
#         amplitude = np.abs(spectrum)
#         significant_indices = amplitude > (np.max(amplitude, axis=axis, keepdims=True) * threshold)

#         # Use broadcasting to expand dimensions for proper indexing
#         significant_freq = np.expand_dims(frequencies, axis=1) * significant_indices
#         significant_ampl = amplitude * significant_indices

#         return significant_freq, significant_ampl, spectrum

#     def phase_mixup(sig_freq1, sig_ampl1, spectrum1, sig_freq2, sig_ampl2, spectrum2, alpha=0.5):
#         # Initialize the mixed spectrum with the same shape as the input spectrums
#         mixed_spectrum = np.zeros_like(spectrum1)

#         # Iterate over each feature (assuming features are along axis 1)
#         for feature_index in range(spectrum1.shape[1]):
#             freqs1 = rfftfreq(spectrum1.shape[0], d=1)
#             freqs2 = rfftfreq(spectrum2.shape[0], d=1)

#             # Assuming sig_freq1 and sig_freq2 are lists of arrays, one per feature
#             for freq in sig_freq1[feature_index]:
#                 index1 = np.argmin(np.abs(freqs1 - freq))
#                 index2 = np.argmin(np.abs(freqs2 - freq))

#                 if index1 >= len(sig_ampl1[feature_index]) or index2 >= len(sig_ampl2[feature_index]):
#                     continue  # Skip the frequency if the index is out of bounds

#                 phase1 = np.angle(spectrum1[index1, feature_index])
#                 phase2 = np.angle(spectrum2[index2, feature_index])

#                 phase_diff = (phase2 - phase1) % (2 * np.pi)
#                 phase_diff = phase_diff - 2 * np.pi if phase_diff > np.pi else phase_diff

#                 new_amplitude = alpha * sig_ampl1[feature_index][index1] + (1 - alpha) * sig_ampl2[feature_index][index2]
#                 new_phase = phase1 + alpha * phase_diff

#                 mixed_spectrum[index1, feature_index] = new_amplitude * np.exp(1j * new_phase)
                
#         print(pd.DataFrame(mixed_spectrum, columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']))

#         return mixed_spectrum

#     def reconstruct_time_series(mixed_spectrum):
#         """
#         Reconstruct time series from mixed spectrum using inverse Fourier Transform.
#         Each column in mixed_spectrum corresponds to a feature.

#         Args:
#         - mixed_spectrum: 2D array where each column is the mixed spectrum of a feature.

#         Returns:
#         - mixed_time_series: 2D array where each column is the reconstructed time series of a feature.
#         """
#         # Initialize an empty list to hold the reconstructed time series for each feature
#         reconstructed_series = []

#         # Perform inverse Fourier Transform for each column (feature)
#         for i in range(mixed_spectrum.shape[1]):
#             mixed_time_series = irfft(mixed_spectrum[:, i])
#             reconstructed_series.append(mixed_time_series)

#         # Convert the list of arrays into a 2D array where columns are features
#         mixed_time_series = np.column_stack(reconstructed_series)
#         return pd.DataFrame(mixed_time_series, columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
    
#     # Step 1: Get significant frequencies and amplitude for both time series
#     sig_freq1, sig_ampl1, spectrum1 = get_significant_frequencies(data1)
#     sig_freq2, sig_ampl2, spectrum2 = get_significant_frequencies(data2)

#     # Step 2: Identify significant frequencies (already done in step 1)

#     # Step 3: Phase and Magnitude Mixup
#     mixed_spectrum = phase_mixup(sig_freq1, sig_ampl1, spectrum1, sig_freq2, sig_ampl2, spectrum2)

#     # Step 4: Reconstruction of the time series
#     mixed_time_series = reconstruct_time_series(mixed_spectrum)

#     return mixed_time_series

In [5]:
from statsmodels.nonparametric.smoothers_lowess import lowess

def apply_lowess_smoothing(df, frac=0.1):
    """Smooth every column of ``df`` with LOWESS, using the index as x values.

    Args:
        df: DataFrame whose columns are smoothed independently.
        frac: Forwarded to ``lowess`` as its ``frac`` argument.

    Returns:
        New DataFrame with the same index and column labels, holding the
        smoothed values in the original row order (``return_sorted=False``).
    """
    result = pd.DataFrame(index=df.index)

    # One LOWESS fit per column.
    for name in df.columns:
        result[name] = lowess(df[name], df.index, frac=frac, return_sorted=False)

    return result

In [6]:
def create_augmented_data(rets, df1, df2, method, alpha, window_size=20):
    """Augment ``df1`` with ``df2`` using one mix method and build model inputs.

    Args:
        rets: Targets aligned row-for-row with ``df1``; passed to
            ``create_sequences``.
        df1: Original feature frame.
        df2: Second frame mixed into ``df1``.
        method: One of ``'cut_mix'``, ``'binary_mix'``, ``'linear_mix'``,
            ``'geometric_mix'`` (the legacy spelling ``'geometrix_mix'`` is
            still accepted), ``'amplitude_mix'`` or ``'proposed_mix'``. Any
            other value leaves the data un-augmented (a copy of ``df1``).
        alpha: Mixing coefficient forwarded to the chosen method.
        window_size: Sequence length passed to ``create_sequences``.

    Returns:
        Tuple ``(X, y, df)``: the windows built from the standardised features,
        their targets, and the augmented (unscaled) frame.
    """
    if method == 'cut_mix':
        df = cut_mix(df1, df2, alpha)
    elif method == 'binary_mix':
        df = binary_mix(df1, df2, alpha)
    elif method == 'linear_mix':
        df = linear_mix(df1, df2, alpha)
    elif method in ('geometric_mix', 'geometrix_mix'):
        df = geometric_mix(df1, df2, alpha)
    elif method == 'amplitude_mix':
        df = amplitude_mix(df1, df2, alpha)
    elif method == 'proposed_mix':
        # Pass alpha by keyword: the third positional parameter of
        # proposed_mixup is `threshold`, not `alpha`.
        df = proposed_mixup(df1, df2, alpha=alpha)
    else:
        # Original
        df = df1.copy()

    # Standardise every column of the (augmented) frame before windowing.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(df.to_numpy())

    # Create sequences
    X, y = create_sequences(scaled_features, rets, window_size)

    return X, y, df

In [7]:
def objective(trial):
    """Optuna objective: train an LSTM on the global ``df`` and return its MSE.

    Reads the notebook-level DataFrame ``df``. Targets are taken from
    ``df['Returns']`` when that column exists; otherwise they are computed as
    the percentage change of ``df['Close']``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean squared error of the trained model on the held-out split.
    """
    # Hyperparameters to be tuned by Optuna
    n_layers = trial.suggest_int('n_layers', 1, 3)
    lstm_units = trial.suggest_categorical('lstm_units', [50, 100, 150])
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    # The range spans three orders of magnitude, so sample it on a log scale;
    # uniform sampling would put almost every draw above 1e-3.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])

    # Data preparation
    scaler = StandardScaler()
    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    scaled_features = scaler.fit_transform(df[feature_columns])
    sequence_length = 20  # Using 20 timesteps
    # No cell adds a 'Returns' column to df, so fall back to computing it.
    if 'Returns' in df.columns:
        returns = df['Returns'].values
    else:
        returns = df['Close'].pct_change().values
    X, y = create_sequences(scaled_features, returns, sequence_length)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Model architecture: n_layers stacked LSTMs, each followed by dropout.
    model = Sequential()
    for i in range(n_layers):
        # Every LSTM except the last must emit the full sequence for the next one.
        model.add(LSTM(units=lstm_units, return_sequences=(i < n_layers - 1)))
        model.add(Dropout(rate=dropout_rate))
    model.add(Dense(units=1))

    # Compilation
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # Model training
    # NOTE(review): the same split drives early stopping and the returned
    # score, so the reported MSE is not from untouched data — confirm intended.
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=50,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose=0
    )

    # Evaluation
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    return mse

In [8]:
def create_model(best_params, input_shape):
    """Build and compile an LSTM regressor from a hyperparameter dict.

    The architecture mirrors the one searched in ``objective``: ``n_layers``
    stacked LSTMs, each followed by dropout, then a single-unit dense output.
    Previously only ``'lstm_units'`` was read and every other entry was
    silently ignored.

    Args:
        best_params: Dict with the required key ``'lstm_units'`` and the
            optional keys ``'n_layers'`` (default 1), ``'dropout_rate'``
            (default 0, meaning no dropout layers) and ``'learning_rate'``
            (default: the ``'adam'`` string optimizer, as before).
        input_shape: ``(timesteps, features)`` of one input window.

    Returns:
        A compiled Keras ``Sequential`` model with MSE loss.
    """
    n_layers = max(1, int(best_params.get('n_layers', 1)))
    dropout_rate = best_params.get('dropout_rate', 0.0)
    learning_rate = best_params.get('learning_rate')

    model = Sequential()
    for layer_idx in range(n_layers):
        # Only the last LSTM collapses the sequence to a single vector.
        layer_kwargs = {'return_sequences': layer_idx < n_layers - 1}
        if layer_idx == 0:
            layer_kwargs['input_shape'] = input_shape
        model.add(LSTM(best_params['lstm_units'], **layer_kwargs))
        if dropout_rate:
            model.add(Dropout(rate=dropout_rate))
    model.add(Dense(1))  # Assuming we are predicting one value

    optimizer = 'adam' if learning_rate is None else Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')
    return model

In [9]:
def plot_TSNE(df1, df2):
    """Scatter-plot a joint 2-D t-SNE embedding of two frames.

    Both frames are log-transformed (``log(x + 1)``), stacked, embedded
    together and then split back so the rows of ``df1`` and ``df2`` can be
    drawn in different colours.

    Args:
        df1: Frame plotted as "Original".
        df2: Frame plotted as "Augmented".

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    df1_log = np.log(df1 + 1)  # Adding 1 to avoid log(0)
    df2_log = np.log(df2 + 1)

    combined_data = pd.concat([df1_log, df2_log])

    # Apply t-SNE
    # Perplexity must be smaller than the number of samples, so cap it for
    # small inputs. The iteration count is left at the library default (1000):
    # the `n_iter` keyword was renamed in newer scikit-learn releases.
    perplexity = min(100, len(combined_data) - 1)
    tsne = TSNE(n_components=2, random_state=0, perplexity=perplexity)
    tsne_results = tsne.fit_transform(combined_data)

    # Now we split the t-SNE results back into original and augmented parts
    n_original = len(df1)
    tsne_df1 = tsne_results[:n_original, :]
    tsne_df2 = tsne_results[n_original:, :]

    # Plot the results
    plt.figure(figsize=(12,8))
    plt.scatter(tsne_df1[:, 0], tsne_df1[:, 1], label='Original', alpha=0.5)
    plt.scatter(tsne_df2[:, 0], tsne_df2[:, 1], label='Augmented', alpha=0.5)
    plt.title('t-SNE: original vs. augmented')
    plt.xlabel('t-SNE component 1')
    plt.ylabel('t-SNE component 2')
    plt.legend()
    plt.show()

### Pull Data from Yahoo Finance

In [10]:
# Date range forwarded to get_stock_data for every ticker.
start_date = '2010-01-01'
end_date = '2023-01-01'

# Define the list of Dow Jones Industrial Average companies
tickers = [
    "MMM", "AXP", "AMGN", "AAPL", "BA", "CAT", "CVX", "CSCO", "KO", "DIS",
    "DOW", "GS", "HD", "HON", "IBM", "INTC", "JNJ", "JPM", "MCD", "MRK",
    "MSFT", "NKE", "PG", "CRM", "TRV", "UNH", "V", "WBA", "WMT"
]

# Create a dictionary to store historical data for each company (ticker -> download result)
historical_data = {}

# Loop through the Dow companies and retrieve historical data (one download per ticker)
for ticker in tickers:
    stock_data = get_stock_data(ticker, start_date, end_date)
    historical_data[ticker] = stock_data

## Original - Not needed here

In [11]:
# Window length passed as `window_size` to create_augmented_data.
seq_len = 20
# Mixing coefficient passed to every augmentation method below.
alpha = 0.2

In [12]:
# Working copy of the AAPL frame; objective() reads this global `df`.
df = historical_data['AAPL'].copy()
# Day-over-day percentage change of the close, used as the prediction target.
rets = df['Close'].pct_change()

## Create df2 - LOWESS Smoothing

In [13]:
# df1: the AAPL frame itself (same object as in historical_data, not a copy).
df1 = historical_data['AAPL']
# df2: LOWESS-smoothed version of df1, used as the second input of every mix.
df2 = apply_lowess_smoothing(df1)

## CutMix

In [14]:
# Create X, y for LSTM from the cut_mix-augmented frame (returned as df3)
X, y, df3 = create_augmented_data(rets, df1, df2, 'cut_mix', alpha, seq_len)

# Train Test Split (30% held out)
# NOTE(review): train_test_split presumably shuffles by default, so overlapping
# windows from neighbouring dates may land in both sets — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)

In [15]:
# Display the cut_mix-augmented frame.
df3

In [16]:
# # Best hyperparameters
# print('Number of finished trials:', len(study.trials))
# print('Best trial:', study.best_trial.params)

Best trial: {'n_layers': 1, 'lstm_units': 50, 'dropout_rate': 0.3754947803049919, 'learning_rate': 0.007042149702171292, 'batch_size': 128}

In [17]:
# Hand-entered hyperparameters for the cut_mix run; dropout_rate and
# learning_rate differ from the "Best trial" values recorded above.
best_params = {'n_layers': 1, 
                'lstm_units': 50, 
                'dropout_rate': 0.5, 
                'learning_rate': 0.001, 
                'batch_size': 128,
                'epochs': 50}

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_cut = np.sqrt(mean_squared_error(y_test, predictions))

In [18]:
# Compare the original frame with the cut_mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Binary Mix

In [19]:
# Create X, y for LSTM from the binary_mix-augmented frame (returned as df3)
X, y, df3 = create_augmented_data(rets, df1, df2, 'binary_mix', alpha, seq_len)

# Train Test Split (30% held out, same random_state as the other runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)
# print('Best trial:', study.best_trial.params)

In [20]:
# Hand-entered hyperparameters for the binary_mix run.
best_params = {'n_layers': 2, 
               'lstm_units': 50, 
               'dropout_rate': 0.25, 
               'learning_rate': 0.005, 
               'batch_size': 64}
best_params['epochs'] = 50

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_binary = np.sqrt(mean_squared_error(y_test, predictions))

In [21]:
# Compare the original frame with the binary_mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Linear Mix

In [22]:
# Create X, y for LSTM from the linear_mix-augmented frame (returned as df3)
X, y, df3 = create_augmented_data(rets, df1, df2, 'linear_mix', alpha, seq_len)

# Train Test Split (30% held out, same random_state as the other runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)
# print('Best trial:', study.best_trial.params)

In [23]:
# Hand-entered hyperparameters for the linear_mix run.
best_params = {'n_layers': 1, 
               'lstm_units': 150, 
               'dropout_rate': 0.45, 
               'learning_rate': 0.0025, 
               'batch_size': 32}
best_params['epochs'] = 50

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_linear = np.sqrt(mean_squared_error(y_test, predictions))

In [24]:
# Compare the original frame with the linear_mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Geometric Mix

In [25]:
# Create X, y for LSTM from the geometric-mix-augmented frame (returned as df3).
# The method key is spelled 'geometrix_mix' to match create_augmented_data.
X, y, df3 = create_augmented_data(rets, df1, df2, 'geometrix_mix', alpha, seq_len)

# Train Test Split (30% held out, same random_state as the other runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)
# print('Best trial:', study.best_trial.params)

In [26]:
# Hand-entered hyperparameters for the geometric mix run.
best_params = {'n_layers': 1, 
               'lstm_units': 50, 
               'dropout_rate': 0.25, 
               'learning_rate': 0.006, 
               'batch_size': 64}
best_params['epochs'] = 50

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_geom = np.sqrt(mean_squared_error(y_test, predictions))

In [27]:
# Compare the original frame with the geometric-mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Amplitude Mix

In [28]:
# Create X, y for LSTM from the amplitude_mix-augmented frame (returned as df3)
X, y, df3 = create_augmented_data(rets, df1, df2, 'amplitude_mix', alpha, seq_len)

# Train Test Split (30% held out, same random_state as the other runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)
# print('Best trial:', study.best_trial.params)

In [29]:
# Hand-entered hyperparameters for the amplitude_mix run.
best_params = {'n_layers': 2, 
               'lstm_units': 100, 
               'dropout_rate': 0.28, 
               'learning_rate': 0.0035, 
               'batch_size': 64}
best_params['epochs'] = 50

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_amplitude = np.sqrt(mean_squared_error(y_test, predictions))

In [30]:
# Compare the original frame with the amplitude_mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Proposed Mix

In [31]:
# Create X, y for LSTM from the proposed_mix-augmented frame (returned as df3)
X, y, df3 = create_augmented_data(rets, df1, df2, 'proposed_mix', alpha, seq_len)

# Train Test Split (30% held out, same random_state as the other runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)
# print('Best trial:', study.best_trial.params)

In [32]:
# Hand-entered hyperparameters for the proposed_mix run.
best_params = {'n_layers': 2, 
               'lstm_units': 100, 
               'dropout_rate': 0.4, 
               'learning_rate': 0.002, 
               'batch_size': 64}
best_params['epochs'] = 50

# Shape of one input window, taken from axes 1 and 2 of the training array.
input_shape = (X_train.shape[1], X_train.shape[2])
model = create_model(best_params, input_shape)
model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])
predictions = model.predict(X_test)

# Compute RMSE on the held-out windows
rmse_proposed = np.sqrt(mean_squared_error(y_test, predictions))

In [33]:
# Compare the original frame with the proposed_mix-augmented frame in t-SNE space.
plot_TSNE(df1,df3)

## Results

In [34]:
# print('====    Original RMSE  ====')
# print(f'The RMSE of original is: {round(rmse_og,5)}\n')
# Report the held-out RMSE of each augmentation run, rounded to 5 decimals.
print('====    Augmented/Mixup RMSE  ====')
print(f'The RMSE of cut_mix is: {round(rmse_cut,5)}')
print(f'The RMSE of binary_mix is: {round(rmse_binary,5)}')
print(f'The RMSE of linear_mix is: {round(rmse_linear,5)}')
print(f'The RMSE of geometric_mix is: {round(rmse_geom,5)}')
print(f'The RMSE of amplitude_mix is: {round(rmse_amplitude,5)}')
print(f'The RMSE of proposed_mix is: {round(rmse_proposed,5)}')

# Next

Use mutual information to determine whether the augmented data really contains more information, i.e. whether it addresses the epistemic uncertainty. The aleatoric uncertainty is already handled by the data augmentation.