#### Assignment 4

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import word2number as wn
import inflect

# Visualization Libraries
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.subplots as sp
import plotly.graph_objects as go

# Finance Libraries
import yfinance as yf

# Timeseries Libraries
import pmdarima # Python wrapper for R's auto.arima
from statsmodels.tsa.stattools import adfuller, kpss, acf, q_stat
from statsmodels.tsa.seasonal import seasonal_decompose # Decomposition
from statsmodels.stats.diagnostic import kstest_normal, lilliefors, acorr_ljungbox, het_breuschpagan, het_arch
from statsmodels.compat import lzip
from arch.unitroot import PhillipsPerron
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from prophet import Prophet

# Machine Learning Libraries
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error # Shared error metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU, Bidirectional

In [2]:
# Silence Interpolation Warnings for displaying small p-values
import warnings
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.filterwarnings('ignore', category=InterpolationWarning)

In [3]:
def plot_histogram_qq(series):
    # Create a figure with subplots
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

    # Plot histogram
    sns.histplot(series, ax=axes[0], kde=True)
    axes[0].set_title("Histogram")

    # Plot Q-Q plot
    stats.probplot(series, dist="norm", plot=axes[1])
    axes[1].set_title("Q-Q Plot")

    plt.tight_layout()
    plt.show()

In [4]:
def plot_acf_pacf(time_series):
    """
    Plots ACF and PACF plots for a given time series.
    
    Parameters:
        time_series (pd.Series): The input time series data.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    
    # Plot ACF
    plot_acf(time_series, ax=axes[0], lags=30)
    axes[0].set_title('Autocorrelation Function (ACF)')
    axes[0].set_ylim(-1.1, 1.1)  # Adjust the y-axis limits
    
    # Plot PACF
    plot_pacf(time_series, ax=axes[1], lags=30)
    axes[1].set_title('Partial Autocorrelation Function (PACF)')
    axes[1].set_ylim(-1.1, 1.1)  # Adjust the y-axis limits
    
    plt.tight_layout()
    plt.show()

In [5]:
def skew_and_kurtosis_confidence_intervals(timeseries):

    # Calculate n
    n = len(timeseries.dropna())

    # Calculate skew
    skew = timeseries.skew()
    SE_skew = np.sqrt((6*n*(n-1)) / ((n-2)*(n+1)*(n+3)))

    # Calculate kurtosis
    kurtosis = timeseries.kurtosis()
    SE_kurt = 2*SE_skew*np.sqrt((n**2-1)/(n-3))

    # Create dataframe with 95% confidence intervals
    normality_stats = pd.DataFrame({'Lower Bound':[skew - 1.96*SE_skew,kurtosis - 1.96*SE_kurt],
                                    'Data':[skew,kurtosis],
                                    'Upper Bound':[skew + 1.96*SE_skew,kurtosis + 1.96*SE_kurt]},
                                    index=['Skew','Kurtosis'])

    return(normality_stats)

In [6]:
def ComprehensiveTests(ts: pd.Series) -> pd.DataFrame:

    # Normality tests
    shapiro_stat, shapiro_p = stats.shapiro(ts)
    ks_stat, ks_p = stats.kstest(ts, 'norm', args=(ts.mean(), ts.std()))
    ad_result = stats.anderson(ts, dist='norm')
    jb_stat, jb_p = stats.jarque_bera(ts)
    ad_stat = ad_result.statistic
    ad_p = ad_result.critical_values[2]

    # Stationarity tests
    adf_stat, adf_p, _, _, _, _ = adfuller(ts)
    kpss_stat, kpss_p, _, _ = kpss(ts, nlags='auto', regression='c')
    pp_test = PhillipsPerron(ts)
    pp_stat = pp_test.stat
    pp_p = pp_test.pvalue

    # Autocorrelation tests
    ljung_box = acorr_ljungbox(ts, lags=1)
    ljung_box_stat = ljung_box.iloc[0, 0]
    ljung_box_p_value = ljung_box.iloc[0, 1]
    box_pierce_stat, box_pierce_p_value = q_stat(acf(ts, nlags=10, fft=True), len(ts))

    # Variance tests
    X = np.vstack([np.ones(len(ts)), range(len(ts))]).T
    bp_test_stat, bp_test_p_value, _, _ = het_breuschpagan(ts, X)
    ml_test_stat, ml_test_p_value, _, _ = het_arch(ts)
    
    # Determine results for each test
    shapiro_result = 'Normal' if shapiro_p > 0.05 else 'Non-Normal'
    ks_result = 'Normal' if ks_p > 0.05 else 'Non-Normal'
    ad_result = 'Normal' if ad_stat < ad_p else 'Non-Normal'
    jb_result = 'Normal' if jb_p > 0.05 else 'Non-Normal'
    adf_result = 'Stationary' if adf_p < 0.05 else 'Non-Stationary'
    kpss_result = 'Stationary' if kpss_p > 0.05 else 'Non-Stationary'
    pp_result = 'Stationary' if pp_p > 0.05 else 'Non-Stationary'
    ljung_box_result = 'Autocorrelated' if ljung_box_p_value < 0.05 else 'Non-Autocorrelated'
    box_pierce_result = 'Autocorrelated' if box_pierce_p_value[0] < 0.05 else 'Non-Autocorrelated'
    bp_result = 'Homoskedastic' if bp_test_p_value > 0.05 else 'Heteroskedastic'
    ml_result = 'Homoskedastic' if ml_test_p_value > 0.05 else 'Heteroskedastic'
    
    # Compile results
    Results = pd.DataFrame({
        'Test Name': ['Shapiro-Wilk', 'Kolmogorov-Smirnov', 'Anderson-Darling', 'Jarque-Bera', 'Augmented Dickey-Fuller', 'Kwiatkowski-Phillips-Schmidt-Shin', 'Phillips-Perron',
                    'Ljung-Box', 'Box-Pierce', 'Breusch-Pagan', 'McLeod-Li'],
        'Type': ['Normality', 'Normality', 'Normality', 'Normality', 'Stationarity', 'Stationarity', 'Stationarity', 
                'Autocorrelation', 'Autocorrelation', 'Variance', 'Variance'],
        'Test Statistic': [shapiro_stat, ks_stat, ad_stat, jb_stat, adf_stat, kpss_stat, pp_stat, 
                        ljung_box_stat, box_pierce_stat[0], bp_test_stat, ml_test_stat],
        'p-value': [shapiro_p, ks_p, np.nan, jb_p, adf_p, kpss_p, pp_p, 
                    ljung_box_p_value, box_pierce_p_value[0], bp_test_p_value, ml_test_p_value],
        'Result': [shapiro_result, ks_result, ad_result, jb_result,
        adf_result, kpss_result, pp_result, ljung_box_result,box_pierce_result,
        bp_result,ml_result]
    })
    
    return Results.round(3)

In [7]:
def decomposition(timeseries):

    timeseries_copy = timeseries.copy(deep=True)
    timeseries_copy = timeseries_copy.resample('D').ffill()
    decomp = seasonal_decompose(timeseries_copy)

    fig = make_subplots(rows=1,cols=3,subplot_titles=('Trend','Seasonal','Residuals'))

    fig.add_trace(px.line(decomp.trend).data[0],row=1,col=1)
    fig.add_trace(px.line(decomp.seasonal).data[0],row=1,col=2)
    fig.add_trace(px.line(decomp.resid).data[0],row=1,col=3)

    fig.update_layout(width=1950)

    fig.show()

In [145]:
def compute_error_metrics(Y_test, y_preds,n=0):
    def MAPE(true, pred):
        true, pred = np.array(true), np.array(pred)
        return np.mean(np.abs((true - pred) / true)) * 100
    
    metrics_data = []
    
    for y_pred in y_preds:
        MSE = mean_squared_error(Y_test, y_pred)
        RMSE = np.sqrt(MSE)
        MAE = mean_absolute_error(Y_test, y_pred)
        mape = MAPE(Y_test, y_pred)
        
        metrics_data.append([MSE, RMSE, MAE, mape])
    
    df = pd.DataFrame(metrics_data, columns=['MSE', 'RMSE', 'MAE', 'MAPE'])
    
    # Adding "Model" column

    df.insert(0, "Model", ["model_" + inflect.engine().number_to_words(i+1+n) for i in range((len(y_preds)))])
    
    return df.round(2)

In [146]:
def univariate_predictions_to_dataframe(y_true, y_preds):

    # Initialize dictionary for DataFrame
    df_dict = {'y_true': list(y_true)}
    
    # Add each prediction series to the dictionary with appropriate name
    for i, y_pred in enumerate(y_preds, start=1):
        column_name = f"model_{inflect.engine().number_to_words(i)}"
        df_dict[column_name] = list(y_pred)

    # Convert dictionary to DataFrame
    df = pd.DataFrame(df_dict)
    
    return df

In [147]:
def plot_spectral_density(time_series):
    # Compute FFT and frequencies
    sp = np.fft.fft(time_series)
    freq = np.fft.fftfreq(len(time_series))
    
    # Only consider the positive frequencies (real)
    mask = freq > 0

    # Convert frequencies to periods (days)
    periods = 1 / freq[mask]

    # Plot the power spectrum with periods
    plt.figure(figsize=(10, 5))
    plt.plot(periods, np.abs(sp[mask])**2)
    plt.title('Power Spectrum')
    plt.xlabel('Period (days)')
    plt.ylabel('Power')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [148]:
def create_multivariate_dataset(data, seq_length):
    x, y = [], []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        if seq_length == 1:
            x.append([seq[0]])  # make it a 2D array even if seq_length is 1
        else:
            x.append(seq[:-1])
        y.append(seq[-1])
    return np.array(x), np.array(y)

In [149]:
def create_univariate_dataset(dataset, look_back=10):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

#### Retrieve tickers

In [253]:
nikkei = yf.ticker.Ticker('nikkei') # Nikkei 225 - EWJ
hang_seng = yf.ticker.Ticker('EWH') # Hang Seng - EWH
csi = yf.ticker.Ticker('PEK') # CSI 300 - PEK
kospi = yf.ticker.Ticker('kospi') # Kospi Composite - EWY

#### Retrieve ticker history

In [254]:
nikkei_history = nikkei.history(start='2016-01-01',end='2018-01-01')
hang_seng_history = hang_seng.history(start='2016-01-01',end='2018-01-01')
csi_history = csi.history(start='2016-01-01',end='2018-01-01')
kospi_history = kospi.history(start='2016-01-01',end='2018-01-01')

Got error from yahoo api for ticker NIKKEI, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- NIKKEI: No timezone found, symbol may be delisted
Got error from yahoo api for ticker KOSPI, Error: {'code': 'Not Found', 'description': 'No data found, symbol may be delisted'}
- KOSPI: No timezone found, symbol may be delisted


#### Retrieve ticker price history

In [152]:
nikkei_price = nikkei_history.loc[:,'Close']
hang_seng_price = hang_seng_history.loc[:,'Close']
csi_price = csi_history.loc[:,'Close']
kospi_price = kospi_history.loc[:,'Close']

#### EDA

In [153]:
# Line Plots

fig = make_subplots(rows=2,cols=2,subplot_titles=('Nikkei 225','Hang Seng','CSI 300','KOSPI'))

fig.add_trace(px.line(nikkei_price).data[0],row=1,col=1)
fig.add_trace(px.line(hang_seng_price).data[0],row=1,col=2)
fig.add_trace(px.line(csi_price).data[0],row=2,col=1)
fig.add_trace(px.line(kospi_price).data[0],row=2,col=2)

fig.update_layout(height=800,width=1950)

fig.show()

In [249]:
px.line(nikkei_price)

In [251]:
# Line plot of normalize prices

closing_prices = pd.DataFrame({
    'Nikkei 225':nikkei_price,
    'Hang Seng':hang_seng_price,
    'CSI 300':csi_price,
    'KOSPI':kospi_price})

min_max_scaler = MinMaxScaler()

min_max_scaler.fit(closing_prices)

noramlized_closing_prices = pd.DataFrame(min_max_scaler.transform(closing_prices),index=nikkei_price.index)

noramlized_closing_prices.columns = closing_prices.columns

display(px.line(closing_prices),px.line(noramlized_closing_prices))

#### Univariate Preprocessing

In [181]:
# Provided code
training, test = train_test_split(nikkei_price, test_size=0.25, shuffle=False)

# Normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
training_scaled = scaler.fit_transform(training.values.reshape(-1, 1))
test_scaled = scaler.transform(test.values.reshape(-1, 1))

look_back = 10
X_train, y_train = create_univariate_dataset(training_scaled, look_back)
X_test, y_test = create_univariate_dataset(test_scaled, look_back)

# Reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

#### Univariate Models

In [244]:
# Model One
model_one = Sequential()
model_one.add(SimpleRNN(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model_one.add(Dense(1))
model_one.compile(optimizer='adam', loss='mse')
model_one.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_one = model_one.predict(X_test)
pred_one = scaler.inverse_transform(pred_one)



In [183]:
# Model Two
model_two = Sequential()
model_two.add(Bidirectional(SimpleRNN(50, input_shape=(X_train.shape[1], X_train.shape[2]))))
model_two.add(Dense(1))
model_two.compile(optimizer='adam', loss='mse')
model_two.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_two = model_two.predict(X_test)
pred_two = scaler.inverse_transform(pred_two)



In [184]:
# Model Three
model_three = Sequential()
model_three.add(Bidirectional((SimpleRNN(50, input_shape=(X_train.shape[1], X_train.shape[2]),recurrent_dropout=0.2))))
model_three.add(Dense(1))
model_three.compile(optimizer='adam', loss='mse')
model_three.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_three = model_three.predict(X_test)
pred_three = scaler.inverse_transform(pred_three)



In [185]:
# Model Four
model_four = Sequential()
model_four.add((LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]))))
model_four.add(Dense(1))
model_four.compile(optimizer='adam', loss='mse')
model_four.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_four = model_four.predict(X_test)
pred_four = scaler.inverse_transform(pred_four)



In [186]:
# Model Five
model_five = Sequential()
model_five.add(Bidirectional(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]))))
model_five.add(Dense(1))
model_five.compile(optimizer='adam', loss='mse')
model_five.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_five = model_five.predict(X_test)
pred_five = scaler.inverse_transform(pred_five)



In [187]:
# Model Six
model_six = Sequential()
model_six.add(Bidirectional(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]),recurrent_dropout=0.2)))
model_six.add(Dense(1))
model_six.compile(optimizer='adam', loss='mse')
model_six.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_six = model_six.predict(X_test)
pred_six = scaler.inverse_transform(pred_six)



In [188]:
# Model Seven
model_seven = Sequential()
model_seven.add((GRU(50, input_shape=(X_train.shape[1], X_train.shape[2]))))
model_seven.add(Dense(1))
model_seven.compile(optimizer='adam', loss='mse')
model_seven.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_seven = model_seven.predict(X_test)
pred_seven = scaler.inverse_transform(pred_seven)



In [189]:
# Model Eight
model_eight = Sequential()
model_eight.add(Bidirectional(GRU(50, input_shape=(X_train.shape[1], X_train.shape[2]))))
model_eight.add(Dense(1))
model_eight.compile(optimizer='adam', loss='mse')
model_eight.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_eight = model_eight.predict(X_test)
pred_eight = scaler.inverse_transform(pred_eight)



In [190]:
# Model Nine
model_nine = Sequential()
model_nine.add(Bidirectional(GRU(50, input_shape=(X_train.shape[1], X_train.shape[2]),recurrent_dropout=0.2)))
model_nine.add(Dense(1))
model_nine.compile(optimizer='adam', loss='mse')
model_nine.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
pred_nine = model_nine.predict(X_test)
pred_nine = scaler.inverse_transform(pred_nine)



#### Univariate Results

In [245]:
nikkei_test = nikkei_price[-len(X_test)-1:-1]

results = compute_error_metrics(nikkei_test,[pred_one,pred_two,pred_three,pred_four,pred_five,pred_six,pred_seven,pred_eight,pred_nine])
results

Unnamed: 0,Model,MSE,RMSE,MAE,MAPE
0,model_one,0.47,0.69,0.53,4.12
1,model_two,0.34,0.58,0.5,4.25
2,model_three,0.17,0.42,0.32,4.2
3,model_four,0.05,0.23,0.18,4.31
4,model_five,0.04,0.2,0.16,4.49
5,model_six,0.07,0.27,0.24,4.52
6,model_seven,0.03,0.18,0.14,4.36
7,model_eight,0.1,0.32,0.28,4.37
8,model_nine,0.02,0.13,0.11,4.43


In [192]:
results_df = univariate_predictions_to_dataframe(nikkei_test,[pred_one,pred_two,pred_three,pred_four,pred_five,pred_six,pred_seven,pred_eight,pred_nine])

for column in results_df.columns:

    results_df.loc[:,column] = results_df.loc[:,column].astype(float)

px.line(results_df)


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



#### Multivariate Preprocessing

In [226]:
# b. Normalize the data
multi_scaler = MinMaxScaler(feature_range=(0, 1))
multi_scaled_data = multi_scaler.fit_transform(closing_prices)

# c. Split data into training and testing
multi_train, multi_test = train_test_split(multi_scaled_data,test_size=0.25,shuffle=False)

multi_look_back = 10  # You can adjust this value as needed
multi_X_train, multi_y_train = create_multivariate_dataset(multi_train, multi_look_back)
multi_X_test, multi_y_test = create_multivariate_dataset(multi_test, multi_look_back)

#### Multivariate Models

In [227]:
model_ten = Sequential()
model_ten.add(SimpleRNN(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2])))
model_ten.add(Dense(multi_X_train.shape[2])) 

model_ten.compile(optimizer='adam', loss='mean_squared_error')
model_ten.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_ten = model_ten.predict(multi_X_test)
pred_ten = multi_scaler.inverse_transform(pred_ten)



In [228]:
model_evelen = Sequential()
model_evelen.add(Bidirectional(SimpleRNN(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]))))
model_evelen.add(Dense(multi_X_train.shape[2])) 

model_evelen.compile(optimizer='adam', loss='mean_squared_error')
model_evelen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_eleven = model_evelen.predict(multi_X_test)
pred_eleven = multi_scaler.inverse_transform(pred_eleven)



In [229]:
model_twelve = Sequential()
model_twelve.add(Bidirectional(SimpleRNN(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]),recurrent_dropout=0.2)))
model_twelve.add(Dense(multi_X_train.shape[2])) 

model_twelve.compile(optimizer='adam', loss='mean_squared_error')
model_twelve.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_twelve = model_twelve.predict(multi_X_test)
pred_twelve = multi_scaler.inverse_transform(pred_twelve)



In [230]:
model_thirteen = Sequential()
model_thirteen.add(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2])))
model_thirteen.add(Dense(multi_X_train.shape[2])) 

model_thirteen.compile(optimizer='adam', loss='mean_squared_error')
model_thirteen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_thirteen = model_thirteen.predict(multi_X_test)
pred_thirteen = multi_scaler.inverse_transform(pred_thirteen)



In [239]:
model_fourteen = Sequential()
model_fourteen.add(Bidirectional(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]))))
model_fourteen.add(Dense(multi_X_train.shape[2])) 

model_fourteen.compile(optimizer='adam', loss='mean_squared_error')
model_fourteen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_fourteen = model_fourteen.predict(multi_X_test)
pred_fourteen = multi_scaler.inverse_transform(pred_fourteen)



In [232]:
model_fifteen = Sequential()
model_fifteen.add(Bidirectional(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]),recurrent_dropout=0.2)))
model_fifteen.add(Dense(multi_X_train.shape[2])) 

model_fifteen.compile(optimizer='adam', loss='mean_squared_error')
model_fifteen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_fifteen = model_fifteen.predict(multi_X_test)
pred_fifteen = multi_scaler.inverse_transform(pred_fifteen)



In [233]:
model_sixteen = Sequential()
model_sixteen.add(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2])))
model_sixteen.add(Dense(multi_X_train.shape[2])) 

model_sixteen.compile(optimizer='adam', loss='mean_squared_error')
model_sixteen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_sixteen = model_sixteen.predict(multi_X_test)
pred_sixteen = multi_scaler.inverse_transform(pred_sixteen)



In [234]:
model_seventeen = Sequential()
model_seventeen.add(Bidirectional(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]))))
model_seventeen.add(Dense(multi_X_train.shape[2])) 

model_seventeen.compile(optimizer='adam', loss='mean_squared_error')
model_seventeen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_seventeen = model_seventeen.predict(multi_X_test)
pred_seventeen = multi_scaler.inverse_transform(pred_seventeen)



In [235]:
model_eighteen = Sequential()
model_eighteen.add(Bidirectional(LSTM(50, activation='relu', input_shape=(multi_X_train.shape[1], multi_X_train.shape[2]),recurrent_dropout=0.2)))
model_eighteen.add(Dense(multi_X_train.shape[2])) 

model_eighteen.compile(optimizer='adam', loss='mean_squared_error')
model_eighteen.fit(multi_X_train, multi_y_train, epochs=100, batch_size=32, verbose=0)

pred_eighteen = model_eighteen.predict(multi_X_test)
pred_eighteen = multi_scaler.inverse_transform(pred_eighteen)



#### Multivariate Results

In [240]:
closing_prices_test = closing_prices[-len(multi_X_test)-1:-1]

compute_error_metrics(closing_prices_test,[pred_ten,pred_eleven,pred_twelve,pred_thirteen,pred_fourteen,pred_fifteen,pred_sixteen,pred_seventeen,pred_eighteen],9)

Unnamed: 0,Model,MSE,RMSE,MAE,MAPE
0,model_ten,0.54,0.73,0.52,1.16
1,model_eleven,1.33,1.15,0.87,1.78
2,model_twelve,0.78,0.88,0.66,1.54
3,model_thirteen,1.35,1.16,0.88,2.58
4,model_fourteen,2.15,1.46,1.2,3.45
5,model_fifteen,0.59,0.77,0.56,1.26
6,model_sixteen,2.69,1.64,1.14,2.66
7,model_seventeen,3.55,1.88,1.52,4.28
8,model_eighteen,3.42,1.85,1.4,2.82


In [242]:
preds = [pred_ten, pred_eleven, pred_twelve, pred_thirteen, pred_fourteen, pred_fifteen, pred_sixteen, pred_seventeen, pred_eighteen]

# Define a color mapping for the columns
color_map = {
    'pred_Nikkei': 'blue',
    'pred_HangSeng': 'red',
    'pred_CSI': 'green',
    'pred_Kospi': 'orange',
    'Nikkei 225': 'blue',
    'Hang Seng': 'red',
    'CSI 300': 'green',
    'KOSPI': 'orange'
}

# Create a 3x3 subplot
fig = make_subplots(rows=3, cols=3, subplot_titles=[f"model {inflect.engine().number_to_words(i+1+9)}" for i in range(9)])

for idx, pred in enumerate(preds):
    pred_df = pd.DataFrame(pred)
    pred_df.columns = ['pred_Nikkei', 'pred_HangSeng', 'pred_CSI', 'pred_Kospi']
    pred_df.index = closing_prices_test.index

    multi_results = closing_prices_test.merge(pred_df, on='Date')

    results_scaler = MinMaxScaler()
    results_scaler.fit(multi_results)
    multi_results_normalized = pd.DataFrame(results_scaler.transform(multi_results), columns=multi_results.columns, index=multi_results.index)
    
    row = (idx // 3) + 1
    col = (idx % 3) + 1
    
    for column in multi_results_normalized.columns:
        fig.add_trace(
            go.Scatter(x=multi_results_normalized.index, y=multi_results_normalized[column], name=column, line=dict(color=color_map[column])),
            row=row, col=col
        )

fig.update_layout(height=1000,width=2000,showlegend=False)
fig.show()


In [300]:
for pred in [pred_ten,pred_eleven,pred_twelve,pred_thirteen,pred_fourteen,pred_fifteen,pred_sixteen,pred_seventeen,pred_eighteen]:

    pred_df = pd.DataFrame(pred)
    pred_df.columns = ['pred_Nikkei','pred_HangSeng','pred_CSI','pred_Kospi']
    pred_df.index = closing_prices_test.index

    multi_results = closing_prices_test.merge(pred_df,on='Date')

    results_scaler = MinMaxScaler()
    results_scaler.fit(multi_results)
    multi_results_normalized = results_scaler.transform(multi_results)
    display(px.line(multi_results_normalized))