In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from utils.finance_metrics import annualized_return
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, median_absolute_error
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from tensorflow.keras.preprocessing import timeseries_dataset_from_array 
from tensorflow.keras import Sequential
from tensorflow.keras.layers import GRU, SimpleRNN, Dense, Dropout, Reshape, Embedding
from tensorflow.keras.metrics import MeanSquaredError, RootMeanSquaredError
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

In [2]:
def build_RNN():
    # Generate the model architecture
    rnn_model = Sequential()
#     rnn_model.add(Reshape((21,44), input_shape=(44,)))
#     rnn_model.add(GRU(128, input_shape=(1,44), dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
#     rnn_model.add(Embedding())
    rnn_model.add(GRU(128, input_shape=(21,44), dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
    #input_shape = [X_train.shape[1], X_train.shape[2]]
    rnn_model.add(SimpleRNN((128)))
    rnn_model.add(Dense(128, activation='relu'))
    rnn_model.add(Dropout(0.3))
    rnn_model.add(Dense(64, activation='relu'))
    rnn_model.add(Dropout(0.3))
    rnn_model.add(Dense(32, activation='relu'))
    rnn_model.add(Dropout(0.3))
    rnn_model.add(Dense(16, activation='relu'))
    rnn_model.add(Dropout(0.3))
    rnn_model.add(Dense(1))
    # Compile the model
    rnn_model.compile(optimizer='adam', loss='mean_squared_error',
                  metrics=[MeanSquaredError(), RootMeanSquaredError()])
    return rnn_model

In [3]:
# Create a 3D input
def create_dataset (X, y, time_steps = 1):
    Xs, ys = [], []
    for i in range(len(X)-time_steps):
        v = X[i:i+time_steps, :]
        Xs.append(v)
        ys.append(y[i+time_steps])
    return np.array(Xs), np.array(ys)

In [4]:
data_path = r'data/processed'
data_filenames = os.listdir(data_path)

models = {}
feature_transformers = {}
feature_scalers = {}

In [8]:
for data_file in data_filenames:
    pairs = data_file[:-4].split('-')
    pair = (pairs[0], pairs[1])
    print(pair)
    
    if pair not in [('ADX', 'TY'), ('NNN', 'O')]:
        continue

    pair_df = pd.read_pickle(os.path.join(data_path, data_file))
    X_train = pair_df.loc["2000-01-01":"2014-12-31", :].drop(columns="Return Diff (t+1)")
    y_train = pair_df.loc["2000-01-01":"2014-12-31", ["Return Diff (t+1)"]]
    X_test = pair_df.loc["2015-01-01":, :].drop(columns="Return Diff (t+1)")
    y_test = pair_df.loc["2015-01-01":, ["Return Diff (t+1)"]]

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    y_train[y_train.isin([-np.inf, np.inf, np.nan])] = 0
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1
    y_test[y_test.isin([-np.inf, np.inf, np.nan])] = 0

    # add in quantiles as additional feature columns
    quantile_transformer = QuantileTransformer()
    quantile_transformer.fit(X_train); feature_transformers[pair] = quantile_transformer
    X_train.loc[:, [col + "_QUANTILE" for col in X_train.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_train), index=X_train.index,
                     columns=[col + "_QUANTILE" for col in X_train.columns]))
    X_test.loc[:, [col + "_QUANTILE" for col in X_test.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_test), index=X_test.index,
                     columns=[col + "_QUANTILE" for col in X_test.columns]))

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    y_train[y_train.isin([-np.inf, np.inf, np.nan])] = 0
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1
    y_test[y_test.isin([-np.inf, np.inf, np.nan])] = 0

    # scale features
    X_scaler = StandardScaler()
    X_scaler.fit(X_train); feature_scalers[pair] = X_scaler
    X_train = pd.DataFrame(X_scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(X_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    assert not any([any(arr) for arr in np.array(np.isinf(X_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(X_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_test))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_test))])

    split = PredefinedSplit(test_fold=[0 if v else -1 for v in X_train.index < '2012-01-01'])
    models[pair] = {
#         'Random Forest': GridSearchCV(estimator=RandomForestRegressor(bootstrap=True), param_grid={
#             "n_estimators": [n for n in range(50, 150, 25)]
#         }, cv=split, scoring="explained_variance", n_jobs=-1),
#         'Adaptive Boost': GridSearchCV(estimator=AdaBoostRegressor(), param_grid={
#             "n_estimators": [n for n in range(50, 250, 50)],
#             "loss": ["linear", "exponential"]
#         }, cv=split, scoring="explained_variance", n_jobs=-1),
#         'Gradient Boost': GridSearchCV(estimator=GradientBoostingRegressor(loss="huber"), param_grid={
#             "n_estimators": [n for n in range(50, 250, 50)]
#         }, cv=split, scoring="explained_variance", n_jobs=-1),
#         'Neural Net': GridSearchCV(estimator=MLPRegressor(solver="lbfgs", max_iter=1000000), param_grid={
#             "hidden_layer_sizes": [(250, h1, h2)
#                                    for h1 in range(100, 200, 50)
#                                    for h2 in range(25, h1//2, 25)]
#         }, cv=split, scoring="explained_variance", n_jobs=-1),
        'Recurrent Neural Net': KerasRegressor(nb_epoch=5000, batch_size=64, build_fn=build_RNN, verbose=False)
    }

    print(f"==================================================\n"
          f"Results for pair {pair}:\n"
          f"- Return Difference Stats:\n"
          f"  - mean: {float(np.mean(y_test)):.06f}\n"
          f"  - std: {float(np.std(y_test)):.06f}\n"
          f"  - quantiles [0.00, 0.25, 0.50, 0.75, 1.00]:\n"
          f"    {[np.round(i, decimals=6) for i in np.quantile(y_test, [0, .25, .50, .75, 1])]}")
    pred_average = None
    for model_type, model in models[pair].items():
#         print(X_train.shape, np.array(y_train).ravel().shape)
        pred = None
    
        if model_type == "Recurrent Neural Net":    
            X_train2 = np.array(X_train)
            y_train2 = np.array(y_train)
            X_test2 = np.array(X_test)
            y_test2 = np.array(y_test)

            
            TIME_STEPS = 21
            X_test2, y_test2 = create_dataset(X_test2, y_test2,   
                                            TIME_STEPS)
            X_train2, y_train2 = create_dataset(X_train2, y_train2, 
                                              TIME_STEPS)
            
#             print('X_train.shape: ', X_test.shape)
#             print('y_train.shape: ', y_train.shape)
#             print('X_test.shape: ', X_test.shape)
#             print('y_test.shape: ', y_train.shape)

            
            model.fit(X_train2, np.array(y_train2).ravel())
            pred = model.predict(X_test2)
            
        else:
            
            model.fit(X_train[:], np.array(y_train[:]).ravel())
            pred = model.predict(X_test[:]) 
            
        if model_type != "Recurrent Neural Net": 
            if pred_average is None: pred_average = pred / 4
            else: pred_average = pred_average + np.array(pred / 4)
        
        if model_type != "Recurrent Neural Net": 
            mse = mean_squared_error(np.array(y_test), np.array(pred).ravel())
            mae = median_absolute_error(np.array(y_test), np.array(pred).ravel())
            print(f"- Model: {model_type}\n"
                  f"  - best parameters: {model.best_params_}\n"
                  f"  - MSE: {mse:.06f}\n"
                  f"  - MAE: {mae:.06f}\n"
                  f"--------------------------------------------------")
        else:
            mse = mean_squared_error(np.array(y_test[21:]), np.array(pred).ravel())
            mae = median_absolute_error(np.array(y_test[21:]), np.array(pred).ravel())
            print(f"- Model: {model_type}\n"
                  f"  - MSE: {mse:.06f}\n"
                  f"  - MAE: {mae:.06f}\n"
                  f"--------------------------------------------------")
            
#     mse = mean_squared_error(np.array(y_test), np.array(pred_average).ravel())
#     mae = median_absolute_error(np.array(y_test), np.array(pred_average).ravel())
#     print(f"- Ensemble of Random Forest, Adaptive Boost, Gradient Boost, and Neural Net\n"
#           f"  - MSE: {mse:.06f}\n"
#           f"  - MAE: {mae:.06f}\n"
#           f"--------------------------------------------------")
#     print(f"==================================================\n")


('ADX', 'TY')
Results for pair ('ADX', 'TY'):
- Return Difference Stats:
  - mean: -0.000025
  - std: 0.005070
  - quantiles [0.00, 0.25, 0.50, 0.75, 1.00]:
    [-0.066671, -0.002313, 6e-06, 0.002432, 0.056949]
- Model: Recurrent Neural Net
  - MSE: 0.000123
  - MAE: 0.008838
--------------------------------------------------
('AEG', 'ING')
('AMAT', 'KLAC')
('APA', 'DVN')
('ARW', 'AVT')
('ASA', 'AU')
('AVB', 'EQR')
('BAC', 'WFC')
('BBVA', 'SAN')
('BEN', 'TROW')
('BK', 'NTRS')
('BMO', 'RY')
('BXP', 'VNO')
('CCL', 'RCL')
('CM', 'TD')
('CMA', 'TFC')
('COP', 'PEO')
('CPT', 'UDR')
('CSX', 'NSC')
('CUZ', 'WRE')
('CVX', 'XOM')
('DHI', 'LEN')
('DRE', 'PLD')
('E', 'TOT')
('ED', 'SO')
('ELS', 'MAA')
('FITB', 'RF')
('FRT', 'REG')
('FULT', 'VLY')
('GAM', 'USA')
('GFI', 'HMY')
('GOLD', 'NEM')
('HAL', 'SLB')
('HES', 'OXY')
('HIW', 'KRC')
('HP', 'PTEN')
('HQH', 'HQL')
('IAC', 'MTCH')
('IFN', 'IIF')
('KBH', 'PHM')
('KIM', 'WRI')
('LSI', 'PSA')
('MAC', 'SPG')
('MRO', 'MUR')
('NNN', 'O')
Results for pai

In [None]:
conf_mat_all = None
for data_file in data_filenames:
    pairs = data_file[:-4].split('-')
    pair = (pairs[0], pairs[1])
    print(pair)

    pair_df = pd.read_pickle(os.path.join(data_path, data_file))
    X_train = pair_df.loc["2000-01-01":"2014-12-31", :].drop(columns="Return Diff (t+1)")
    y_train = pair_df.loc["2000-01-01":"2014-12-31", ["Return Diff (t+1)"]]
    X_test = pair_df.loc["2015-01-01":, :].drop(columns="Return Diff (t+1)")
    y_test = pair_df.loc["2015-01-01":, ["Return Diff (t+1)"]]

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    y_train[y_train.isin([-np.inf, np.inf, np.nan])] = 0
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1
    y_test[y_test.isin([-np.inf, np.inf, np.nan])] = 0

    # add in quantiles as additional feature columns
    quantile_transformer = feature_transformers[pair]
    X_train.loc[:, [col + "_QUANTILE" for col in X_train.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_train), index=X_train.index,
                     columns=[col + "_QUANTILE" for col in X_train.columns]))
    X_test.loc[:, [col + "_QUANTILE" for col in X_test.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_test), index=X_test.index,
                     columns=[col + "_QUANTILE" for col in X_test.columns]))

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1

    # scale features
    X_scaler = feature_scalers[pair]
    X_train = pd.DataFrame(X_scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(X_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    assert not any([any(arr) for arr in np.array(np.isinf(X_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(X_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_test))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_test))])

    print(f"==================================================\n"
          f"Results for pair {pair}:")
    pred_ensemble = None
    for model_type, model in models[pair].items():
        pred = [-1 if i < 0 else 1 for i in np.array(model.predict(X_test)).ravel()]
        if pred_ensemble is None: pred_ensemble = np.array(pred) / 4
        else: pred_ensemble = pred_ensemble + (np.array(pred) / 4)

        conf_mat = confusion_matrix(
            [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
            [0 if i < 0 else 1 for i in np.array(pred).ravel()])
        accuracy = accuracy_score(
            [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
            [0 if i < 0 else 1 for i in np.array(pred).ravel()])
        precision = precision_score(
            [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
            [0 if i < 0 else 1 for i in np.array(pred).ravel()])
        recall = recall_score(
            [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
            [0 if i < 0 else 1 for i in np.array(pred).ravel()])
        f1 = f1_score(
            [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
            [0 if i < 0 else 1 for i in np.array(pred).ravel()])

        print(f"- Model: {model_type}\n"
              f"  - Confusion Matrix:\n{conf_mat}\n"
              f"  - Accuracy: {accuracy:.06f}\n"
              f"  - Recall: {recall:.06f}\n"
              f"  - Precision: {precision:.06f}\n"
              f"  - F1-Score: {f1:.06f}\n"
              f"--------------------------------------------------")

    conf_mat = confusion_matrix(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    accuracy = accuracy_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    precision = precision_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    recall = recall_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    f1 = f1_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])

    if conf_mat_all is None: conf_mat_all = np.array(conf_mat)
    else: conf_mat_all = conf_mat_all + np.array(conf_mat)

    print(f"- Ensemble of Random Forest, Adaptive Boost, Gradient Boost, and Neural Net\n"
          f"  - Confusion Matrix:\n{conf_mat}\n"
          f"  - Accuracy: {accuracy:.06f}\n"
          f"  - Recall: {recall:.06f}\n"
          f"  - Precision: {precision:.06f}\n"
          f"  - F1-Score: {f1:.06f}\n"
          f"--------------------------------------------------")
    print(f"==================================================\n")


print(f"- Confusion Matrix of ensemble prediction across all pairs\n{conf_mat_all}\n"
      f"  - Accuracy: {(conf_mat_all[0][0]+conf_mat_all[1][1]) / np.sum(conf_mat_all):.06f}\n"
      f"--------------------------------------------------")
print(f"==================================================\n")

In [None]:
def signal_to_multiplier(signal, long):
    if           signal == -1.0: return -0.50 * long
    elif -1.0 <  signal <= -0.5: return -0.25 * long
    elif -0.5 <  signal <= -0.0: return -0.10 * long
    elif  0.0 <= signal <   0.5: return  0.10 * long
    elif  0.5 <= signal <   1.0: return  0.25 * long
    elif         signal ==  1.0: return  0.50 * long
    else: raise ValueError("Out of range signal provided")

returns = {}
for data_file in data_filenames:
    pairs = data_file[:-4].split('-')
    pair = (pairs[0], pairs[1])
    print(pair)

    returns[pair] = pd.read_pickle(f"data/raw/{pair[0]}.zip").loc["2015-01-01":, ["Simple Return"]]
    returns[pair][pair[1]] = pd.read_pickle(f"data/raw/{pair[1]}.zip").loc["2015-01-01":, ["Simple Return"]]
    returns[pair].columns = pair

    pair_df = pd.read_pickle(os.path.join(data_path, data_file))
    X_train = pair_df.loc["2000-01-01":"2014-12-31", :].drop(columns="Return Diff (t+1)")
    y_train = pair_df.loc["2000-01-01":"2014-12-31", ["Return Diff (t+1)"]]
    X_test = pair_df.loc["2015-01-01":, :].drop(columns="Return Diff (t+1)")
    y_test = pair_df.loc["2015-01-01":, ["Return Diff (t+1)"]]

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    y_train[y_train.isin([-np.inf, np.inf, np.nan])] = 0
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1
    y_test[y_test.isin([-np.inf, np.inf, np.nan])] = 0

    # add in quantiles as additional feature columns
    quantile_transformer = feature_transformers[pair]
    X_train.loc[:, [col + "_QUANTILE" for col in X_train.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_train), index=X_train.index,
                     columns=[col + "_QUANTILE" for col in X_train.columns]))
    X_test.loc[:, [col + "_QUANTILE" for col in X_test.columns]] = pd.DataFrame(
        pd.DataFrame(quantile_transformer.transform(X_test), index=X_test.index,
                     columns=[col + "_QUANTILE" for col in X_test.columns]))

    # fill invalid values with 1 (for ratios) or 0 (for differences)
    X_train[X_train.isin([-np.inf, np.inf, np.nan])] = 1
    y_train[y_train.isin([-np.inf, np.inf, np.nan])] = 0
    X_test[X_test.isin([-np.inf, np.inf, np.nan])] = 1
    y_test[y_test.isin([-np.inf, np.inf, np.nan])] = 0

    # scale features and target column
    X_scaler = feature_scalers[pair]
    X_train = pd.DataFrame(X_scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test = pd.DataFrame(X_scaler.transform(X_test), index=X_test.index, columns=X_test.columns)
    assert not any([any(arr) for arr in np.array(np.isinf(X_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(X_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(X_test))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_train))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_train))])
    assert not any([any(arr) for arr in np.array(np.isinf(y_test))])
    assert not any([any(arr) for arr in np.array(np.isnan(y_test))])

    print(f"==================================================\n"
          f"Results for pair {pair}:")
    pred_ensemble = None
    for model_type, model in models[pair].items():
        pred = [-1 if i < 0 else 1 for i in np.array(model.predict(X_test)).ravel()]
        if pred_ensemble is None: pred_ensemble = np.array(pred) / 4
        else: pred_ensemble = pred_ensemble + (np.array(pred) / 4)
        returns[pair][model_type] = pred
    returns[pair]["Ensemble"] = np.mean(
        returns[pair].loc[:, ["Random Forest", "Adaptive Boost", "Gradient Boost", "Recurrent Neural Net"]], axis=1)
    returns[pair]["Ensemble"] = pred_ensemble
    returns[pair][pair[0] + "_SIGNAL"] = [signal_to_multiplier(sig,  1) for sig in pred_ensemble]
    returns[pair][pair[1] + "_SIGNAL"] = [signal_to_multiplier(sig, -1) for sig in pred_ensemble]
    returns[pair][pair[0] + "_ADJRET"] = returns[pair][pair[0]] * returns[pair][pair[0] + "_SIGNAL"]
    returns[pair][pair[1] + "_ADJRET"] = returns[pair][pair[1]] * returns[pair][pair[1] + "_SIGNAL"]
    returns[pair]["PAIRS_TRADE_RET"] = np.mean(returns[pair][[p+"_ADJRET" for p in pair]], axis=1)
    ann_ret = annualized_return(returns[pair].loc[:, [pair[0],pair[1],"PAIRS_TRADE_RET"]])

    conf_mat = confusion_matrix(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    accuracy = accuracy_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    precision = precision_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    recall = recall_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])
    f1 = f1_score(
        [0 if i < 0 else 1 for i in y_test.to_numpy().ravel()],
        [0 if i < 0 else 1 for i in np.array(pred_ensemble).ravel()])

    print(f"- Ensemble of Random Forest, Adaptive Boost, Gradient Boost, and Recurrent Neural Net\n"
          f"  - Confusion Matrix:\n{conf_mat}\n"
          f"  - Accuracy: {accuracy:.06f}\n"
          f"  - Recall: {recall:.06f}\n"
          f"  - Precision: {precision:.06f}\n"
          f"  - F1-Score: {f1:.06f}\n"
          f"- Annualized Return:\n"
          f"  - {pair[0]}: {ann_ret[pair[0]]:0.06f}\n"
          f"  - {pair[1]}: {ann_ret[pair[1]]:0.06f}\n"
          f"  - Pairs Trade: {ann_ret['PAIRS_TRADE_RET']:0.06f}\n"
          f"--------------------------------------------------")
    print(f"==================================================\n")

    # Compute the correlation matrix
    corr = returns[pair].loc[:, [pair[0],pair[1],"PAIRS_TRADE_RET"]].corr()
    # Generate a mask for the upper triangle
    mask = np.triu(np.ones_like(corr, dtype=bool))
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(230, 20, as_cmap=True)
    # Draw the heatmap with the mask and correct aspect ratio
    heatmap = sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0, annot=True,
                          square=True, linewidths=.5, cbar_kws={"shrink": .5})
    heatmap.set_title(f"{pair[0]}-{pair[1]} Pairs Trade Correlation")
    fig = heatmap.get_figure()
    fig.savefig(f"out/img/{pair[0]}-{pair[1]}-corr-rnn.jpeg")

    f, ax = plt.subplots(figsize=(11, 9))
    barplot = ann_ret.plot.bar()
    barplot.set_xticklabels(ann_ret.index, rotation=0)
    barplot.set_title(f"{pair[0]}-{pair[1]} Annualized Return")
    fig = barplot.get_figure()
    fig.savefig(f"out/img/{pair[0]}-{pair[1]}-return-rnn.jpeg")
