In [None]:
!pip install git+https://github.com/macrosynergy/macrosynergy@develop

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os


import macrosynergy.management as msm
import macrosynergy.panel as msp
import macrosynergy.signal as mss
import macrosynergy.pnl as msn


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import warnings

warnings.simplefilter("ignore")

In [None]:
# Cross-sections of interest

cids_dm = ["AUD", "CAD", "CHF", "EUR", "GBP", "JPY", "NOK", "NZD", "SEK", "USD"]
cids_em = [
    "CLP",
    "COP",
    "CZK",
    "HUF",
    "IDR",
    "ILS",
    "INR",
    "KRW",
    "MXN",
    "PLN",
    "THB",
    "TRY",
    "TWD",
    "ZAR",
]
cids = cids_dm + cids_em
cids_du = cids_dm + cids_em
cids_dux = list(set(cids_du) - set(["IDR", "NZD"]))
cids_xg2 = list(set(cids_dux) - set(["EUR", "USD"]))

In [None]:
# Quantamental categories of interest

ecos = [
    "CPIC_SA_P1M1ML12",
    "CPIC_SJA_P3M3ML3AR",
    "CPIC_SJA_P6M6ML6AR",
    "CPIH_SA_P1M1ML12",
    "CPIH_SJA_P3M3ML3AR",
    "CPIH_SJA_P6M6ML6AR",
    "INFTEFF_NSA",
    "INTRGDP_NSA_P1M1ML12_3MMA",
    "INTRGDPv5Y_NSA_P1M1ML12_3MMA",
    "PCREDITGDP_SJA_D1M1ML12",
    "PCREDITBN_SJA_P1M1ML12",
    "RGDP_SA_P1Q1QL4_20QMA",
    "RYLDIRS02Y_NSA",
    "RYLDIRS05Y_NSA",
]
mkts = [
    "DU02YXR_NSA",
    "DU05YXR_NSA",
    "DU02YXR_VT10",
    "DU05YXR_VT10",
    "EQXR_NSA",
    "EQXR_VT10",
    "FXXR_NSA",
    "FXXR_VT10",
    "FXCRR_NSA",
    "FXTARGETED_NSA",
    "FXUNTRADABLE_NSA",
]

xcats = ecos + mkts

In [47]:
# load the dataset:

df = pd.read_csv('JPMaQS_Quantamental_Indicators.csv', index_col=0, parse_dates=['real_date'])

scols = ["cid", "xcat", "real_date", "value"]  # required columns
df = df[scols].copy()
display (df.tail(3))
df.info()

Unnamed: 0,cid,xcat,real_date,value
3350268,ZAR,RGDP_SA_P1Q1QL4_20QMA,2023-08-31,0.534737
3350269,ZAR,RYLDIRS02Y_NSA,2023-08-31,3.230619
3350270,ZAR,RYLDIRS05Y_NSA,2023-08-31,3.543863


<class 'pandas.core.frame.DataFrame'>
Index: 3350271 entries, 0 to 3350270
Data columns (total 4 columns):
 #   Column     Dtype         
---  ------     -----         
 0   cid        object        
 1   xcat       object        
 2   real_date  datetime64[ns]
 3   value      float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 127.8+ MB


In [48]:
display(df['xcat'].unique())
display(df['cid'].unique())
df['ticker'] = df['cid'] + "_" + df["xcat"]
df.set_index('real_date', inplace=True)
df.sort_index(inplace=True)

array(['CPIC_SA_P1M1ML12', 'CPIC_SJA_P3M3ML3AR', 'CPIC_SJA_P6M6ML6AR',
       'CPIH_SA_P1M1ML12', 'CPIH_SJA_P3M3ML3AR', 'CPIH_SJA_P6M6ML6AR',
       'FXTARGETED_NSA', 'FXUNTRADABLE_NSA', 'FXXR_NSA', 'FXXR_VT10',
       'INFTEFF_NSA', 'INTRGDP_NSA_P1M1ML12_3MMA',
       'INTRGDPv5Y_NSA_P1M1ML12_3MMA', 'PCREDITBN_SJA_P1M1ML12',
       'PCREDITGDP_SJA_D1M1ML12', 'RGDP_SA_P1Q1QL4_20QMA',
       'RYLDIRS02Y_NSA', 'RYLDIRS05Y_NSA', 'DU02YXR_NSA', 'DU02YXR_VT10',
       'DU05YXR_NSA', 'DU05YXR_VT10', 'EQXR_NSA', 'EQXR_VT10',
       'FXCRR_NSA'], dtype=object)

array(['AUD', 'CAD', 'CHF', 'CLP', 'COP', 'CZK', 'EUR', 'GBP', 'HUF',
       'IDR', 'ILS', 'INR', 'JPY', 'KRW', 'MXN', 'NOK', 'NZD', 'PLN',
       'SEK', 'THB', 'TRY', 'TWD', 'USD', 'ZAR'], dtype=object)

In [55]:
# LSTM particular implementations

from sklearn.model_selection import TimeSeriesSplit
from keras import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras_tuner import RandomSearch
from keras.metrics import mean_squared_error

# only consider the USD dollars for now
# only did USD for now, can be extended to other currencies
df_USD = df[df["cid"] == 'USD'] 

# make the xcat features into columns
df_USD = df_USD.pivot_table(index='real_date', columns='xcat', values='value', aggfunc='first')

df_USD = df_USD.reset_index()

# deals with missing data
df_USD = df_USD.dropna()

# split into training and test data
tss = TimeSeriesSplit(n_splits = 4)

for train_index, test_index in tss.split(df_USD):
    pass

train_set = df_USD.iloc[train_index]
test_set = df_USD.iloc[test_index]

tss = TimeSeriesSplit(n_splits = 3)

for train_index, test_index in tss.split(train_set):
    pass

development_set = train_set.iloc[train_index]
val_set = df_USD.iloc[test_index]




In [60]:
dev_y = np.array(development_set['DU05YXR_VT10'])
dev_y = dev_y.reshape(dev_y.shape[0],1)

dev_X = np.array(development_set.drop(columns=['DU05YXR_VT10', 'real_date']))
dev_X = dev_X.reshape(dev_X.shape[0],dev_X.shape[1],1)

test_y = np.array(test_set['DU05YXR_VT10'])
test_y = test_y.reshape(test_y.shape[0],1)

test_X = np.array(test_set.drop(columns=['DU05YXR_VT10','real_date']))
test_X = test_X.reshape(test_X.shape[0],test_X.shape[1],1)

val_y = np.array(val_set['DU05YXR_VT10'])
val_y = val_y.reshape(val_set.shape[0],1)

val_X = np.array(val_set.drop(columns=['DU05YXR_VT10','real_date']))
val_X = val_X.reshape(val_X.shape[0],val_X.shape[1],1)

In [61]:
# Hyperparameter training part

def build_model(hp):
  model = Sequential()

  model = Sequential()

  model.add(LSTM(hp.Choice('units', [64, 128, 256]), activation='relu', input_shape=(dev_X.shape[1], 1), return_sequences=True))
  model.add(LSTM(hp.Choice('units', [8, 16, 32]), activation='relu', return_sequences=False))
  model.add(Dense(1))
  model.compile(hp.Choice('optimizer', ["adam", "RMSProp", "SGD"]),loss='mse', metrics=[mean_squared_error])
  return model

In [62]:
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5)

In [63]:
tuner.search(dev_X, dev_y, epochs=10, validation_data=(val_X, val_y))
best_model = tuner.get_best_models()[0]

Trial 5 Complete [00h 00m 10s]
val_loss: 0.4786025881767273

Best val_loss So Far: 0.08929534256458282
Total elapsed time: 00h 02m 47s


In [64]:
# Retrain on the optimal hyperparameter and evaluate on the test set

history = best_model.fit(dev_X, dev_y, epochs=10, validation_data=(val_X, val_y))
records = best_model.evaluate(test_X, test_y)

print("Mean Squared Loss is: " + str(records[1]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Mean Squared Loss is: 0.14177067577838898
