# Representation Learning

En esta notebook se implementa un modelo de representation learning a traves de un multilayer perceptron, la idea es que el modelo aprenda representaciones llenas de significado respecto las variables target. Probaremos por un lado la funcion de perdida del error cuadratico y por otro lado la funcion de perdida dada por el coeficiente de Spearman

In [48]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
import pandas as pd 
from scipy.stats import spearmanr

In [49]:
train = pd.read_csv('data/raw_train_data.csv', index_col = 0)
test = pd.read_csv('data/raw_test_data.csv', index_col = 0)

In [74]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

continuous_features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'MACD', 'RSI', 'BB_High', 'BB_Mid', 'BB_Low',
    'ATR', 'NATR', 'Currency_Volume', 'Adj Close',
    'BETA', 'TSF_7', 'TSF_14', 'TSF_28', 'Angle_7', 'Angle_14',
    'Angle_28', 'Reg_7', 'Reg_14', 'Reg_28'
]
categorical_features = ['Month', 'Weekday', 'Ticker', 'Currency']

preprocessing_pipeline = ColumnTransformer(transformers = [
    ('continuous', StandardScaler(), continuous_features),
    ('categorical', OneHotEncoder(handle_unknown='ignore', sparse = False), categorical_features)
])

In [75]:
X_train, X_test, y_train, y_test = (
    preprocessing_pipeline.fit_transform(train),
    preprocessing_pipeline.transform(test),
    train.Forward_Return_1m,
    test.Forward_Return_1m
)

In [106]:
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential()
model.add(Dense(16))
model.add(Dense(8))
model.add(Dense(1))
model.compile(
    loss = 'mean_squared_error',
    optimizer = 'adam',
    metrics = [
        'RootMeanSquaredError',
    ]
)

stopping = EarlyStopping(patience = 5, restore_best_weights = True)

In [107]:
model.fit(X_train[:256], y_train[:256], epochs = 1, batch_size = 256, validation_data = (X_test[:1000], y_test[:1000]))



<tensorflow.python.keras.callbacks.History at 0x20f1c655608>

In [112]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [111]:
spearmanr(y_test[:1000], model.predict(X_test[:1000]))

SpearmanrResult(correlation=-0.1871191283680306, pvalue=2.483074249516712e-09)