In [51]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [52]:
df = pd.read_csv('./input/copper_log_returns_5d_final.csv')
df.describe()

Unnamed: 0,LMCADS03,LMCADY,DXY,SPX,BCOM,MXWD,XAU,XAG,LMCADY_acu_5d_log,LMCADY_std_5d_log
count,5550.0,5550.0,5550.0,5550.0,5550.0,5550.0,5550.0,5550.0,5550.0,5550.0
mean,0.000331,0.000331,6e-06,0.000326,-5e-06,0.000258,0.000343,0.000321,0.001644,0.014202
std,0.016023,0.016427,0.004791,0.011739,0.010211,0.009828,0.010703,0.019365,0.034657,0.008692
min,-0.104003,-0.10358,-0.027263,-0.127652,-0.064023,-0.099967,-0.095121,-0.203851,-0.252004,0.000803
25%,-0.007485,-0.007702,-0.002649,-0.00393,-0.005347,-0.003789,-0.004888,-0.008032,-0.016593,0.008627
50%,0.0,0.0,0.0,0.00039,0.0,0.000644,0.000509,0.000791,0.00231,0.012146
75%,0.008706,0.008933,0.00265,0.005488,0.00558,0.004882,0.006018,0.009851,0.021674,0.017555
max,0.118805,0.117259,0.025199,0.109572,0.056475,0.089019,0.102451,0.131802,0.191786,0.092058


In [53]:
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df.drop(['Date', 'LMCADY_std_5d_log', 'LMCADY_acu_5d_log'], axis=1))
print(scaled_features.shape)
scaled_features

(5550, 8)


array([[0.55227417, 0.55884327, 0.72437164, ..., 0.6517713 , 0.45227945,
        0.63091865],
       [0.52567224, 0.53006073, 0.42503138, ..., 0.54897833, 0.55693906,
        0.6515204 ],
       [0.51527382, 0.5154988 , 0.42642861, ..., 0.62711759, 0.48144791,
        0.61189368],
       ...,
       [0.44600323, 0.44822604, 0.71993335, ..., 0.48498343, 0.44097188,
        0.58567092],
       [0.45143493, 0.45403385, 0.52509919, ..., 0.5433245 , 0.56421267,
        0.65969574],
       [0.52193192, 0.52924947, 0.65677493, ..., 0.46569592, 0.42103467,
        0.54740291]])

In [54]:
def crearSecuencias(data, n_steps):
    X, y = [], []
    try:
        data = data.values  # Asegurarse de que 'data' es un array de NumPy
    except:
        pass
    for i in range(n_steps, len(data)):
        X.append(data[i-n_steps:i, :-2])  # las variables excepto los target
        y.append(data[i, -2:])            # los target
    return np.array(X), np.array(y)

In [55]:
n_steps = 25  # ventana modificable
X, y = crearSecuencias(scaled_features, n_steps)
(X.shape, y.shape)


((5525, 25, 6), (5525, 2))

In [56]:
# verificar que haya secuencia
print(X[0])
print(X[1])

[[0.55227417 0.55884327 0.72437164 0.67578672 0.74159531 0.6517713 ]
 [0.52567224 0.53006073 0.42503138 0.53606691 0.64989338 0.54897833]
 [0.51527382 0.5154988  0.42642861 0.63179794 0.39756355 0.62711759]
 [0.44606453 0.44874882 0.63149789 0.51042887 0.50893837 0.4821214 ]
 [0.51086559 0.51327568 0.38913311 0.478309   0.5652901  0.45140603]
 [0.51586387 0.52114596 0.5327604  0.61909187 0.64695536 0.6078601 ]
 [0.41770099 0.41621381 0.38644071 0.53806236 0.41382434 0.54391078]
 [0.49820187 0.49829303 0.56480515 0.53215019 0.56169979 0.53296103]
 [0.51686972 0.51875981 0.47076197 0.56261193 0.52738562 0.56611812]
 [0.49630121 0.4964232  0.54413481 0.4768538  0.61115836 0.47153276]
 [0.48546653 0.49150519 0.41593264 0.52145604 0.60195686 0.51290021]
 [0.42530544 0.42670955 0.47422343 0.47860195 0.52504532 0.46285678]
 [0.52951787 0.53168539 0.50449486 0.53810781 0.5313222  0.50310721]
 [0.46147725 0.46429241 0.47407852 0.47139088 0.54641322 0.47080182]
 [0.48796562 0.49131745 0.51966674

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0, shuffle=False)


In [58]:
y_test

array([[0.50216753, 0.60677057],
       [0.49401487, 0.58090082],
       [0.43348774, 0.6024514 ],
       ...,
       [0.44097188, 0.58567092],
       [0.56421267, 0.65969574],
       [0.42103467, 0.54740291]])

In [59]:
def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

In [60]:
model = Sequential([

    Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=64, kernel_size=2, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Conv1D(filters=32, kernel_size=2, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),

    Flatten(),

    Dense(100, activation='relu'),
    Dropout(0.5),

    Dense(50, activation='relu'),
    Dropout(0.5),

    Dense(2)
    
])

initial_learning_rate = 0.0001
optimizer = Adam(learning_rate=initial_learning_rate)

model.compile(optimizer=optimizer, loss='mse', metrics=[rmse])


In [61]:
history = model.fit(X_train, y_train, epochs=50, verbose=1, validation_split=0.15,batch_size=64)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [62]:
loss = model.evaluate(X_test, y_test)
print(f'Loss on test data: {loss}')


Loss on test data: [0.0276373028755188, 0.16521000862121582]


In [63]:
# imprimir solo los primeros elementos de los arreglos dobles

print(y_test[0][:])



[0.50216753 0.60677057]


In [64]:
y_test.std()

0.07702583079247924