In [28]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import numpy as np


In [29]:
# Read the three splits, using the 'date' column as a parsed datetime index.
train, validation, test = (
    pd.read_csv(csv_path, index_col='date', parse_dates=True)
    for csv_path in ('train.csv', 'validation.csv', 'test.csv')
)


In [30]:
def shift_data(df, targets, shift=1):
    """Pair each row's features with the target values found ``shift`` rows later.

    Args:
        df: Input DataFrame; it is copied, not modified.
        targets: Column label(s) whose values are moved up by ``shift`` rows.
        shift: Number of rows to look ahead (default 1).

    Returns:
        A new DataFrame in which the ``targets`` columns hold the values from
        ``shift`` rows further down. Every row containing any missing value is
        removed, which includes the rows left without a target by the shift.
    """
    future_targets = df[targets].shift(periods=-shift)
    result = df.copy()
    result[targets] = future_targets
    result = result.dropna()
    return result

# Columns the model has to predict.
target_columns = ['eurusd_pct_change', 'jpyusd_pct_change', 'gbpusd_pct_change']

# Move the targets 7 rows into the future on every split.
# NOTE(review): this rebinds train/validation/test, so re-running this cell
# shifts the targets a further 7 rows; reload the CSVs before re-running.
train, validation, test = (
    shift_data(split, target_columns, shift=7)
    for split in (train, validation, test)
)


In [31]:
def create_sliding_windows(df, target_columns, window_size, forecast_horizon):
    """Stack overlapping feature windows and the target windows that follow them.

    For every start position ``i`` the feature block is rows
    ``i .. i + window_size - 1`` of ``df`` without ``target_columns``, and the
    target block is the next ``forecast_horizon`` rows of ``target_columns``.
    All blocks are concatenated row-wise (they are NOT flattened into one row
    per window), so a source row appears once per window that contains it and
    the original index labels are repeated.

    Args:
        df: Row-ordered DataFrame holding both features and targets.
        target_columns: Column labels to predict; removed from ``X_df``.
        window_size: Number of consecutive rows in each feature block.
        forecast_horizon: Number of consecutive rows in each target block.

    Returns:
        Tuple ``(X_df, y_df)``. ``X_df`` has ``n_windows * window_size`` rows
        and ``y_df`` has ``n_windows * forecast_horizon`` rows, where
        ``n_windows = len(df) - window_size - forecast_horizon + 1``. The two
        frames line up row-for-row only when
        ``window_size == forecast_horizon``.

    Raises:
        ValueError: If ``df`` is too short to yield a single window.
    """
    n_windows = len(df) - window_size - forecast_horizon + 1
    if n_windows <= 0:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(
            f"Need at least window_size + forecast_horizon = "
            f"{window_size + forecast_horizon} rows to build one window, "
            f"got {len(df)}"
        )

    # Split columns once instead of dropping/selecting inside every iteration.
    features = df.drop(columns=target_columns)
    targets = df[target_columns]

    X = [features.iloc[start:start + window_size] for start in range(n_windows)]
    y = [
        targets.iloc[start + window_size:start + window_size + forecast_horizon]
        for start in range(n_windows)
    ]

    X_df = pd.concat(X)
    y_df = pd.concat(y)

    return X_df, y_df

# Window parameters (both measured in rows)
window_size = 7
forecast_horizon = 7

# Build the feature/target frames for each split with the same settings.
# NOTE(review): the targets were already shifted 7 rows ahead before this
# cell, and create_sliding_windows offsets y by another window_size rows --
# confirm the resulting look-ahead is the intended one.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_sliding_windows(split, target_columns, window_size, forecast_horizon)
    for split in (train, validation, test)
)


In [32]:
def preprocess_data(df=None, targets=None):
    """Build the unfitted column-wise preprocessing step for the pipeline.

    Numeric columns (``int64``/``float64``, excluding the targets) are
    standardized; ``object`` columns are ordinal-encoded. Columns of any other
    dtype are not listed in either transformer.

    Args:
        df: DataFrame whose dtypes decide which columns go to which
            transformer. Defaults to the notebook-level ``train`` frame.
        targets: Column labels to exclude from the numeric features. Defaults
            to the notebook-level ``target_columns``.

    Returns:
        An unfitted ``ColumnTransformer`` with a ``'num'`` and a ``'cat'``
        branch.

    Raises:
        KeyError: If any label in ``targets`` is not among the numeric columns
            of ``df`` (raised by ``Index.drop``).
    """
    # Fall back to the notebook globals so existing no-argument calls still work.
    if df is None:
        df = train
    if targets is None:
        targets = target_columns

    numeric_features = df.select_dtypes(include=['int64', 'float64']).columns.drop(targets)
    categorical_features = df.select_dtypes(include=['object']).columns

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])

    # Categories that only appear in validation/test data would make the
    # default encoder raise at predict time; map them to -1 instead, which
    # cannot collide with the 0..n-1 codes of known categories.
    categorical_transformer = Pipeline(steps=[
        ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ])

    return preprocessor


In [33]:
def make_model():
    """Return the unfitted multi-target regressor used by the pipeline.

    The base learner is an ``XGBRegressor`` (squared-error objective,
    100 estimators, fixed ``random_state=42``) wrapped in a
    ``MultiOutputRegressor`` so it can be fit on several target columns.
    """
    return MultiOutputRegressor(
        XGBRegressor(
            objective='reg:squarederror',
            n_estimators=100,
            random_state=42,
        )
    )


In [34]:
# Create the two stages, then chain them: preprocessing first, regressor second.
preprocessor, model = preprocess_data(), make_model()

pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', model),
    ]
)

# Train the model on the training set
pipeline.fit(X_train, y_train)


In [36]:
# Predictions for every split, in train / validation / test order
y_pred_train, y_pred_val, y_pred_test = (
    pipeline.predict(features) for features in (X_train, X_val, X_test)
)

# Compute the mean squared error of each split
mse_train, mse_val, mse_test = (
    mean_squared_error(actual, predicted)
    for actual, predicted in (
        (y_train, y_pred_train),
        (y_val, y_pred_val),
        (y_test, y_pred_test),
    )
)

# Print the results with fixed-point formatting (no scientific notation)
print(
    f'Train MSE: {mse_train:.10f}',
    f'Validation MSE: {mse_val:.10f}',
    f'Test MSE: {mse_test:.10f}',
    sep='\n',
)


Train MSE: 0.0000073916
Validation MSE: 0.0000630363
Test MSE: 0.0002041341


In [37]:
# Show the dimensions of the training feature frame as (rows, columns).
X_train.shape

(24143, 25)