In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import optuna

data = pd.read_csv('combined_dataset.csv')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data

Unnamed: 0,DATE,USD to JPY,USD to EUR,USD to GBP,Close_SP,Adj Close_SP,Volume_SP,NASDAQCOM,Close_DJ,Balance_Curr_Acc,Public_debt,GDP,M2SL,year,month,day_of_week,Int_rate
0,1999-01-04,0.0089,1.1812,1.6581,1228.099976,1228.099976,8.770000e+08,2208.05,9184.27,-262.217000,5.638780e+06,13426.748000,4425.300000,1999.0,1.0,0.0,5.04
1,1999-01-05,0.0090,1.1760,1.6566,1244.780029,1244.780029,7.750000e+08,2251.27,9311.19,-262.217000,5.638780e+06,13426.748000,4425.300000,1999.0,1.0,1.0,4.54
2,1999-01-06,0.0089,1.1636,1.6547,1272.339966,1272.339966,9.869000e+08,2320.86,9544.97,-262.217000,5.638780e+06,13426.748000,4425.300000,1999.0,1.0,2.0,4.23
3,1999-01-07,0.0090,1.1672,1.6495,1269.729980,1269.729980,8.630000e+08,2326.09,9537.76,-262.217000,5.638780e+06,13426.748000,4425.300000,1999.0,1.0,3.0,4.49
4,1999-01-08,0.0090,1.1554,1.6405,1275.089966,1275.089966,9.378000e+08,2344.41,9643.32,-262.217000,5.638780e+06,13426.748000,4425.300000,1999.0,1.0,4.0,4.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6378,2019-04-19,0.0089,1.1246,1.3003,5234.180176,5234.180176,3.374700e+09,16428.82,39475.90,-468.503344,2.217556e+07,20635.508340,14602.527273,2019.0,4.0,4.0,2.44
6379,2020-04-10,0.0092,1.0951,1.2485,5234.180176,5234.180176,3.374700e+09,16428.82,39475.90,-526.065875,2.652844e+07,19196.371953,17259.763636,2020.0,4.0,4.0,0.05
6380,2021-04-02,0.0090,1.1763,1.3825,5234.180176,5234.180176,3.374700e+09,16428.82,39475.90,-822.744578,2.852787e+07,21312.255547,20151.750000,2021.0,4.0,4.0,0.07
6381,2022-04-15,0.0079,1.0812,1.3066,5234.180176,5234.180176,3.374700e+09,16428.82,39475.90,-998.940762,3.062578e+07,21730.854286,21697.657143,2022.0,4.0,4.0,0.33


In [3]:
X = data.drop(['USD to JPY', 'USD to EUR', 'USD to GBP'], axis=1)
y = data['USD to JPY']

In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
import optuna
from sklearn.metrics import mean_squared_error
import keras.backend as K

# Assuming 'data' is your DataFrame and it's already loaded and preprocessed
data['DATE'] = pd.to_datetime(data['DATE'])
data.set_index('DATE', inplace=True)

# Prepare features and target
features = data.drop(['USD to JPY', 'USD to EUR', 'USD to GBP'], axis=1)
target = data['USD to JPY']  # Example for USD to JPY

# Normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(features)
scaled_target = scaler.fit_transform(target.values.reshape(-1, 1))

# Create time series data
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X[i:(i + time_steps), :]
        Xs.append(v)
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

time_steps = 10
X, y = create_dataset(scaled_features, scaled_target, time_steps)

# Define the objective function
def objective(trial):
    tscv = TimeSeriesSplit(n_splits=5)
    n_layers = trial.suggest_int('n_layers', 1, 3)
    dropout_rate = trial.suggest_float('dropout_rate', 0, 0.5)
    lstm_units = trial.suggest_categorical('lstm_units', [50, 100, 200])
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    mse_list = []
    for train_index, test_index in tscv.split(X):
        K.clear_session()  # Clear model from memory
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model = Sequential([
            LSTM(units=lstm_units, return_sequences=(n_layers > 1), input_shape=(X_train.shape[1], X_train.shape[2])),
            Dropout(dropout_rate)
        ])
        for _ in range(1, n_layers):
            model.add(LSTM(units=lstm_units, return_sequences=(_ < n_layers - 1)))
            model.add(Dropout(dropout_rate))
        model.add(Dense(1))
        model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

        model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, y_test))
        preds = model.predict(X_test)
        mse = mean_squared_error(y_test, preds)
        mse_list.append(mse)

    return np.mean(mse_list)

# Run optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Output results
print("Number of finished trials:", len(study.trials))
print("Best trial:")
trial = study.best_trial
print(f"Value (MSE): {trial.value}")
for key, value in trial.params.items():
    print(f"  {key}: {value}")


[I 2024-05-12 00:16:19,566] A new study created in memory with name: no-name-117432e0-28ac-4c96-90b0-4c76ac57c303




[I 2024-05-12 00:20:53,094] Trial 0 finished with value: 0.08257152380018933 and parameters: {'n_layers': 1, 'dropout_rate': 0.15278567155581285, 'lstm_units': 200, 'learning_rate': 0.00977904280747385}. Best is trial 0 with value: 0.08257152380018933.




[I 2024-05-12 00:54:29,241] Trial 1 finished with value: 0.08439046112595623 and parameters: {'n_layers': 3, 'dropout_rate': 0.413739846858296, 'lstm_units': 100, 'learning_rate': 0.00147813015300541}. Best is trial 0 with value: 0.08257152380018933.




[I 2024-05-12 00:56:33,625] Trial 2 finished with value: 0.13323101973990512 and parameters: {'n_layers': 1, 'dropout_rate': 0.000935698655318784, 'lstm_units': 50, 'learning_rate': 0.010345977302036331}. Best is trial 0 with value: 0.08257152380018933.




[I 2024-05-12 01:24:14,932] Trial 3 finished with value: 0.06507510545583546 and parameters: {'n_layers': 2, 'dropout_rate': 0.15210233839398724, 'lstm_units': 100, 'learning_rate': 0.0068273737599531875}. Best is trial 3 with value: 0.06507510545583546.




[I 2024-05-12 01:33:32,777] Trial 4 finished with value: 0.15894096505156158 and parameters: {'n_layers': 1, 'dropout_rate': 0.4542184482972067, 'lstm_units': 100, 'learning_rate': 0.000131663637544384}. Best is trial 3 with value: 0.06507510545583546.




[W 2024-05-12 01:48:05,200] Trial 5 failed with parameters: {'n_layers': 3, 'dropout_rate': 0.0260509997276388, 'lstm_units': 50, 'learning_rate': 0.0035102263114283965} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\aghab\AppData\Local\Programs\Python\Python311\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\aghab\AppData\Local\Temp\ipykernel_7680\2501710226.py", line 61, in objective
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0, validation_data=(X_test, y_test))
  File "c:\Users\aghab\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\aghab\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1742, in fit
    tmp

KeyboardInterrupt: 