<h3>Forex Prediction: Recurrent Neural Networks (RNNs)</h3>

In [2]:
import pandas as pd
import numpy as np
from data_functions import *

In [3]:
# Load the combined forex table plus two lookup helpers.
# Judging by how they are used further down: `countries` is iterated, and
# `cc_dict[country]` yields the name of that country's rate column in `all_data`
# (e.g. 'EURO/US$') -- NOTE(review): confirm against data_functions.
all_data, cc_dict, countries = load_and_process_forex_data()
all_data

Unnamed: 0,Date,EURO/US$,UNITED KINGDOM POUND/US$,YEN/US$,YUAN/US$,AUSTRALIAN DOLLAR/US$
0,2000-01-03,0.9847,0.6146,101.70,8.2798,1.5172
1,2000-01-04,0.9700,0.6109,103.09,8.2799,1.5239
2,2000-01-05,0.9676,0.6092,103.77,8.2798,1.5267
3,2000-01-06,0.9686,0.6070,105.19,8.2797,1.5291
4,2000-01-07,0.9714,0.6104,105.17,8.2794,1.5272
...,...,...,...,...,...,...
5211,2019-12-24,0.9022,0.7719,109.38,7.0064,1.4445
5213,2019-12-26,0.9007,0.7688,109.67,6.9949,1.4411
5214,2019-12-27,0.8949,0.7639,109.47,6.9954,1.4331
5215,2019-12-30,0.8915,0.7610,108.85,6.9864,1.4278


In [4]:
def create_data_dict_currency(data, countries, currency_dict):
    """
    Split the combined forex frame into one two-column frame per currency.

    Parameters:
    data (DataFrame): The main dataframe; must contain a 'Date' column and
        one column per currency named in `currency_dict`.
    countries (iterable): Keys to look up in `currency_dict`.
    currency_dict (dict): Maps each entry of `countries` to the name of its
        currency column in `data`.

    Returns:
    dict: Maps each currency column name to an independent copy of
        data[['Date', <currency column>]]. The 'Date' column is copied
        as-is; no parsing or re-formatting is applied here.
    """
    # Resolve every country to its currency column up front.
    currency_columns = [currency_dict[country] for country in countries]

    # .copy() keeps each per-currency frame independent of the source frame.
    return {
        column: data[['Date', column]].copy()
        for column in currency_columns
    }

In [5]:
# One {currency column -> ['Date', rate] frame} entry per country,
# then preview the euro series.
data_dict = create_data_dict_currency(
    data=all_data,
    countries=countries,
    currency_dict=cc_dict,
)
data_dict['EURO/US$']

Unnamed: 0,Date,EURO/US$
0,2000-01-03,0.9847
1,2000-01-04,0.9700
2,2000-01-05,0.9676
3,2000-01-06,0.9686
4,2000-01-07,0.9714
...,...,...
5211,2019-12-24,0.9022
5213,2019-12-26,0.9007
5214,2019-12-27,0.8949
5215,2019-12-30,0.8915


In [6]:
# Train-test splitting:
# Chronological hold-out -- the last TEST_SIZE rows form the test set and
# everything before them is the training set (no shuffling).
TEST_SIZE = 60  # number of trailing observations reserved for testing

df = data_dict['EURO/US$'].copy()
if 'Date' in df.columns:
    # Make sure rows are oldest -> newest before slicing off the tail.
    df = df.sort_values('Date').reset_index(drop=True)

# A single split index keeps the two frames an exact partition of `df`.
# It also avoids the `iloc[:-0]` pitfall, which would yield an empty
# training set if TEST_SIZE were ever set to 0.
split_at = max(len(df) - TEST_SIZE, 0)

test_df = df.iloc[split_at:].reset_index(drop=True)
train_df = df.iloc[:split_at].reset_index(drop=True)

<h5>Pre-processing to carry out:</h5>


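- Scaling (using a `RobustScaler` fitted on the training split only, then applied to both splits).
- Time window creation (sliding look-back windows, each paired with the next observation as its target).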

In [8]:
from sklearn.preprocessing import RobustScaler

def robust_scale_train_test(train_df, test_df, drop_date=True):
    """
    Scale a train/test pair with a RobustScaler fitted on the training data only.

    Parameters:
    train_df (DataFrame): Training split; the scaler statistics come from here.
    test_df (DataFrame): Test split; transformed with the already-fitted scaler.
    drop_date (bool): When True, a 'Date' column (if present) is removed from
        each split before scaling. When False the frames are scaled as given.

    Returns:
    tuple: (scaler, train_scaled_df, test_scaled_df). The scaled frames keep
        the column names and index of the (Date-less) inputs.
    """
    def _feature_frame(frame):
        # Always hand back a fresh frame so the caller's data is left untouched.
        if drop_date and 'Date' in frame.columns:
            return frame.drop(columns=['Date']).copy()
        return frame.copy()

    def _as_frame(scaled_values, template):
        # Re-attach the labels that were lost when scaling the raw array.
        return pd.DataFrame(scaled_values, columns=template.columns, index=template.index)

    train_features = _feature_frame(train_df)
    test_features = _feature_frame(test_df)

    scaler = RobustScaler()
    # Fit on train only; the test split is transformed with train statistics.
    scaled_train = scaler.fit_transform(train_features.values)
    scaled_test = scaler.transform(test_features.values)

    return (
        scaler,
        _as_frame(scaled_train, train_features),
        _as_frame(scaled_test, test_features),
    )

In [9]:
# Scale both splits of the euro series; the fitted scaler is kept as well
# (e.g. for inverse-transforming values later).
eur_scaler, train_scaled_df, test_scaled_df = robust_scale_train_test(
    train_df=train_df,
    test_df=test_df,
)

In [10]:
# Create windows:

def LSTM_input(df, input_sequence):
    """
    Generate supervised learning sequences from a time-ordered dataset
    for one-step-ahead LSTM forecasting.

    The data is converted with ``df.to_numpy()`` and cut into overlapping
    windows. For each sample:
        - X contains `input_sequence` consecutive observations
        - y contains the observation immediately following that window

    The function assumes the data is:
        - Ordered in ascending chronological order (oldest -> newest)
        - Free of non-numeric columns (e.g. time columns removed)
        - Already scaled or normalised, if required

    Parameters
    ----------
    df : pandas.DataFrame
        Time-series data containing one or more numeric features.
        Shape: (n_samples, n_features).
        The index is not used by this function.

    input_sequence : int
        Number of past time steps to include in each input sequence
        (i.e. the LSTM lookback window). Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Array of input sequences with shape:
            (max(n_samples - input_sequence, 0), input_sequence, n_features)

    y : numpy.ndarray
        Array of target values with shape:
            (max(n_samples - input_sequence, 0), n_features)

    Raises
    ------
    ValueError
        If `input_sequence` is smaller than 1.

    Notes
    -----
    - Each target value corresponds to the time step immediately
      following its input sequence.
    - Temporal order is preserved; nothing is shuffled.
    - No scaling or missing-value handling is performed.
    - When the data has `input_sequence` rows or fewer, no window fits and
      both arrays are empty, but they keep their full rank so that
      downstream shape checks still work.
    - Both returned arrays are copies; the input is not modified.

    Example
    -------
    >>> X, y = LSTM_input(df_scaled, input_sequence=28)
    >>> X.shape
    (num_samples, 28, num_features)
    >>> y.shape
    (num_samples, num_features)
    """
    if input_sequence < 1:
        raise ValueError(
            f"input_sequence must be a positive integer, got {input_sequence!r}"
        )

    values = df.to_numpy()

    # Number of complete (window, next-step) pairs; zero when the data is too short.
    n_windows = max(len(values) - input_sequence, 0)

    # Row i of `window_idx` is [i, i + 1, ..., i + input_sequence - 1], so one
    # fancy-indexing gather builds every window without a Python-level loop.
    window_idx = np.arange(n_windows)[:, None] + np.arange(input_sequence)[None, :]
    X = values[window_idx]

    # The target of window i is the row right after it, i.e. row i + input_sequence.
    y = values[input_sequence:].copy()

    return X, y