In [1]:
# ! .venv\Scripts\pip install darts
# ! .venv\Scripts\pip install dask dask[distributed]
# ! .venv\Scripts\pip install bokeh dask[dataframe] pyarrow

In [1]:
import pandas as pd
df = pd.read_parquet('TRAIN_Reco_2021_2022_2023.parquet.gzip').reset_index()

In [2]:
df['ExecutionTime'] = pd.to_datetime(df['ExecutionTime'])
numerical_columns = ['high', 'low', 'close', 'volume']
df[numerical_columns] = df[numerical_columns].astype('float16')
df.dtypes

ExecutionTime    datetime64[ns, Europe/Berlin]
ID                                      object
high                                   float16
low                                    float16
close                                  float16
volume                                 float16
dtype: object

In [3]:
train_start_date = '2023-01-01'
train_end_date = '2023-09-30'

val_start_date = '2023-10-01'
val_end_date = '2023-12-31'  # Adjust if you have data beyond 2023

# Step 4: Split the data into training and validation sets
train_df = df[(df['ExecutionTime'] >= train_start_date) & (df['ExecutionTime'] <= train_end_date)]
val_df = df[(df['ExecutionTime'] >= val_start_date) & (df['ExecutionTime'] <= val_end_date)]

In [4]:
print(train_df["ID"].nunique(), val_df["ID"].nunique())

672 672


In [5]:
print(train_df.shape, val_df.shape)

(17545248, 6) (5483520, 6)


In [6]:
train_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume
69513,2023-01-01 00:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69514,2023-01-01 00:15:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69515,2023-01-01 00:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69516,2023-01-01 00:45:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69517,2023-01-01 01:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0


In [7]:
train_df.set_index('ExecutionTime', inplace=True)
val_df.set_index('ExecutionTime', inplace=True)

def create_lag_rolling_features(df):
    # Lag features
    for column in ['low', 'high', 'close', 'volume']:
        for lag in range(1, 11):  # Create 10 lags
            df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    
    # Rolling window features (rolling mean of the last 10 periods)
    for column in ['low', 'high', 'close', 'volume']:
        df[f'{column}_rolling_mean_10'] = df[column].rolling(window=10).mean()
    
    return df

In [8]:
# Apply the lag and rolling window function to each asset group separately in the training set
train_df = train_df.groupby('ID', as_index=False).apply(create_lag_rolling_features)

# Apply the lag and rolling window function to each asset group separately in the validation set
val_df = val_df.groupby('ID', as_index=False).apply(create_lag_rolling_features)

  train_df = train_df.groupby('ID', as_index=False).apply(create_lag_rolling_features)
  val_df = val_df.groupby('ID', as_index=False).apply(create_lag_rolling_features)


In [9]:
# Handle missing values resulting from lagging
train_df.dropna(inplace=True)
val_df.dropna(inplace=True)

In [10]:
train_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,low_lag_5,...,volume_lag_5,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10
Unnamed: 0_level_1,ExecutionTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,2023-01-01 02:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2023-01-01 02:45:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2023-01-01 03:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2023-01-01 03:15:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2023-01-01 03:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
train_df = train_df.reset_index()
val_df = val_df.reset_index()

In [15]:
train_df = train_df.drop(columns=["level_0"])
train_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,...,volume_lag_5,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10
0,2023-01-01 02:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2023-01-01 02:45:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2023-01-01 03:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2023-01-01 03:15:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2023-01-01 03:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
val_df = val_df.drop(columns=["level_0"])
val_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,...,volume_lag_5,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10
0,2023-10-01 02:30:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2023-10-01 02:45:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2023-10-01 03:00:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2023-10-01 03:15:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2023-10-01 03:30:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
from sklearn.preprocessing import MinMaxScaler

# List of columns to scale
columns_to_scale = ['high', 'low', 'close', 'volume']

# Create copies of the DataFrames
train_df_scaled = train_df.copy()
val_df_scaled = val_df.copy()

# Dictionary to store scalers for each asset
scalers = {}

# Assets present in training data
assets_in_train = train_df_scaled.index.get_level_values('ID').unique()

for asset in assets_in_train:
    # Training data for this asset
    asset_train_data = train_df_scaled.loc[asset, columns_to_scale]
    
    # Initialize and fit the scaler
    scaler = MinMaxScaler()
    scaled_train_values = scaler.fit_transform(asset_train_data)
    
    # Replace training data with scaled values
    train_df_scaled.loc[asset, columns_to_scale] = scaled_train_values
    
    # Store the scaler
    scalers[asset] = scaler
    
    # Check if the asset exists in validation data
    if asset in val_df_scaled.index.get_level_values('ID'):
        asset_val_data = val_df_scaled.loc[asset, columns_to_scale]
        
        # Transform validation data
        scaled_val_values = scaler.transform(asset_val_data)
        
        # Replace validation data with scaled values
        val_df_scaled.loc[asset, columns_to_scale] = scaled_val_values
    else:
        # Asset not in validation data; no action needed
        pass

# Handle assets present only in validation data
assets_in_val = val_df_scaled.index.get_level_values('ID').unique()
assets_only_in_val = set(assets_in_val) - set(assets_in_train)

for asset in assets_only_in_val:
    print(f"Warning: Asset {asset} is present in validation data but not in training data. Skipping scaling for this asset.")
    # Decide how to handle these assets
    # For example, you could drop them:
    val_df_scaled = val_df_scaled.drop(asset, level='ID')

# # Reset index if necessary
# train_df_scaled = train_df_scaled.reset_index()
# val_df_scaled = val_df_scaled.reset_index()

# Now proceed with your modeling using train_df_scaled and val_df_scaled


KeyError: 'Requested level (ID) does not match index name (None)'

In [15]:
train_df_scaled.shape

(17538528, 49)

In [16]:
train_df_scaled["ID"].nunique()

672

In [None]:
# import gc

# # Delete unused variables
# # del train_df
# del val_df
# # Explicitly call garbage collection
# gc.collect()


In [30]:
new_train_scaled_df = train_df_scaled
new_val_scaled_df = val_df_scaled

In [31]:
new_train_scaled_df['ID_numeric'] = new_train_scaled_df['ID'].astype('category').cat.codes
new_train_scaled_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,low_lag_5,...,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10,ID_numeric
ID,ExecutionTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Fri00Q1,2023-01-01 02:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
Fri00Q1,2023-01-01 02:45:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
Fri00Q1,2023-01-01 03:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
Fri00Q1,2023-01-01 03:15:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
Fri00Q1,2023-01-01 03:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [20]:
# new_train_scaled_df.columns

In [21]:
import pandas as pd
from darts import TimeSeries
from darts.models import RNNModel

  from .autonotebook import tqdm as notebook_tqdm


In [33]:
# # Remove timezone information from the 'ExecutionTime' column
# # new_train_scaled_df['ExecutionTime'] = pd.to_datetime(new_train_scaled_df['ExecutionTime']).dt.tz_localize(None)
# # new_train_scaled_df.index = new_train_scaled_df.index.tz_localize(None)

# # Remove 'ID' from the index, keep 'ExecutionTime' as the index
new_train_scaled_df = new_train_scaled_df.reset_index(level='ID', drop=True)


In [34]:
new_train_scaled_df = new_train_scaled_df.reset_index()
new_train_scaled_df['ExecutionTime'] = pd.to_datetime(new_train_scaled_df['ExecutionTime']).dt.tz_localize(None)

new_train_scaled_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,...,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10,ID_numeric
0,2023-01-01 02:30:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,2023-01-01 02:45:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,2023-01-01 03:00:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,2023-01-01 03:15:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,2023-01-01 03:30:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [38]:
new_val_scaled_df = new_val_scaled_df.reset_index(level='ID', drop=True)
new_val_scaled_df = new_val_scaled_df.reset_index()
new_val_scaled_df['ExecutionTime'] = pd.to_datetime(new_val_scaled_df['ExecutionTime']).dt.tz_localize(None)

new_val_scaled_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,...,volume_lag_5,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10
0,2023-10-01 02:30:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2023-10-01 02:45:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2023-10-01 03:00:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2023-10-01 03:15:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2023-10-01 03:30:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
new_val_scaled_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ID,high,low,close,volume,low_lag_1,low_lag_2,low_lag_3,low_lag_4,low_lag_5,...,volume_lag_5,volume_lag_6,volume_lag_7,volume_lag_8,volume_lag_9,volume_lag_10,low_rolling_mean_10,high_rolling_mean_10,close_rolling_mean_10,volume_rolling_mean_10
ID,ExecutionTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Fri00Q1,2023-10-01 02:30:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fri00Q1,2023-10-01 02:45:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fri00Q1,2023-10-01 03:00:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fri00Q1,2023-10-01 03:15:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Fri00Q1,2023-10-01 03:30:00+02:00,Fri00Q1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
new_val_scaled_df.to_csv('val.csv')
new_train_scaled_df.to_csv('train.csv')

In [25]:
# import gc

# # Delete unused variables
# # del large_unused_dataframe

# # Explicitly call garbage collection
# gc.collect()


In [26]:
# target_columns = ['high', 'low', 'close', 'volume']
# feature_columns = [col for col in new_train_scaled_df.columns if col not in target_columns + ['ID_numeric', 'ExecutionTime']]

# # Step 2: Convert the dataframe into Darts TimeSeries objects
# # TimeSeries for target variables (multivariate time series)
# targets = TimeSeries.from_dataframe(new_train_scaled_df, time_col='ExecutionTime', value_cols=target_columns, fill_missing_dates=True, freq='15T')

# # TimeSeries for covariates (including lags, rolling means, and ID_numeric)
# covariates = TimeSeries.from_dataframe(new_train_scaled_df, time_col='ExecutionTime', value_cols=feature_columns + ['ID_numeric'], fill_missing_dates=True, freq='15T')


In [35]:
# Step 1: Prepare the time series for both targets and covariates for each asset
def create_time_series(df):
    asset_time_series = {}
    asset_covariates = {}
    for asset in df['ID'].unique():
        # Filter the data for each asset
        asset_data = df[df['ID'] == asset]
        
        # Create TimeSeries object for target columns (high, low, close, volume)
        ts = TimeSeries.from_dataframe(asset_data, 'ExecutionTime', 
                                       ['high', 'low', 'close', 'volume'],
                                       fill_missing_dates=True, freq='15T')
        
        # Create TimeSeries object for covariates (lag features and rolling means)
        covariates = TimeSeries.from_dataframe(asset_data, 'ExecutionTime', 
                                               [col for col in df.columns if 'lag' in col or 'rolling_mean' in col],
                                               fill_missing_dates=True, freq='15T')
        
        asset_time_series[asset] = ts
        asset_covariates[asset] = covariates
    return asset_time_series, asset_covariates

# Step 2: Prepare data and create time series for each asset
asset_time_series, asset_covariates = create_time_series(new_train_scaled_df)

  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(fre

In [27]:
# # Features
# X_train = new_train_scaled_df.drop(['ID', 'ExecutionTime', 'high', 'low', 'close', 'volume'], axis=1)

# # Targets
# y_train = new_train_scaled_df[['high', 'low', 'close', 'volume']]


In [28]:
# model = RNNModel(
#     input_chunk_length=15,  # Number of past time steps to use as input
#     output_chunk_length=10,  # Number of future time steps to predict
#     model="LSTM",  # You can choose between "RNN", "LSTM", and "GRU"
#     n_epochs=100,
#     batch_size=32,
#     random_state=42
# )

In [36]:
# Step 3: Run the model for each asset with covariates
def run_model_for_each_asset(asset_time_series, asset_covariates):
    models = {}
    predictions = {}

    for asset, ts in asset_time_series.items():
        covariates = asset_covariates[asset]
        
        # Define the model (LSTM in this case)
        model = RNNModel(input_chunk_length=15, output_chunk_length=10, model="LSTM", n_epochs=10)
        
        # Train the model on the asset's time series and covariates
        model.fit(ts, future_covariates=covariates)
        models[asset] = model
        
        # Predict the next 10 time steps
        prediction = model.predict(10, future_covariates=covariates)
        predictions[asset] = prediction
        print(f"Asset {asset} prediction:\n", prediction)
    
    return models, predictions

# Run the model for each asset with covariates
models, predictions = run_model_for_each_asset(asset_time_series, asset_covariates)

ignoring user defined `output_chunk_length`. RNNModel uses a fixed `output_chunk_length=1`.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 7.5 K  | train
6 | V               | Linear           | 104    | train
-------------------------------------------------------------
7.6 K     Trainable params
0         Non-trainable params
7.6 K     Total params
0.030     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Epoch 10:  92%|█████████▏| 750/815 [00:19<00:01, 38.04it/s, train_loss=nan.0]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Step 3: Run the model for each asset with covariates using custom LSTM
def run_model_for_each_asset_keras(asset_time_series, asset_covariates):
    models = {}
    predictions = {}

    for asset, ts in asset_time_series.items():
        covariates = asset_covariates[asset]

        # Prepare the data (assuming ts and covariates are already preprocessed into X and y)
        X_train = ts.values()  # This assumes ts is pre-processed to extract the values
        y_train = covariates.values()  # Assuming y_train depends on the covariates or other logic

        # Define the Keras LSTM model
        model = Sequential()
        model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
        model.add(LSTM(50, return_sequences=False))
        model.add(Dense(25))
        model.add(Dense(1))  # Output layer

        # Compile the model
        model.compile(optimizer='adam', loss='mean_squared_error', metrics=["mae"])

        # Train the model
        model.fit(X_train, y_train, batch_size=1, epochs=10)

        models[asset] = model

        # For prediction (you may need to adjust X_pred to match your input setup)
        X_pred = X_train[-10:]  # Assuming you want to predict the next 10 steps from the last 10
        prediction = model.predict(X_pred)
        predictions[asset] = prediction
        print(f"Asset {asset} prediction:\n", prediction)
    
    return models, predictions

# Run the model for each asset with covariates
models, predictions = run_model_for_each_asset_keras(asset_time_series, asset_covariates)
