In [None]:
import pandas as pd
from darts import TimeSeries
from darts.models import RNNModel

In [1]:
import pandas as pd
df = pd.read_parquet('TRAIN_Reco_2021_2022_2023.parquet.gzip').reset_index()

In [2]:
df['ExecutionTime'] = pd.to_datetime(df['ExecutionTime'])
numerical_columns = ['high', 'low', 'close', 'volume']
df[numerical_columns] = df[numerical_columns].astype('float16')
df.dtypes

ExecutionTime    datetime64[ns, Europe/Berlin]
ID                                      object
high                                   float16
low                                    float16
close                                  float16
volume                                 float16
dtype: object

In [3]:
train_start_date = '2023-01-01'
train_end_date = '2023-09-30'

val_start_date = '2023-10-01'
val_end_date = '2023-12-31'  # Adjust if you have data beyond 2023

# Step 4: Split the data into training and validation sets
train_df = df[(df['ExecutionTime'] >= train_start_date) & (df['ExecutionTime'] <= train_end_date)]
val_df = df[(df['ExecutionTime'] >= val_start_date) & (df['ExecutionTime'] <= val_end_date)]

In [12]:
train_df.head()

Unnamed: 0,ExecutionTime,ID,high,low,close,volume
69513,2023-01-01 00:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69514,2023-01-01 00:15:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69515,2023-01-01 00:30:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69516,2023-01-01 00:45:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0
69517,2023-01-01 01:00:00+01:00,Fri00Q1,0.0,0.0,0.0,0.0


In [13]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Assume you have already scaled your data
scaler = MinMaxScaler()

# Fit and transform your training data
train_df_scaled = scaler.fit_transform(train_df[['high', 'low', 'close', 'volume']])

# Transform test data
test_df_scaled = scaler.transform(val_df[['high', 'low', 'close', 'volume']])

# Convert scaled data back to DataFrame (keeping the same column names)
train_df_scaled = pd.DataFrame(train_df_scaled, columns=['high', 'low', 'close', 'volume'], index=train_df.index)
val_df_scaled = pd.DataFrame(test_df_scaled, columns=['high', 'low', 'close', 'volume'], index=val_df.index)

# Retain 'ID' and 'ExecutionTime' in the final DataFrame
train_df_scaled[['ID', 'ExecutionTime']] = train_df[['ID', 'ExecutionTime']]
val_df_scaled[['ID', 'ExecutionTime']] = val_df[['ID', 'ExecutionTime']]

In [14]:
train_df_scaled.head()

Unnamed: 0,high,low,close,volume,ID,ExecutionTime
69513,0.188232,0.486084,0.204712,0.0,Fri00Q1,2023-01-01 00:00:00+01:00
69514,0.188232,0.486084,0.204712,0.0,Fri00Q1,2023-01-01 00:15:00+01:00
69515,0.188232,0.486084,0.204712,0.0,Fri00Q1,2023-01-01 00:30:00+01:00
69516,0.188232,0.486084,0.204712,0.0,Fri00Q1,2023-01-01 00:45:00+01:00
69517,0.188232,0.486084,0.204712,0.0,Fri00Q1,2023-01-01 01:00:00+01:00


In [16]:
# Remove the timezone information
train_df_scaled['ExecutionTime'] = pd.to_datetime(train_df_scaled['ExecutionTime']).dt.tz_localize(None)
val_df_scaled['ExecutionTime'] = pd.to_datetime(val_df_scaled['ExecutionTime']).dt.tz_localize(None)


In [17]:
from darts import TimeSeries
from darts.models import NBEATSModel
import pandas as pd

# Step 1: Prepare the time series for each asset
def create_time_series(df):
    asset_time_series = {}
    for asset in df['ID'].unique():
        # Filter the data for each asset
        asset_data = df[df['ID'] == asset]
        
        # Create a Darts TimeSeries object
        ts = TimeSeries.from_dataframe(asset_data, 'ExecutionTime', 
                                       ['high', 'low', 'close', 'volume'],
                                       fill_missing_dates=True, freq='15T')  # Adjust frequency if needed
        
        asset_time_series[asset] = ts
    return asset_time_series

# Step 2: Create time series for each asset
asset_time_series = create_time_series(train_df_scaled)

  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(freq)
  resampled_time_index = resampled_time_index.asfreq(fre

In [19]:
# Step 3: Create N-BEATS model and run it for all assets
def run_nbeats_model_for_all_assets(asset_time_series):
    predictions = {}
    
    # Initialize the N-BEATS model
    model = NBEATSModel(input_chunk_length=15, output_chunk_length=10, n_epochs=10)
    
    # Combine all the asset time series into one list
    combined_series = list(asset_time_series.values())
    
    # Train the model on all the series
    model.fit(combined_series)
    
    # Predict the next 10 steps for all series
    for asset, ts in asset_time_series.items():
        prediction = model.predict(n=10, series=ts)
        predictions[asset] = prediction
        print(f"Asset {asset} prediction:\n", prediction)
    
    return predictions

# Step 4: Run the N-BEATS model for all assets
predictions = run_nbeats_model_for_all_assets(asset_time_series)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | stacks          | ModuleList       | 6.5 M  | train
-------------------------------------------------------------
6.5 M     Trainable params
1.6 K     Non-trainable params
6.5 M     Total params
25.939    Total estimated model params size (MB)
396       Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 1899/547869 [06:32<31:21:28,  4.84it/s, train_loss=nan.0] 


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined