# Dudley

In [None]:
import pandas as pd

In [None]:
# Load the amalgamated dataset; the path is resolved relative to the working directory.
df = pd.read_parquet(path="amalgamated.parquet")

In [None]:
# Columns kept for modelling. The list is deliberately NOT called `vars`:
# that name would shadow the Python builtin `vars()` for every later cell
# executed in the same kernel.
selected_columns = [
    # BTC daily price range
    "BTC-USD_High",
    "BTC-USD_Low",
    # Metric columns (names as they appear in the parquet file)
    "AdrBalNtv0.01Cnt",
    "AdrBalNtv0.1Cnt",
    "AdrBalNtv1Cnt",
    "AdrBalNtv10Cnt",
    "BlkSizeMeanByte",
    "CapRealUSD",
    "FeeByteMeanNtv",
    "FlowInExNtv",
    "FlowOutExNtv",
    "FlowTfrFromExCnt",
    "HashRate",
    "NDF",
    "SplyAct1d",
    "SplyActPct1yr",
    "TxCnt",
    "VelCur1yr",
    # High/low columns for the other tickers
    "SPY_High",
    "SPY_Low",
    "QQQ_High",
    "QQQ_Low",
    "^IRX_High",
    "^IRX_Low",
    "^TNX_High",
    "^TNX_Low",
    "^TYX_High",
    "^TYX_Low",
    "Global_Liquidity_Index",
    # SMA columns of the BTC high/low (5/10/20/50/100 suffixes)
    "BTC-USD_High_SMA_5",
    "BTC-USD_Low_SMA_5",
    "BTC-USD_High_SMA_10",
    "BTC-USD_Low_SMA_10",
    "BTC-USD_High_SMA_20",
    "BTC-USD_Low_SMA_20",
    "BTC-USD_High_SMA_50",
    "BTC-USD_Low_SMA_50",
    "BTC-USD_High_SMA_100",
    "BTC-USD_Low_SMA_100",
]

# Restrict the frame to the selected columns (raises KeyError if any is missing).
df = df[selected_columns]

In [None]:
# Replace every column by its first difference (row-to-row change).
# Differencing leaves the first row without a predecessor, so it is NaN;
# dropna() removes it together with any other row that contains a NaN.
df = df.diff().dropna()

df

In [None]:
import pandas as pd
import numpy as np
import torch

from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

# --- Forecasting configuration -------------------------------------------
target_column = 'BTC-USD_Low'  # series to forecast
freq = "D"                     # sampling frequency of the index (daily)
prediction_length = 40         # forecast horizon, in steps of `freq`

# Put the frame on a regular datetime grid at the configured frequency.
# NOTE(review): asfreq() inserts NaN rows for any timestamp missing from the
# index — confirm the index has no gaps at this point.
df.index = pd.to_datetime(df.index)
df = df.asfreq(freq)

# Every column except the target is used as a dynamic (time-varying) covariate.
dynamic_feature_columns = df.columns.drop(target_column)

# Covariate matrix with one row per feature and one column per time step.
dynamic_features = df[dynamic_feature_columns].values.T

# One dimension of size 1 per covariate. The estimator treats these as known
# over the forecast horizon — adjust if that does not hold for a feature.
dynamic_dims = [1 for _ in dynamic_feature_columns]

# Build the GluonTS datasets.

# Training series: everything except the final `prediction_length` steps,
# which are held out for evaluation. Target and covariates span the same range.
training_data = ListDataset([
    {
        "start": df.index[0],
        "target": df[target_column][:-prediction_length].values,
        "feat_dynamic_real": dynamic_features[:, :-prediction_length]
    }
], freq=freq)

# Evaluation series. At prediction time GluonTS forecasts the
# `prediction_length` steps that FOLLOW the supplied target, and needs
# `feat_dynamic_real` to cover the target span plus those forecast steps.
# The target is therefore the history up to the hold-out window, and the
# covariates are the full-length matrix, so the forecast lines up with the
# last `prediction_length` rows of df. (Feeding the hold-out values themselves
# as the target would condition the model on the answers and forecast past the
# end of the data.)
# The variable keeps its original name because later code prints its shape.
test_dynamic_features_sliced = dynamic_features

test_data = ListDataset([
    {
        "start": df.index[0],
        "target": df[target_column][:-prediction_length].values,
        "feat_dynamic_real": test_dynamic_features_sliced
    }
], freq=freq)



# Configure the Temporal Fusion Transformer estimator.
estimator = TemporalFusionTransformerEstimator(
    # -- data layout --
    freq=freq,
    prediction_length=prediction_length,
    context_length=3 * prediction_length,  # three forecast horizons; tune as needed
    dynamic_dims=dynamic_dims,
    quantiles=[0.1, 0.5, 0.9],  # quantile levels the model outputs
    # -- network size --
    hidden_dim=64,
    variable_dim=64,
    num_heads=8,
    dropout_rate=0.1,
    # -- optimisation --
    lr=0.001,
    weight_decay=1e-08,
    patience=10,
    batch_size=64,
    num_batches_per_epoch=100,
    trainer_kwargs={'max_epochs': 160},  # forwarded to the underlying trainer
)

# Fit the estimator on the training series; the result is the predictor used below.
predictor = estimator.train(training_data)

# Debugging: print covariate shapes to verify alignment.
# `training_dynamic_features` was referenced here without ever being defined
# (NameError after the whole training run); it is the same slice that was
# passed to the training dataset.
training_dynamic_features = dynamic_features[:, :-prediction_length]
print(f"Dynamic Features Training Shape: {training_dynamic_features.shape}")
print(f"Dynamic Features Test Shape: {test_dynamic_features_sliced.shape}")

# Hold-out actuals: the final `prediction_length` observations of the target.
actuals = df[target_column][-prediction_length:].values

# Collect the point forecast and the three quantile paths for every forecast
# returned by the predictor (one per entry in test_data).
mean_predictions = []
p10_predictions = []
p50_predictions = []
p90_predictions = []

for forecast in predictor.predict(test_data):
    mean_predictions.append(forecast.mean)
    p10_predictions.append(forecast.quantile(0.1))
    p50_predictions.append(forecast.quantile(0.5))  # Median
    p90_predictions.append(forecast.quantile(0.9))

# Flatten to 1-D arrays and keep the first `prediction_length` values so each
# array has the same length as `actuals`.
mean_predictions = np.array(mean_predictions).flatten()[:prediction_length]
p10_predictions = np.array(p10_predictions).flatten()[:prediction_length]
p50_predictions = np.array(p50_predictions).flatten()[:prediction_length]
p90_predictions = np.array(p90_predictions).flatten()[:prediction_length]


In [None]:
# `plt` is used throughout this cell but was never imported in the notebook.
import matplotlib.pyplot as plt

# The training series stops `prediction_length` steps before the end of df, so
# its last timestamp is the row just before the hold-out window. (`end_training`
# was previously referenced without being defined.)
end_training = df.index[-prediction_length - 1]

# First forecast timestamp: one day after the end of training (freq is daily).
forecast_start_date = end_training + pd.Timedelta(days=1)

fig, ax = plt.subplots(figsize=(12, 6))

# Actual values from the forecast start onwards.
actuals_start_index = df.index.get_loc(forecast_start_date)
ax.plot(
    df.index[actuals_start_index:],
    df[target_column][actuals_start_index:],
    label="True values",
    color="black",
)

# Forecast mean and quantile bands on a date axis starting at the forecast start.
forecast_index = pd.date_range(start=forecast_start_date, periods=prediction_length, freq=freq)
ax.plot(forecast_index, mean_predictions, color='red', linestyle='--', label="Forecast (mean)")
ax.fill_between(forecast_index, p10_predictions, p90_predictions, color='red', alpha=0.3, label="P10-P90 interval")
ax.fill_between(forecast_index, p10_predictions, p50_predictions, color='red', alpha=0.5, label="P10-P50 interval")
ax.fill_between(forecast_index, p50_predictions, p90_predictions, color='red', alpha=0.5, label="P50-P90 interval")

# Mark where the forecast begins and label the figure.
ax.axvline(x=forecast_start_date, color='blue', linestyle='--', label='Start of forecast')
ax.legend(loc="upper left", fontsize="large")
ax.set_title('Forecast vs Actual Values from Forecast Start')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
fig.tight_layout()
plt.show()


In [None]:
# Function to calculate sMAPE
def calculate_smape(forecasts, actuals):
    """Return the symmetric mean absolute percentage error, in percent.

    Each term is ``2 * |forecast - actual| / (|actual| + |forecast|)``; the
    result is the mean of the terms multiplied by 100.

    Parameters
    ----------
    forecasts, actuals : array-like of numbers
        Predicted and observed values with matching (or broadcastable) shapes.
        Lists are accepted and converted to float arrays.

    Returns
    -------
    float
        sMAPE in percent. A term where forecast and actual are both zero
        counts as 0 (a perfect prediction) instead of producing 0/0 = NaN,
        which would otherwise turn the whole metric into NaN.
    """
    forecasts = np.asarray(forecasts, dtype=float)
    actuals = np.asarray(actuals, dtype=float)

    numerator = 2 * np.abs(forecasts - actuals)
    denominator = np.abs(actuals) + np.abs(forecasts)

    # Divide only where the denominator is non-zero; elsewhere keep the
    # pre-filled 0 (the denominator is zero only when both values are zero).
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 * np.mean(terms)

# Point-forecast errors of the mean prediction against the held-out actuals.
errors = mean_predictions - actuals

mae = np.mean(np.abs(errors))
rmse = np.sqrt(np.mean(np.square(errors)))
# MAPE divides by the actual values, so a zero actual yields inf/NaN here.
mape = np.mean(np.abs(errors / actuals)) * 100
smape = calculate_smape(mean_predictions, actuals)

# Share (in percent) of actuals that fall inside the P10-P90 band.
inside_band = (p10_predictions <= actuals) & (actuals <= p90_predictions)
within_range = np.sum(inside_band) / len(actuals) * 100

print("\n".join([
    "Evaluation Metrics for Mean Predictions:",
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"MAPE: {mape:.2f}%",
    f"sMAPE: {smape:.2f}%",
    f"Percentage of Actuals within P10-P90 Interval: {within_range:.2f}%",
]))


In [None]:
# Scaling divisor applied to predictions and actuals alike.
# NOTE(review): the origin of this constant is not shown in the notebook —
# confirm what 7111.9523 represents.
btcc_share = 7111.9523

# Rescale every series by the same divisor.
(
    adjusted_mean_predictions,
    adjusted_p10_predictions,
    adjusted_p50_predictions,
    adjusted_p90_predictions,
    adjusted_actuals,
) = (
    series / btcc_share
    for series in (
        mean_predictions,
        p10_predictions,
        p50_predictions,
        p90_predictions,
        actuals,
    )
)

# Same metrics as for the unscaled values. Note that this overwrites `mae`,
# `rmse`, `mape` and `within_range` from the previous cell.
adjusted_errors = adjusted_mean_predictions - adjusted_actuals
mae = np.mean(np.abs(adjusted_errors))
rmse = np.sqrt(np.mean(np.square(adjusted_errors)))
mape = np.mean(np.abs(adjusted_errors / adjusted_actuals)) * 100
adjusted_smape = calculate_smape(adjusted_mean_predictions, adjusted_actuals)

# Share (in percent) of adjusted actuals inside the adjusted P10-P90 band.
adjusted_inside_band = (
    (adjusted_p10_predictions <= adjusted_actuals)
    & (adjusted_actuals <= adjusted_p90_predictions)
)
within_range = np.sum(adjusted_inside_band) / len(adjusted_actuals) * 100

print("\n".join([
    "Evaluation Metrics for Adjusted Mean Predictions:",
    f"MAE: {mae:.4f}",
    f"RMSE: {rmse:.4f}",
    f"MAPE: {mape:.2f}%",
    f"sMAPE: {adjusted_smape:.2f}%",
    f"Percentage of Adjusted Actuals within P10-P90 Interval: {within_range:.2f}%",
]))

In [None]:
import os
import torch

# Destination file for the serialized predictor (relative to the working directory).
model_save_path = 'dudley_low.pth'

# `predictor` is the object returned by training the
# TemporalFusionTransformerEstimator earlier in this notebook; torch.save
# pickles the whole object into the file.
torch.save(obj=predictor, f=model_save_path)

print(f'Model saved to {model_save_path}')
