# Ron High

In [None]:
import pandas as pd

In [None]:
# Load the raw dataset from the local Parquet file.
# NOTE(review): presumably a date-indexed frame of Yahoo Finance metrics — confirm the schema.
df = pd.read_parquet("yahoo-finance.parquet")

In [None]:
# Columns kept for modelling: the BTC-CAD high and low columns.
yfinance_metrics = [
    "BTC-CAD_High",
    "BTC-CAD_Low",
]

# Take an explicit copy so that later in-place operations on `df`
# (e.g. `dropna(inplace=True)`) act on an independent frame rather than on a
# derived selection, which avoids pandas' SettingWithCopyWarning.
df = df[yfinance_metrics].copy()

In [None]:
# Drop every row that has a missing value in any of the selected columns.
# Rebinding (instead of `inplace=True`) avoids mutating a frame that may be a
# derived selection of the loaded data.
df = df.dropna()

# Sanity check: no NA values may remain after the drop.
assert not df.isna().any().any(), "DataFrame contains NA values"

df

In [None]:
import pandas as pd
import numpy as np
import torch

from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

# Select PyTorch's 'high' float32 matmul precision mode
# (see the PyTorch docs for the speed/precision trade-off of each mode).
torch.set_float32_matmul_precision('high')

# Column of `df` that the model is trained to forecast.
target_column = 'BTC-CAD_High'  # Replace with your target column name

# Convert the index to datetimes and conform the frame to a fixed frequency.
df.index = pd.to_datetime(df.index)
freq = "D"  # Set the frequency of your data, e.g., 'D' for daily. Adjust as needed.
# NOTE(review): asfreq inserts a NaN row for every period missing from the index,
# so NA values removed by the earlier dropna can reappear here — confirm the data has no gaps.
df = df.asfreq(freq)

# Number of future steps the model forecasts per call.
prediction_length = 1  # Set your prediction length

# Hold out the most recent observations for testing; everything before them
# is used for training. The size is named once so that the training slice,
# the test slice and the test start date cannot drift apart.
holdout_length = 100
target_series = df[target_column]

# Both datasets contain a single univariate series, passed as NumPy arrays.
training_data = ListDataset([
    {
        "start": df.index[0],
        "target": target_series.iloc[:-holdout_length].values,
    }
], freq=freq)

test_data = ListDataset([
    {
        "start": df.index[-holdout_length],
        "target": target_series.iloc[-holdout_length:].values,
    }
], freq=freq)

# Configure the Temporal Fusion Transformer estimator.
# `freq` and `prediction_length` reuse the values defined earlier in this cell.
estimator = TemporalFusionTransformerEstimator(
    freq=freq,
    prediction_length=prediction_length,
    context_length=60,  # history length given to the model; adjust based on your needs
    num_heads=32,
    hidden_dim=1024,
    variable_dim=1024,
    quantiles=[0.1, 0.5, 0.9],  # quantile levels to forecast (P10, median, P90)
    lr=0.001,
    weight_decay=1e-08,
    dropout_rate=0.1,
    patience=10,
    batch_size=128,
    num_batches_per_epoch=100,
    trainer_kwargs={'max_epochs': 1000},  # extra trainer options; presumably forwarded to the Lightning trainer — confirm
)

# Fit on the training series; the returned predictor is used by the later cells.
predictor = estimator.train(training_data)


In [None]:
# Collect actual and predicted values for evaluation, including percentiles.
# The final `prediction_length` observations are the ground truth.
actuals = df[target_column][-prediction_length:].values

# `predictor.predict` forecasts the steps immediately AFTER the end of the
# series it receives. The model input must therefore stop right before the
# actuals; feeding the full test window would forecast a date beyond the data
# and score it against a value the model had already seen.
eval_window = 100  # same look-back window as the hold-out test set
eval_history = df[target_column].iloc[-eval_window:-prediction_length]
eval_data = ListDataset([
    {
        "start": eval_history.index[0],
        "target": eval_history.values,
    }
], freq=freq)

mean_predictions = []
p10_predictions = []
p50_predictions = []
p90_predictions = []

for forecast in predictor.predict(eval_data):
    p10_predictions.append(forecast.quantile(0.1))
    p50_predictions.append(forecast.quantile(0.5))  # Median
    p90_predictions.append(forecast.quantile(0.9))

# Flatten to 1-D arrays and keep exactly one value per forecast step.
p10_predictions = np.array(p10_predictions).flatten()[:prediction_length]
p50_predictions = np.array(p50_predictions).flatten()[:prediction_length]
p90_predictions = np.array(p90_predictions).flatten()[:prediction_length]

In [None]:
# Show the final `prediction_length` rows of the first column only.
# With the column order selected earlier, that column is 'BTC-CAD_High' (the forecast target).
last_row = df.iloc[-prediction_length:, :1]
last_row

In [None]:
# Print the P10, P50 (median) and P90 forecast arrays.
print(p10_predictions, p50_predictions, p90_predictions)

In [None]:
# Function to calculate sMAPE
def calculate_smape(forecasts, actuals):
    """Return the symmetric mean absolute percentage error (sMAPE), in percent.

    Each term is ``2 * |forecast - actual| / (|actual| + |forecast|)``; the
    result is the mean of all terms multiplied by 100.

    Args:
        forecasts: Predicted values (array-like).
        actuals: Observed values (array-like), broadcastable against
            ``forecasts``.

    Returns:
        The sMAPE as a float between 0 and 200.
    """
    forecasts = np.asarray(forecasts, dtype=float)
    actuals = np.asarray(actuals, dtype=float)

    numerator = 2 * np.abs(forecasts - actuals)
    denominator = np.abs(actuals) + np.abs(forecasts)

    # When forecast and actual are both zero the term is 0/0. The forecast is
    # exact there, so count it as zero error instead of propagating NaN.
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(denominator),
        where=denominator != 0,
    )
    return 100 * np.mean(terms)

# Point-forecast accuracy, using the median (P50) forecast as the point estimate.
mae = np.mean(np.abs(p50_predictions - actuals))
# NOTE: MAPE is undefined wherever an actual value is exactly zero.
mape = np.mean(np.abs((p50_predictions - actuals) / actuals)) * 100
smape = calculate_smape(p50_predictions, actuals)

# Empirical coverage: share of actuals that fall inside the P10-P90 band.
within_range = np.mean((actuals >= p10_predictions) & (actuals <= p90_predictions)) * 100

# The metrics above are computed from the median forecast, so label them as such.
print("Evaluation Metrics for Median (P50) Predictions:")
print(f"MAE: {mae:.2f}")
print(f"MAPE: {mape:.2f}%")
print(f"sMAPE: {smape:.2f}%")
print(f"Percentage of Actuals within P10-P90 Interval: {within_range:.2f}%")


In [None]:
import os
import torch

# File that will hold the saved predictor.
model_save_path = 'ron_high_huge.pth'

# 'predictor' is the object returned by `estimator.train(...)` for the
# TemporalFusionTransformerEstimator configured earlier; torch.save pickles it whole.
# NOTE(review): loading it back unpickles arbitrary objects — only load files you trust.
# GluonTS also offers `predictor.serialize(path)`; consider it for a more portable format.
torch.save(predictor, model_save_path)

print(f'Model saved to {model_save_path}')


In [None]:
import pandas as pd
import numpy as np
from gluonts.dataset.common import ListDataset
# [other imports]

# This cell requires `df`, `target_column`, and the trained `predictor`
# produced by the cells above.

prediction_length = 1  # Forecast horizon per model call: one step ahead
context_length = 60  # History length; same value as the estimator's context_length

# Rolling window prediction function
def perform_rolling_prediction(
    df,
    predictor,
    target_column,
    context_length,
    prediction_length,
    num_steps=30,
    freq='D',
):
    """Forecast ``num_steps`` future periods by rolling forward one step at a time.

    Each iteration feeds the complete history (all observed values followed by
    the forecasts produced so far) to ``predictor``, takes the median (P50) of
    the first forecast step, and appends it to the history so that the next
    iteration builds on it.

    Args:
        df: DataFrame with a datetime index holding the observed series.
        predictor: Trained predictor exposing ``predict(dataset)`` whose
            forecasts provide ``quantile(q)``.
        target_column: Name of the column in ``df`` to forecast.
        context_length: Unused; kept for backward compatibility. The
            predictor is assumed to select its own context window.
        prediction_length: Unused; kept for backward compatibility. Only the
            first step of each forecast is consumed.
        num_steps: Number of future periods to forecast.
        freq: Pandas frequency string of the series.

    Returns:
        A ``(prediction_dates, rolling_predictions)`` tuple: the
        ``DatetimeIndex`` of forecast dates and the list of median forecasts,
        one per date.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    if df.empty:
        raise ValueError("df must contain at least one observation")

    # Forecast dates start one period after the last observed timestamp.
    step = pd.tseries.frequencies.to_offset(freq)
    prediction_dates = pd.date_range(
        start=df.index.max() + step, periods=num_steps, freq=freq
    )

    # Pre-allocate observed values plus one slot per forecast, so extending
    # the history is an O(1) write rather than a DataFrame concat per step.
    observed = df[target_column].to_numpy(dtype=float)
    n_observed = len(observed)
    series = np.concatenate([observed, np.full(num_steps, np.nan)])
    series_start = df.index[0]

    rolling_predictions = []
    for i in range(num_steps):
        # Use everything known so far, INCLUDING the latest observation, so
        # that the forecast really is for the next unseen period.
        test_data = ListDataset([
            {
                "start": series_start,
                "target": series[:n_observed + i],
            }
        ], freq=freq)

        forecast = next(iter(predictor.predict(test_data)))
        # The first forecast step is the period right after the history ends.
        predicted_value = forecast.quantile(0.5)[0]
        rolling_predictions.append(predicted_value)

        # Feed the forecast back in as history for the following step.
        series[n_observed + i] = predicted_value

    return prediction_dates, rolling_predictions

# Roll the forecast forward, starting the day after the last date in `df`.
prediction_dates, rolling_predictions = perform_rolling_prediction(
    df=df,
    predictor=predictor,
    target_column=target_column,
    context_length=context_length,
    prediction_length=prediction_length,
)

# Report one line per forecast date.
print("Rolling Predictions for the next 30 days:")
for date, prediction in zip(prediction_dates, rolling_predictions):
    print(f"{date.strftime('%Y-%m-%d')}: {prediction}")
