# Petunia

In [1]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import pytz

In [2]:
# Query window: bar interval plus the start/end dates (UTC calendar dates,
# formatted YYYY-MM-DD) spanning the five days up to today.
frequency = "1m"
utc_zone = pytz.timezone('UTC')
date_format = '%Y-%m-%d'
start_time = (datetime.now(utc_zone) - timedelta(days=5)).strftime(date_format)
end_time = datetime.now(utc_zone).strftime(date_format)
print(end_time)

2024-02-11


In [3]:
# Symbols to fetch; the date window and bar interval come from
# start_time, end_time and frequency.
tickers = ["MSTR", "ETH-CAD", "SOL-CAD"]

# Download the OHLC columns for each symbol and express the timestamps in US/Pacific
historical_data = {}
for ticker in tickers:
    data = yf.download(
        ticker,
        start=start_time,
        end=end_time,
        interval=frequency,
        progress=True,
    )[['High', 'Low', 'Open', 'Close']]

    # Normalise the index: datetime-typed, timezone-aware (naive stamps are
    # taken to be UTC), then converted to Pacific time.
    stamps = data.index
    if not isinstance(stamps, pd.DatetimeIndex):
        stamps = pd.to_datetime(stamps)
    if stamps.tz is None:
        stamps = stamps.tz_localize('UTC')
    data.index = stamps.tz_convert('US/Pacific')

    historical_data[ticker] = data

# Flatten the per-ticker frames into one wide table whose columns are named
# "<ticker>_<field>" (no weighting is applied).
# NOTE(review): df starts empty, so its rows appear to follow the first
# ticker's index and later tickers are aligned to it - confirm this is intended.
df = pd.DataFrame()
for ticker, frame in historical_data.items():
    for col in frame.columns:
        df[f"{ticker}_{col}"] = frame[col]



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [4]:
# Convert prices to bar-over-bar percentage changes scaled by 10,000
# (i.e. expressed in basis points) and drop rows that contain NaN.
#
# The forward-fill is spelled out and fill_method=None is passed so the result
# matches what pct_change() did with its implicit (now deprecated) padding,
# without relying on that default.
#
# Caution: this reassigns `df`, so re-running the cell applies the
# transformation a second time to the already-transformed values.
df = (df.ffill().pct_change(fill_method=None) * 10_000).dropna()
df.tail()

  df = df.pct_change()*10_000


Unnamed: 0_level_0,MSTR_High,MSTR_Low,MSTR_Open,MSTR_Close,ETH-CAD_High,ETH-CAD_Low,ETH-CAD_Open,ETH-CAD_Close,SOL-CAD_High,SOL-CAD_Low,SOL-CAD_Open,SOL-CAD_Close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-02-09 12:55:00-08:00,-13.572363,-21.707068,-16.66513,-29.101291,-5.090281,-5.090281,-5.090281,-5.090281,5.604595,5.604595,5.604595,5.604595
2024-02-09 12:56:00-08:00,-19.922441,-10.916365,-12.519476,-10.761376,-4.329997,-15.269166,-4.329997,-15.269166,-1.861049,-5.179273,-1.861049,-5.179273
2024-02-09 12:57:00-08:00,-14.237538,-5.424885,-16.249154,-5.579957,-10.285469,0.65916,-10.285469,0.65916,-6.539659,-3.221886,-6.539659,-3.221886
2024-02-09 12:58:00-08:00,-4.338643,-1.549591,-6.973635,2.325807,3.397547,3.397547,3.397547,3.397547,-0.881723,-0.881723,-0.881723,-0.881723
2024-02-09 12:59:00-08:00,42.47981,2.867709,2.867709,33.645851,6.609315,6.609315,6.609315,6.609315,6.97248,6.97248,6.97248,6.97248


In [5]:
import pandas as pd
import numpy as np
import torch

from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

torch.set_float32_matmul_precision('medium')
                                   
# Column of `df` to forecast. It has to be one of the "<ticker>_<field>"
# columns built from the downloaded tickers (MSTR, ETH-CAD, SOL-CAD); a
# BTC-CAD column does not exist in this frame.
target_column = 'ETH-CAD_High'  # Replace with your target column name
if target_column not in df.columns:
    raise KeyError(
        f"Target column {target_column!r} not found in df; "
        f"available columns: {list(df.columns)}"
    )

# Ensure the DataFrame's index is a datetime index
df.index = pd.to_datetime(df.index)

# The rows are 1-minute bars (downloaded with interval "1m"), so the model
# frequency is one minute. The frame is deliberately NOT re-sampled with
# asfreq: a daily frequency would keep a single row per day and discard the rest.
# NOTE(review): the rows are handed to the model as consecutive 1-minute steps,
# so any gaps in the index are collapsed - confirm this is acceptable.
freq = "1min"

# Number of trailing rows held out and forecast
prediction_length = 10
if len(df) <= prediction_length:
    raise ValueError(
        f"Need more than {prediction_length} rows to hold out a forecast window, got {len(df)}"
    )

# Every column except the target is used as a dynamic feature
past_dynamic_feature_columns = df.columns.drop(target_column)

# Feature matrix laid out as (feature, time)
past_dynamic_features = df[past_dynamic_feature_columns].to_numpy().transpose()

# These features are supplied as past-only covariates (they are not known for
# the forecast horizon); each one is a single scalar series, hence dimension 1.
past_dynamic_dims = [1] * len(past_dynamic_feature_columns)

# History available to the model: everything except the final
# `prediction_length` rows, which are held out as the actuals to score against.
history_target = df[target_column].to_numpy()[:-prediction_length]
history_features = past_dynamic_features[:, :-prediction_length]

# Target and features must have the same number of time steps
if history_features.shape[1] != len(history_target):
    raise ValueError(
        f"Feature length {history_features.shape[1]} does not match target length {len(history_target)}"
    )

training_data = ListDataset([
    {
        "start": df.index[0],
        "target": history_target,
        "past_feat_dynamic_real": history_features
    }
], freq=freq)

# The predictor forecasts the `prediction_length` steps that FOLLOW the series
# it is given. To obtain a forecast for the held-out rows, the prediction input
# must therefore be the history that ends right before them - not the held-out
# rows themselves, which would leak the answers and shift the forecast past the
# end of the data.
test_past_dynamic_features_sliced = history_features

test_data = ListDataset([
    {
        "start": df.index[0],
        "target": history_target,
        "past_feat_dynamic_real": test_past_dynamic_features_sliced
    }
], freq=freq)

# Settings for the Temporal Fusion Transformer estimator, collected in one
# mapping so they can be inspected or tweaked before the estimator is built.
tft_settings = dict(
    freq=freq,
    prediction_length=prediction_length,
    context_length=90,  # number of past steps fed to the model
    num_heads=32,
    hidden_dim=1024,
    variable_dim=1024,
    past_dynamic_dims=past_dynamic_dims,
    quantiles=[0.1, 0.5, 0.9],  # quantile levels the model is asked to forecast
    lr=0.001,
    weight_decay=1e-08,
    dropout_rate=0.1,
    patience=10,
    batch_size=128,
    num_batches_per_epoch=100,
    trainer_kwargs={'max_epochs': 5000},
)
estimator = TemporalFusionTransformerEstimator(**tft_settings)

# Fit the estimator on the training dataset; the returned predictor is what
# the later cells use to produce forecasts.
predictor = estimator.train(training_data)

# Debugging aid: show the feature-matrix shapes used for training and prediction
print(f"Past Dynamic Features Training Shape: {past_dynamic_features[:, :-prediction_length].shape}")
print(f"Past Dynamic Features Test Shape: {test_past_dynamic_features_sliced.shape}")

# Held-out actual values: the last `prediction_length` rows of the target
actuals = df[target_column][-prediction_length:].values

# Materialise the forecasts once, then pull the mean and each quantile out of them
forecasts = list(predictor.predict(test_data))


def _first_steps(values):
    """Flatten the per-forecast arrays and keep the first `prediction_length` entries."""
    return np.array(values).flatten()[:prediction_length]


mean_predictions = _first_steps([forecast.mean for forecast in forecasts])
p10_predictions = _first_steps([forecast.quantile(0.1) for forecast in forecasts])
p50_predictions = _first_steps([forecast.quantile(0.5) for forecast in forecasts])  # Median
p90_predictions = _first_steps([forecast.quantile(0.9) for forecast in forecasts])


KeyboardInterrupt: 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Plot the forecast against the actual values.
#
# This cell reuses `df`, `target_column` and the mean/p10/p50/p90 prediction
# arrays produced by the forecasting cell. It intentionally does not redefine
# `target_column` or `prediction_length`: the forecast arrays must be drawn
# against the same column and the same number of steps they were computed for.
horizon = len(mean_predictions)
if horizon == 0:
    raise ValueError("mean_predictions is empty; run the forecasting cell first")

# Bars of history drawn before the forecast window, for visual context
history_shown = 5 * horizon

# The predictions are compared with the last `horizon` rows of df, so they are
# drawn on those rows' own timestamps (this keeps x and y the same length).
forecast_index = df.index[-horizon:]
forecast_start_date = forecast_index[0]

# First row of actuals to draw (clamped to the start of the frame)
actuals_start_index = max(len(df) - horizon - history_shown, 0)

fig, ax = plt.subplots(figsize=(12, 6))

# Actual values: some history plus the forecast window
ax.plot(
    df.index[actuals_start_index:],
    df[target_column].iloc[actuals_start_index:],
    label="True values",
    color="black",
)

# Forecast mean and quantile bands
ax.plot(forecast_index, mean_predictions, color='red', linestyle='--', label="Forecast (mean)")
ax.fill_between(forecast_index, p10_predictions, p90_predictions, color='red', alpha=0.3, label="P10-P90 interval")
ax.fill_between(forecast_index, p10_predictions, p50_predictions, color='red', alpha=0.5, label="P10-P50 interval")
ax.fill_between(forecast_index, p50_predictions, p90_predictions, color='red', alpha=0.5, label="P50-P90 interval")

# Mark where the forecast begins and label the figure
ax.axvline(x=forecast_start_date, color='blue', linestyle='--', label='Start of forecast')
ax.legend(loc="upper left", fontsize="large")
ax.set_title('Forecast vs Actual Values from Forecast Start')
ax.set_xlabel('Date')
ax.set_ylabel('Value')
fig.tight_layout()
plt.show()


In [None]:
# Keep the final row of df as a one-row DataFrame and display it
last_row = df.tail(1)
last_row

In [None]:
# Display the median (0.5-quantile) forecast values
p50_predictions

In [None]:
# Function to calculate sMAPE
def calculate_smape(forecasts, actuals):
    """Return the symmetric mean absolute percentage error, in percent.

    Each point contributes ``2 * |forecast - actual| / (|actual| + |forecast|)``;
    the result is 100 times the mean of those terms.

    Parameters
    ----------
    forecasts, actuals : array-like of numbers
        Predicted and observed values. They are converted to float arrays, so
        lists and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    float
        sMAPE in percent. A point where forecast and actual are both zero is an
        exact match and contributes 0 (instead of the undefined 0/0).
    """
    forecasts = np.asarray(forecasts, dtype=float)
    actuals = np.asarray(actuals, dtype=float)

    numerator = 2 * np.abs(forecasts - actuals)
    denominator = np.abs(actuals) + np.abs(forecasts)

    # Divide only where the denominator is non-zero; the pre-filled zeros are
    # kept elsewhere, which is exactly the "both values are zero" case.
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 100 * np.mean(terms)

# Calculate standard evaluation metrics for mean predictions
errors = mean_predictions - actuals
mae = np.mean(np.abs(errors))
rmse = np.sqrt(np.mean(np.square(errors)))

# MAPE divides by the actual value, which is undefined where the actual is
# exactly 0. Those points are left out of the average; if every actual is 0
# the metric is reported as NaN rather than inf.
nonzero_actuals = actuals != 0
if nonzero_actuals.any():
    mape = np.mean(np.abs(errors[nonzero_actuals] / actuals[nonzero_actuals])) * 100
else:
    mape = np.nan

smape = calculate_smape(mean_predictions, actuals)

# Calculate the percentage of actuals within the 10th to 90th percentile range
inside_band = (actuals >= p10_predictions) & (actuals <= p90_predictions)
within_range = np.sum(inside_band) / len(actuals) * 100

print("Evaluation Metrics for Mean Predictions:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")
print(f"sMAPE: {smape:.2f}%")
print(f"Percentage of Actuals within P10-P90 Interval: {within_range:.2f}%")


In [None]:
import os
import torch

# File the trained predictor is written to
model_save_path = 'dudley_high.pth'

# Persist `predictor`, the object returned by the estimator's train() call.
# NOTE(review): torch.save stores the whole Python object, so loading it back
# presumably needs the same library classes to be importable - confirm.
torch.save(obj=predictor, f=model_save_path)

print(f'Model saved to {model_save_path}')
