# Dudley Inference

In [1]:
import pandas as pd

In [2]:
df = pd.read_parquet("amalgamated.parquet")

In [3]:
vars = [
    "BTC-USD_High",
    "BTC-USD_Low",
    "AdrBalNtv0.01Cnt",
    "AdrBalNtv0.1Cnt",
    "AdrBalNtv1Cnt",
    "AdrBalNtv10Cnt",
    "BlkSizeMeanByte",
    "CapRealUSD",
    "FeeByteMeanNtv",
    "FlowInExNtv",
    "FlowOutExNtv",
    "FlowTfrFromExCnt",
    "HashRate",
    "NDF",
    "SplyAct1d",
    "SplyActPct1yr",
    "TxCnt",
    "VelCur1yr",
    'SPY_High',
    'SPY_Low',
    'QQQ_High',
    'QQQ_Low',
    '^IRX_High',
    '^IRX_Low',
    '^TNX_High',
    '^TNX_Low',
    '^TYX_High',
    '^TYX_Low',
    'Global_Liquidity_Index',
    'BTC-USD_High_SMA_5',
    'BTC-USD_Low_SMA_5',
    'BTC-USD_High_SMA_10',
    'BTC-USD_Low_SMA_10',
    'BTC-USD_High_SMA_20',
    'BTC-USD_Low_SMA_20',
    'BTC-USD_High_SMA_50',
    'BTC-USD_Low_SMA_50',
    'BTC-USD_High_SMA_100',
    'BTC-USD_Low_SMA_100'
]

df = df[vars]

In [4]:
# Assuming df is your DataFrame
df = df.pct_change()*100

# The first row will be NaN because there's no previous data to subtract from the first entry
# If you wish to remove the NaN values, you can drop the first row
df.dropna(inplace=True)

df

Unnamed: 0,BTC-USD_High,BTC-USD_Low,AdrBalNtv0.01Cnt,AdrBalNtv0.1Cnt,AdrBalNtv1Cnt,AdrBalNtv10Cnt,BlkSizeMeanByte,CapRealUSD,FeeByteMeanNtv,FlowInExNtv,...,BTC-USD_High_SMA_5,BTC-USD_Low_SMA_5,BTC-USD_High_SMA_10,BTC-USD_Low_SMA_10,BTC-USD_High_SMA_20,BTC-USD_Low_SMA_20,BTC-USD_High_SMA_50,BTC-USD_Low_SMA_50,BTC-USD_High_SMA_100,BTC-USD_Low_SMA_100
2016-06-17,0.211191,2.876146,-0.371156,0.018004,-0.092548,-0.112296,-3.903503,0.436663,-1.724138,-2.189851,...,2.531538,3.315268,2.840253,2.416764,1.996920,2.120650,1.265512,1.122770,0.762469,0.662384
2016-06-18,0.339711,2.424512,0.113330,0.089415,0.074519,-0.012251,-5.396800,0.390686,-8.771930,-29.617663,...,1.690885,2.034674,2.911824,2.545879,1.813320,1.888356,1.237427,1.142061,0.759130,0.691963
2016-06-19,-1.461332,1.594020,-0.060433,-0.145128,-0.009462,0.044686,-18.217238,0.310397,-5.769231,-37.416573,...,1.666283,2.378377,2.673821,2.696944,1.767048,1.861005,1.178457,1.168459,0.716243,0.704217
2016-06-20,-0.330931,-1.730220,0.319801,0.281632,0.289035,0.025215,11.915418,0.121753,20.408163,69.572091,...,1.788421,1.687593,2.611801,2.396472,1.698822,1.740030,1.166869,1.104060,0.710307,0.686563
2016-06-21,-3.690823,-12.781975,-0.114202,0.360448,0.146665,0.121721,14.307934,0.080662,-3.389831,96.345462,...,-0.980851,-1.584752,1.772052,0.886751,1.480998,0.914827,1.049150,0.756481,0.657885,0.480667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-31,-0.277240,-0.965611,-0.097035,-0.077312,-0.085272,-0.167342,-1.100007,0.157324,8.735609,0.186467,...,0.703541,1.192024,0.444699,0.197518,-0.612509,-0.409098,0.076531,0.077755,0.229264,0.310403
2024-02-01,-1.084781,-0.992351,0.000182,0.018156,0.040220,0.029124,1.940695,0.055864,-5.553618,3.048019,...,0.485296,0.213334,0.378514,0.597104,-0.381949,-0.002987,-0.008550,0.057261,0.198046,0.228216
2024-02-02,0.414679,1.683759,-0.002671,-0.013757,0.010982,0.010999,-0.352834,0.065608,-4.640086,-8.800797,...,0.288292,0.421771,0.780518,0.992684,0.022125,0.014609,0.001449,0.038899,0.202442,0.224579
2024-02-03,-0.144043,0.719684,0.035375,0.010893,-0.019217,-0.018114,-0.215535,0.018688,239.317017,-45.462379,...,0.024859,0.507580,0.675998,0.818294,0.034664,0.141726,0.012468,0.056992,0.207841,0.230470


## High Forecast

In [5]:
import torch
import numpy as np
import pandas as pd  # Make sure to import pandas

from gluonts.torch.model.predictor import PyTorchPredictor
from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

# Load the trained model
model_path = 'dudley_high.pth'
trained_predictor = torch.load(model_path)

# Assuming df is your DataFrame with the data
target_column = 'BTC-USD_High'  # Replace with your target column name

# Ensure the DataFrame's index is a datetime index and set the frequency explicitly if needed
df.index = pd.to_datetime(df.index)
freq = "D"  # Set the frequency of your data, e.g., 'D' for daily. Adjust as needed.
df = df.asfreq(freq)

# Define the prediction length
prediction_length = 40  # Set your prediction length
context_length = 40  # Set your prediction length

# Select dynamic features from the DataFrame, excluding the target column
past_dynamic_feature_columns = df.columns.drop(target_column)  # This excludes the target column

# Extract dynamic features as a numpy array
past_dynamic_features = df[past_dynamic_feature_columns].values.transpose()

# Assuming all dynamic features are known in the future, adjust the dimensions accordingly
past_dynamic_dims = [1] * len(past_dynamic_feature_columns)  # Adjust based on actual dynamic features

# Adjust the slicing for dynamic features for the test dataset to ensure correct dimensions
inference_past_dynamic_features_sliced = past_dynamic_features[:, -context_length:]

# Correct forecast start date to the day after the last day in the dataset
forecast_start_date = df.index[-1] + pd.Timedelta(days=1)

# Setup for inference remains the same, assuming the last context_length days are used for input
inference_data = ListDataset([
    {
        "start": forecast_start_date,
        "target": df[target_column][-context_length:].values,
        "past_feat_dynamic_real": inference_past_dynamic_features_sliced
    }
], freq=freq)

# Perform inference using the loaded model
high_p50_predictions = []

for forecast in trained_predictor.predict(inference_data):
    high_p50_predictions.append(forecast.quantile(0.5))  # Median

# Convert lists to numpy arrays for slicing if necessary
high_p50_predictions = np.array(high_p50_predictions).flatten()[:prediction_length]

# Output the size of high_p50_predictions to confirm it matches prediction_length
print(high_p50_predictions.size)


40


## Low Forecast

In [6]:
import torch
import numpy as np
import pandas as pd  # Make sure to import pandas

from gluonts.torch.model.predictor import PyTorchPredictor
from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

# Load the trained model
model_path = 'dudley_low.pth'
trained_predictor = torch.load(model_path)

# Assuming df is your DataFrame with the data
target_column = 'BTC-USD_Low'  # Replace with your target column name

# Ensure the DataFrame's index is a datetime index and set the frequency explicitly if needed
df.index = pd.to_datetime(df.index)
freq = "D"  # Set the frequency of your data, e.g., 'D' for daily. Adjust as needed.
df = df.asfreq(freq)

# Define the prediction length
prediction_length = 40  # Set your prediction length
context_length = 40  # Set your prediction length

# Select dynamic features from the DataFrame, excluding the target column
past_dynamic_feature_columns = df.columns.drop(target_column)  # This excludes the target column

# Extract dynamic features as a numpy array
past_dynamic_features = df[past_dynamic_feature_columns].values.transpose()

# Assuming all dynamic features are known in the future, adjust the dimensions accordingly
past_dynamic_dims = [1] * len(past_dynamic_feature_columns)  # Adjust based on actual dynamic features

# Adjust the slicing for dynamic features for the test dataset to ensure correct dimensions
inference_past_dynamic_features_sliced = past_dynamic_features[:, -context_length:]

# Correct forecast start date to the day after the last day in the dataset
forecast_start_date = df.index[-1] + pd.Timedelta(days=1)

# Setup for inference remains the same, assuming the last context_length days are used for input
inference_data = ListDataset([
    {
        "start": forecast_start_date,
        "target": df[target_column][-context_length:].values,
        "past_feat_dynamic_real": inference_past_dynamic_features_sliced
    }
], freq=freq)

# Perform inference using the loaded model
low_p50_predictions = []

for forecast in trained_predictor.predict(inference_data):
    low_p50_predictions.append(forecast.quantile(0.5))  # Median

# Convert lists to numpy arrays for slicing if necessary
low_p50_predictions = np.array(low_p50_predictions).flatten()[:prediction_length]

# Output the size of low_p50_predictions to confirm it matches prediction_length
print(low_p50_predictions.size)


40


## Predictions

In [7]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Set the start and end times for data retrieval
start_time = (datetime.now() - timedelta(days=3)).strftime('%Y-%m-%d')  # 3 days ago
end_time = (datetime.now() - timedelta(days=0)).strftime('%Y-%m-%d')  # Yesterday
frequency = "1d"

# Retrieve historical data and store it in a dictionary
btcc_df = yf.download("BTCC-B.TO", start=start_time, end=end_time, interval=frequency)[['High', 'Low']]
btcc_df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-02-02,8.37,8.23


In [8]:
import pandas as pd

# Generate forecast dates starting from the day after the last day in the dataset
forecast_dates = pd.date_range(start=forecast_start_date, periods=prediction_length, freq=freq)

# Ensure predictions are properly shaped
low_p50_predictions = np.array(low_p50_predictions).flatten()
high_p50_predictions = np.array(high_p50_predictions).flatten()


# Check if the length of predictions matches the expected prediction_length
if len(low_p50_predictions) != prediction_length:
    print(f"Warning: The number of predictions ({len(low_p50_predictions)}) does not match the expected prediction_length ({prediction_length}).")

# Create a DataFrame with forecast dates and predictions
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Low_P50_Prediction': low_p50_predictions,
    'High_P50_Prediction': high_p50_predictions
})

# Optionally, set the date as the index of the DataFrame
forecast_df.set_index('Date', inplace=True)

# Display the DataFrame
forecast_df


Unnamed: 0_level_0,Low_P50_Prediction,High_P50_Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-02-05,0.173461,0.431886
2024-02-06,0.49563,-0.15219
2024-02-07,-0.468951,0.651503
2024-02-08,2.349905,1.079908
2024-02-09,0.852555,0.07197
2024-02-10,-0.222038,0.412653
2024-02-11,-3.60782,-0.846677
2024-02-12,0.057257,0.8786
2024-02-13,0.541983,0.530777
2024-02-14,2.489828,-2.355069


In [9]:
import pandas as pd

# Assuming btcc_df and forecast_df are already defined
last_high = btcc_df['High'].iloc[-1]
last_low = btcc_df['Low'].iloc[-1]

future_highs = []
future_lows = []

for _, row in forecast_df.iterrows():
    new_high = last_high * (1 + row['High_P50_Prediction'] / 100)
    new_low = last_low * (1 + row['Low_P50_Prediction'] / 100)

    # Ensure new_low is not greater than new_high
    if new_low > new_high:
        new_low = new_high  # Adjust new_low to match new_high if it's greater

    future_highs.append(new_high)
    future_lows.append(new_low)
    last_high, last_low = new_high, new_low  # Update last known values for the next iteration

future_df = pd.DataFrame({
    'Low': future_lows,
    'High': future_highs
}, index=forecast_df.index)

# Reset btcc_df index to use 'Date' if not already, and concatenate with future_df
btcc_df = btcc_df.reset_index().set_index('Date')
final_df = pd.concat([btcc_df, future_df])

# Sort final_df by the index (Date) to ensure proper order
final_df.sort_index(inplace=True)

# Explicitly set the column order for final_df to ensure 'Low' comes before 'High'
final_df = final_df[['Low', 'High'] + [col for col in final_df.columns if col not in ['Low', 'High']]]

# Add the percentage difference column to final_df
final_df['Percentage Difference'] = ((final_df['High'] - final_df['Low']) / final_df['Low']) * 100

In [10]:
import pandas as pd

# Assuming the rest of your code is defined above and final_df is already created

# Get today's date using pd.Timestamp and define the end date as today + 5 days
today = pd.Timestamp('today').normalize()  # normalize to remove time component
end_date = today + pd.Timedelta(days=5)

# Filter final_df for rows where the index (Date) is between today and the next 5 days
filtered_df = final_df.loc[today:end_date]

# Display the filtered DataFrame
filtered_df


Unnamed: 0_level_0,Low,High,Percentage Difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-02-05,8.244275,8.406149,1.963464
2024-02-06,8.285136,8.393355,1.306182
2024-02-07,8.246283,8.448038,2.446619
2024-02-08,8.440063,8.539269,1.175422
2024-02-09,8.512019,8.545415,0.392338
2024-02-10,8.493119,8.580678,1.030937
