# Ron Monte Carlo

In [1]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import pytz

frequency = "1d"
start_time = (datetime.now(pytz.timezone('US/Pacific')) - timedelta(days=720)).strftime('%Y-%m-%d')
end_time = (datetime.now(pytz.timezone('US/Pacific'))).strftime('%Y-%m-%d')

tickers = ["BTC-CAD"]

# Retrieve and concatenate historical data in one step
df = pd.concat(
    [yf.download(ticker, start=start_time, end=end_time, interval=frequency)[['High', 'Low']].add_prefix(f"{ticker}_") for ticker in tickers],
    axis=1
)

[*********************100%%**********************]  1 of 1 completed


In [2]:
# Add an assertion to ensure there are no NA values in the DataFrame
assert df.isnull().sum().sum() == 0, "DataFrame contains NA values"

df

Unnamed: 0_level_0,BTC-CAD_High,BTC-CAD_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-22,54283.960938,51562.402344
2022-03-23,53872.656250,52716.457031
2022-03-24,55361.382812,53687.386719
2022-03-25,56350.390625,54836.527344
2022-03-26,55864.074219,55099.636719
...,...,...
2024-03-06,93812.804688,80644.625000
2024-03-07,91916.906250,85369.171875
2024-03-08,91648.546875,88748.687500
2024-03-09,94348.257812,89181.023438


In [3]:
import pandas as pd
import numpy as np
import torch
import os

from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

torch.set_float32_matmul_precision('high')
                                   
# Assuming df is your DataFrame with the data
target_column_high = 'BTC-CAD_High' 
target_column_low = 'BTC-CAD_Low' 

# Ensure the DataFrame's index is a datetime index and set the frequency explicitly if needed
df.index = pd.to_datetime(df.index)
freq = "D"  # Set the frequency of your data, e.g., 'D' for daily. Adjust as needed.
df = df.asfreq(freq)

# Define the prediction length
prediction_length = 1  # Set your prediction length
context_length = 60
volatility_window = 60

model_high = 'ron_high_huge.pth'
model_low = 'ron_low_huge.pth'

predictor_high = torch.load(model_high)
predictor_low = torch.load(model_low)

In [5]:
%%time


# Modify perform_rolling_prediction to handle timezone-aware datetimes
def perform_rolling_prediction(df, predictor, target_column, context_length, prediction_length, target_date, noise_mean, noise_std_dev, volatility_window=60):
    rolling_predictions = []
    prediction_dates = []

    temp_df = df.copy()

    if not temp_df.index.tz:
        temp_df.index = temp_df.index.tz_localize(pytz.timezone('US/Pacific'))

    current_max_date = temp_df.index.max()

    if (target_date - current_max_date).days < 0:
        return "Target date is before the current latest date in the dataset."

    start_prediction_date = current_max_date + pd.Timedelta(1, unit='D')
    prediction_dates = pd.date_range(start=start_prediction_date, end=target_date, freq='D', tz=pytz.timezone('US/Pacific'))
    
    for i, prediction_date in enumerate(prediction_dates):
        end_idx = len(temp_df) - prediction_length + i
        test_data = ListDataset([{"start": temp_df.index[0], "target": temp_df[target_column][:end_idx].values}], freq='D')

        forecast = next(predictor.predict(test_data))
        predicted_value = forecast.quantile(0.5)[-1]
        
        # Calculate dynamic noise based on recent market volatility
        recent_volatility = temp_df[target_column][-volatility_window:].pct_change().std()
        noise = np.random.laplace(noise_mean, recent_volatility / np.sqrt(2))

        predicted_value_with_noise = predicted_value + noise
        rolling_predictions.append(predicted_value_with_noise)

        new_row = pd.DataFrame({target_column: [predicted_value_with_noise]}, index=[prediction_date])
        temp_df = pd.concat([temp_df, new_row])

    return prediction_dates, rolling_predictions


######## Monte-Carlo Simulation Function
def monte_carlo_simulation_for_date(df, predictor_high, predictor_low, target_column_high, target_column_low, context_length, prediction_length, target_date, noise_mean_high, noise_std_dev_high, noise_mean_low, noise_std_dev_low, num_simulations=10):
    high_simulations = []
    low_simulations = []
    for _ in range(num_simulations):
        # Generate rolling predictions for high with noise
        _, rolling_predictions_high = perform_rolling_prediction(df, predictor_high, target_column_high, context_length, prediction_length, target_date, noise_mean_high, noise_std_dev_high, volatility_window)
        # Generate rolling predictions for low with noise
        _, rolling_predictions_low = perform_rolling_prediction(df, predictor_low, target_column_low, context_length, prediction_length, target_date, noise_mean_low, noise_std_dev_low, volatility_window)

        high_prediction = rolling_predictions_high[-1]
        low_prediction = rolling_predictions_low[-1]

        simulated_high_return = np.random.laplace(mean_high, std_dev_high)
        simulated_low_return = np.random.laplace(mean_low, std_dev_low)

        simulated_high_price = high_prediction * (1 + (0.1 * simulated_high_return))
        simulated_low_price = low_prediction * (1 + (0.1 * simulated_low_return))

        high_simulations.append(simulated_high_price)
        low_simulations.append(simulated_low_price)

    # Calculate confidence intervals
    high_lower_bound = np.percentile(high_simulations, 2.5)
    high_upper_bound = np.percentile(high_simulations, 97.5)
    low_lower_bound = np.percentile(low_simulations, 2.5)
    low_upper_bound = np.percentile(low_simulations, 97.5)

    return np.mean(high_simulations), high_lower_bound, high_upper_bound, np.mean(low_simulations), low_lower_bound, low_upper_bound







################## DATE #################
(current_date) = datetime.now(pytz.timezone('US/Pacific'))
target_date = pd.Timestamp('2024-09-20-23:00').tz_localize(pytz.timezone('US/Pacific'))





# Formatting dates for better readability
formatted_current_date = current_date.strftime('%Y-%m-%d %H:%M:%S %Z')
formatted_target_date = target_date.strftime('%Y-%m-%d %H:%M:%S %Z')

# Improved print statement
print(f"Current Date and Time (US/Pacific): {formatted_current_date}")
print(f"Target Date and Time (US/Pacific): {formatted_target_date}")

# Calculate daily returns for High and Low
df['BTC-CAD_High_Return'] = df['BTC-CAD_High'].pct_change()
df['BTC-CAD_Low_Return'] = df['BTC-CAD_Low'].pct_change()

# Calculate the mean and standard deviation of daily returns
mean_high = df['BTC-CAD_High_Return'].mean()
std_dev_high = df['BTC-CAD_High_Return'].std()
mean_low = df['BTC-CAD_Low_Return'].mean()
std_dev_low = df['BTC-CAD_Low_Return'].std()

# Ensure both dates are either timezone-aware or timezone-naive
current_date_naive = current_date.replace(tzinfo=None)  # Convert to naive datetime
target_date_naive = target_date.tz_localize(None)  # Convert to naive Timestamp if it's timezone-aware

# Calculate the period from current date to target date
period_to_target = (target_date_naive - current_date_naive).days

# Perform Monte Carlo simulation for the target date using both high and low predictors
num_simulations = 100

# Perform Monte Carlo simulation for the target date using both high and low predictors
high_mean, high_lower_ci, high_upper_ci, low_mean, low_lower_ci, low_upper_ci = monte_carlo_simulation_for_date(
    df, predictor_high, predictor_low, target_column_high, target_column_low, context_length, prediction_length, target_date, noise_mean_high=mean_high, noise_std_dev_high=std_dev_high, noise_mean_low=mean_low, noise_std_dev_low=std_dev_low, num_simulations=num_simulations
)

# Print the results
print(f"High prediction for {target_date.date()}: Mean = {high_mean}, 95% CI = {high_lower_ci} to {high_upper_ci}")
print(f"Low prediction for {target_date.date()}: Mean = {low_mean}, 95% CI = {low_lower_ci} to {low_upper_ci}")

Current Date and Time (US/Pacific): 2024-03-11 08:53:16 PDT
Target Date and Time (US/Pacific): 2024-09-20 23:00:00 PDT
High prediction for 2024-09-20: Mean = 106591.92811434185, 95% CI = 105477.67142744947 to 107591.37354085234
Low prediction for 2024-09-20: Mean = 88532.98197198134, 95% CI = 88017.49999570764 to 89196.12513829369
CPU times: total: 7min 2s
Wall time: 7min 6s
