# Ron Monte Carlo

In [1]:
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import pytz

frequency = "1d"
start_time = "2012-01-01"
end_time = (datetime.now(pytz.timezone('US/Pacific'))).strftime('%Y-%m-%d')

tickers = ["BTC-CAD"]

# Retrieve and concatenate historical data in one step
df = pd.concat(
    [yf.download(ticker, start=start_time, end=end_time, interval=frequency)[['High', 'Low']].add_prefix(f"{ticker}_") for ticker in tickers],
    axis=1
)

# Fill missing data
df.fillna(method='ffill', inplace=True)

[*********************100%%**********************]  1 of 1 completed
  df.fillna(method='ffill', inplace=True)


In [2]:
# Assuming df is your DataFrame after resampling and you've already dropped NA values
df.dropna(inplace=True)

# Add an assertion to ensure there are no NA values in the DataFrame
assert df.isnull().sum().sum() == 0, "DataFrame contains NA values"

df

Unnamed: 0_level_0,BTC-CAD_High,BTC-CAD_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-17,513.446411,497.302246
2014-09-18,502.728729,452.183655
2014-09-19,468.607666,419.985870
2014-09-20,463.974731,427.350739
2014-09-21,452.060120,430.965698
...,...,...
2024-03-04,85708.109375,83437.976562
2024-03-05,93044.843750,84514.890625
2024-03-06,93812.804688,80644.625000
2024-03-07,91916.906250,85369.171875


In [3]:
import pandas as pd
import numpy as np
import torch
import os

from gluonts.dataset.common import ListDataset
from gluonts.torch.model.tft import TemporalFusionTransformerEstimator
from gluonts.transform.feature import MissingValueImputation

torch.set_float32_matmul_precision('high')
                                   
# Assuming df is your DataFrame with the data
target_column_high = 'BTC-CAD_High' 
target_column_low = 'BTC-CAD_Low' 

# Ensure the DataFrame's index is a datetime index and set the frequency explicitly if needed
df.index = pd.to_datetime(df.index)
freq = "D"  # Set the frequency of your data, e.g., 'D' for daily. Adjust as needed.
df = df.asfreq(freq)

# Define the prediction length
prediction_length = 1  # Set your prediction length
context_length = 60

model_high = 'ron_high.pth'
model_low = 'ron_low.pth'

predictor_high = torch.load(model_high)
predictor_low = torch.load(model_low)

In [8]:
%%time

# Current date and target date
current_date = datetime.now(pytz.timezone('US/Pacific'))
target_date = pd.Timestamp('2026-01-16').tz_localize(pytz.timezone('US/Pacific'))

# Modify perform_rolling_prediction to handle timezone-aware datetimes
def perform_rolling_prediction(df, predictor, target_column, context_length, prediction_length, target_date, noise_mean, noise_std_dev):
    rolling_predictions = []
    prediction_dates = []

    temp_df = df.copy()

    # Ensure the DataFrame's index is timezone-aware
    if not temp_df.index.tz:
        temp_df.index = temp_df.index.tz_localize(pytz.timezone('US/Pacific'))

    current_max_date = temp_df.index.max()

    if (target_date - current_max_date).days < 0:
        return f"Target date {target_date} is before the current latest date in the dataset."

    start_prediction_date = current_max_date + pd.Timedelta(1, unit='D')
    prediction_dates = pd.date_range(start=start_prediction_date, end=target_date, freq='D', tz=pytz.timezone('US/Pacific'))
    
    for i, prediction_date in enumerate(prediction_dates):
        end_idx = len(temp_df) - prediction_length + i
        test_data = ListDataset([
            {
                "start": temp_df.index[0],
                "target": temp_df[target_column][:end_idx].values
            }
        ], freq='D')

        forecast = next(predictor.predict(test_data))
        predicted_value = forecast.quantile(0.5)[-1]
        
        # Inject noise into the predicted value
        noise = np.random.normal(noise_mean, noise_std_dev)
        predicted_value_with_noise = predicted_value + noise

        rolling_predictions.append(predicted_value_with_noise)

        # Append the predicted value for future rolling windows
        new_row = pd.DataFrame({target_column: [predicted_value_with_noise]}, index=[prediction_date])
        temp_df = pd.concat([temp_df, new_row])

    return prediction_dates, rolling_predictions


######## Monte-Carlo Simulation Function
def monte_carlo_simulation_for_date(df, predictor_high, predictor_low, target_column_high, target_column_low, context_length, prediction_length, target_date, noise_mean_high, noise_std_dev_high, noise_mean_low, noise_std_dev_low, num_simulations=10):
    simulations = []
    for _ in range(num_simulations):
        # Generate rolling predictions for high with noise
        _, rolling_predictions_high = perform_rolling_prediction(df, predictor_high, target_column_high, context_length, prediction_length, target_date, noise_mean_high, noise_std_dev_high)
        # Generate rolling predictions for low with noise
        _, rolling_predictions_low = perform_rolling_prediction(df, predictor_low, target_column_low, context_length, prediction_length, target_date, noise_mean_low, noise_std_dev_low)

        # Use the last predictions as the current high and low predictions
        high_prediction = rolling_predictions_high[-1]
        low_prediction = rolling_predictions_low[-1]

        #Simulate daily return for High and Low using historical mean and std dev
        #simulated_high_return = np.random.normal(mean_high, std_dev_high)
        #simulated_low_return = np.random.normal(mean_low, std_dev_low)

        # Apply the simulated returns to the predictions
        #simulated_high_price = high_prediction * (1 + simulated_high_return)
        #simulated_low_price = low_prediction * (1 + simulated_low_return)

        # Take the average of high and low simulated prices
        simulated_price = np.mean([high_prediction, low_prediction])
        simulations.append(simulated_price)

    # Calculating the average of all simulations
    average_simulated_price = np.mean(simulations)
    return average_simulated_price

# Calculate daily returns for High and Low
df['BTC-CAD_High_Return'] = df['BTC-CAD_High'].pct_change()
df['BTC-CAD_Low_Return'] = df['BTC-CAD_Low'].pct_change()

# Calculate the mean and standard deviation of daily returns
mean_high = df['BTC-CAD_High_Return'].mean()
std_dev_high = df['BTC-CAD_High_Return'].std()
mean_low = df['BTC-CAD_Low_Return'].mean()
std_dev_low = df['BTC-CAD_Low_Return'].std()

# Ensure both dates are either timezone-aware or timezone-naive
current_date_naive = current_date.replace(tzinfo=None)  # Convert to naive datetime
target_date_naive = target_date.tz_localize(None)  # Convert to naive Timestamp if it's timezone-aware

# Calculate the period from current date to target date
period_to_target = (target_date_naive - current_date_naive).days

# Perform Monte Carlo simulation for the target date using both high and low predictors
num_simulations = 100

simulated_price_for_target_date = monte_carlo_simulation_for_date(
    df, predictor_high, predictor_low, target_column_high, target_column_low, context_length, prediction_length, target_date, num_simulations=num_simulations, noise_mean_high=mean_high, noise_std_dev_high=std_dev_high, noise_mean_low=mean_low, noise_std_dev_low=std_dev_low
)


# Print the simulated price for the target date and the period
print(f"Period from current date to target date: {period_to_target} days")
print(f"Simulated price for {target_date.date()}: {simulated_price_for_target_date}")

Period from current date to target date: 677 days
Simulated price for 2026-01-16: 132467.6701039159
CPU times: total: 24min 51s
Wall time: 24min 58s
