In [None]:
import warnings
warnings.filterwarnings("ignore")

import vectorbtpro as vbt
import numpy as np
import pandas as pd

# Global vectorbt plotting settings: dark theme and a default figure layout of
# width 800 / height 200 (presumably pixels — TODO confirm).
vbt.settings.set_theme("dark")
vbt.settings.plotting["layout"]["width"] = 800
vbt.settings.plotting['layout']['height'] = 200

import pandas_ta as ta


# Import the data

In [None]:
# Load the pickled Binance BTC data (adjust the path to wherever you saved the pickle file).
btc_90M_db_vbt = vbt.BinanceData.load('data/btc_90M_db_vbt.pkl')

# Label-based date slices include BOTH endpoints, so the in-sample window stops on
# 2022-12-31; otherwise 2023-01-01 would belong to the in-sample and the
# out-of-sample window at the same time.
data = btc_90M_db_vbt['2021-01-01':'2022-12-31']
outofsample_data = btc_90M_db_vbt['2023-01-01':'2023-06-03']
print(data.shape)
print(outofsample_data.shape)

# Optional raw CSV source; only used by the commented-out loader below.
# NOTE(review): machine-specific absolute path — point it at your own copy before re-enabling.
data_path = '/Users/ericervin/Documents/Coding/data-repository/data/fixed_BTCUSDT.csv'
# min_data = vbt.BinanceData.from_csv(data_path)
# print(min_data.shape)

In [None]:
# Feature frame for the Markov analysis. Work on a copy so the columns added
# below are never written into the frame returned by `data.get()`.
markov_df = data.get().copy()
markov_df['pct_change'] = markov_df['Close'].pct_change()
markov_df['log_ret'] = np.log(markov_df['Close']).diff()
# Rolling 100-bar standard deviation of log returns, scaled by sqrt(100)
markov_df['volatility'] = markov_df['log_ret'].rolling(100).std() * np.sqrt(100)
markov_df['volume_chg'] = markov_df['Volume'].pct_change()
# 1 for an up bar, 0 for a down (or unchanged) bar
markov_df['current_state'] = (markov_df['pct_change'] > 0).astype(int)
markov_df = markov_df.dropna()

state = markov_df['current_state']
next_state = state.shift(-1)  # state of the following bar; NaN on the last row, so it never matches

up_counts = int((state == 1).sum())
down_counts = int((state == 0).sum())
up_to_up = ((state == 1) & (next_state == 1)).sum() / up_counts
down_to_up = ((state == 0) & (next_state == 1)).sum() / down_counts
up_to_down = ((state == 1) & (next_state == 0)).sum() / up_counts
down_to_down = ((state == 0) & (next_state == 0)).sum() / down_counts

# Rows are the state of the current bar, columns the state of the next bar —
# the same [from_state, to_state] orientation used by the four-state matrix.
transition_matrix = pd.DataFrame(
    [[up_to_up, up_to_down], [down_to_up, down_to_down]],
    index=pd.Index(["up", "down"], name="from"),
    columns=pd.Index(["up", "down"], name="to"),
)

print(transition_matrix)


In [None]:
# Naive rule: long entry on bars labelled 1 (up), short entry on bars labelled 0 (down).
# td_stop=1 with time_delta_format='rows' is intended to close each position one row later.
up_bar = markov_df['current_state'].eq(1).to_numpy()
down_bar = markov_df['current_state'].eq(0).to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    entries=up_bar,
    short_entries=down_bar,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

# Now let's make this more robust

### Define the thresholds
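For demonstration purposes, I'll use the 80th percentile of the positive `pct_change` values and the 20th percentile of the negative ones as the cut-offs for "large" up and down moves. The 75th and 25th percentiles are also computed and printed for reference, but they are not used to define the states.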

In [None]:
# Split the bar returns by sign so each percentile is taken within one direction only.
bar_returns = markov_df['pct_change']
positive_changes = bar_returns[bar_returns > 0]
negative_changes = bar_returns[bar_returns < 0]

# Upper-tail cut-offs of the gains, lower-tail cut-offs of the losses.
small_up_threshold, large_up_threshold = (positive_changes.quantile(q) for q in (0.75, 0.80))
small_down_threshold, large_down_threshold = (negative_changes.quantile(q) for q in (0.25, 0.20))

print(f"Small up threshold: {small_up_threshold}")
print(f"Large up threshold: {large_up_threshold}")
print(f"Small down threshold: {small_down_threshold}")
print(f"Large down threshold: {large_down_threshold}")

def define_state(x, large_up=None, large_down=None):
    """Classify a single return into one of four move states.

    Parameters
    ----------
    x : float
        The return (pct_change) of one bar.
    large_up : float, optional
        Returns strictly above this value are "large_up". Defaults to the
        notebook-level ``large_up_threshold``.
    large_down : float, optional
        Returns strictly below this value are "large_down". Defaults to the
        notebook-level ``large_down_threshold``.

    Returns
    -------
    str or None
        "large_up", "small_up", "small_down" or "large_down". ``None`` is
        returned for an exactly-zero or NaN return, which belongs to no state.
    """
    # Fall back to the notebook globals only when no explicit cut-off is given,
    # so existing `apply(define_state)` calls keep working unchanged.
    if large_up is None:
        large_up = large_up_threshold
    if large_down is None:
        large_down = large_down_threshold

    if x > large_up:
        return "large_up"
    if x > 0:
        return "small_up"
    if x < large_down:
        return "large_down"
    if x < 0:
        return "small_down"
    # x == 0 or NaN: every comparison above is False.
    return None

# Replace the binary 0/1 labels with the string labels returned by define_state.
# NOTE(review): 'current_state' is overwritten in place, so the earlier binary
# backtest cell (which tests == 1 / == 0) finds no matches if re-run after this cell.
markov_df['current_state'] = markov_df['pct_change'].apply(define_state)

# Row count per state (value_counts omits missing labels by default).
markov_df['current_state'].value_counts()

### Modify the State Definition
Now we'll categorize the pct_change values based on the thresholds:

### Compute Transition Probabilities
Finally, let's update the transition matrix calculation:

In [None]:
states = ["large_up", "small_up", "small_down", "large_down"]
# Start from a float matrix: an empty DataFrame(index=..., columns=...) is object
# dtype, which gets in the way of numeric formatting and plotting later on.
transition_matrix = pd.DataFrame(0.0, index=states, columns=states)

current = markov_df['current_state']
following = current.shift(-1)  # state of the next bar; missing on the last row

for from_state in states:
    from_mask = current == from_state
    # Computed once per from_state instead of once per (from_state, to_state) pair.
    from_count = int(from_mask.sum())
    if from_count == 0:
        # State never observed: leave its row at 0 rather than dividing by zero.
        continue
    for to_state in states:
        transition_count = int((from_mask & (following == to_state)).sum())
        transition_matrix.at[from_state, to_state] = transition_count / from_count

print(transition_matrix)


Next, look at a hypothetical simulation that buys after a `large_up` bar and shorts after a `large_down` bar.

In [None]:
# Long entry on 'large_up' bars, short entry on 'large_down' bars.
# td_stop=1 with time_delta_format='rows' is intended to close each position one row later.
after_large_up = markov_df['current_state'].eq('large_up').to_numpy()
after_large_down = markov_df['current_state'].eq('large_down').to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    entries=after_large_up,
    short_entries=after_large_down,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

### Now let's move to Higher order Markov Chain Analysis



In [None]:
markov_df

In [None]:
import pandas as pd
import numpy as np

# First attempt at a higher-order chain. The "Proper code" cell further down
# recomputes future_transition_probs / past_transition_probs and overwrites
# the results produced here.
order = 3  # you can change this value as needed

# Drop helper columns left over from a previous run (anything with future_ or past_ in the name)
# NOTE(review): the result is a .loc selection, not an explicit copy; the column
# assignments below may raise SettingWithCopyWarning (warnings are silenced at the top).
markov_df = markov_df.loc[:, ~markov_df.columns.str.contains('future_|past_')]

# future_state{i} holds the state i bars ahead of the current bar
for i in range(1, order):
    markov_df[f'future_state{i}'] = markov_df['current_state'].shift(-i)

# Tuple of (current state, following states), with missing entries removed;
# built row by row, which is slow on a long frame
markov_df['future_sequence'] = markov_df[['current_state'] + [f'future_state{i}' for i in range(1, order)]].apply(lambda row: tuple(row.dropna()), axis=1)

# past_state{i} holds the state i bars before the current bar
for i in range(1, order):
    markov_df[f'past_state{i}'] = markov_df['current_state'].shift(i)

# Tuple of (current state, preceding states), with missing entries removed
markov_df['past_sequence'] = markov_df[['current_state'] + [f'past_state{i}' for i in range(1, order)]].apply(lambda row: tuple(x for x in row if pd.notna(x)), axis=1)

# Count current_state within each future sequence
# NOTE(review): every sequence includes current_state as its own first element, so
# for rows with a non-missing state these counts are degenerate (one state per
# sequence) rather than real transition counts.
future_transition_counts = markov_df.groupby('future_sequence')['current_state'].value_counts().unstack().fillna(0)

# Normalise each row of counts to shares
future_transition_probs = future_transition_counts.div(future_transition_counts.sum(axis=1), axis=0).fillna(0)

# Count current_state within each past sequence (same caveat as above)
past_transition_counts = markov_df.groupby('past_sequence')['current_state'].value_counts().unstack().fillna(0)

# Normalise each row of counts to shares
past_transition_probs = past_transition_counts.div(past_transition_counts.sum(axis=1), axis=0).fillna(0)

# Put the state columns in a fixed order (raises KeyError if a state never occurred)
ordered_columns = ['large_up', 'small_up', 'small_down', 'large_down']
future_transition_probs = future_transition_probs[ordered_columns]
past_transition_probs = past_transition_probs[ordered_columns]

# Sort the rows by sequence
future_transition_probs.sort_index(inplace=True)
past_transition_probs.sort_index(inplace=True)

# Print the future and past tables
print(future_transition_probs)
print(past_transition_probs)


In [None]:
markov_df

In [None]:


def transition_probabilities(markov_df, state_column, sequence_column):
    """Tally, per state, how often each state appears in that state's sequences.

    For every row, each element of the row's sequence is counted under the
    row's state; the counts of each state are then normalised to sum to 1.

    Parameters
    ----------
    markov_df : pandas.DataFrame
        Frame holding one state and one sequence per row.
    state_column : str
        Name of the column with the row's state label.
    sequence_column : str
        Name of the column with an iterable of states (e.g. a tuple).
        Rows whose sequence is a missing scalar (None / NaN) are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per state, one column per sequence element, filled with the
        normalised shares (0 where a combination never occurred).

    Side effects
    ------------
    Prints the raw counts and the per-state totals.
    """
    transition_counts = {}
    for state, sequence in zip(markov_df[state_column], markov_df[sequence_column]):
        # `pd.notna(sequence)` on a tuple returns an array, and `if <array>` raises
        # "truth value ... is ambiguous"; only a scalar can be "missing" here.
        if pd.api.types.is_scalar(sequence) and pd.isna(sequence):
            continue
        state_counts = transition_counts.setdefault(state, {})
        for seq_state in sequence:
            state_counts[seq_state] = state_counts.get(seq_state, 0) + 1

    print(f"Transition counts: {transition_counts}")

    # Named differently from the function so the function is not shadowed inside its own body.
    probabilities_by_state = {}
    for state, counts in transition_counts.items():
        total = sum(counts.values())
        print(f"State: {state}, Total: {total}")
        probabilities_by_state[state] = {
            seq_state: count / total for seq_state, count in counts.items()
        }

    return pd.DataFrame(probabilities_by_state).fillna(0).T



# Proper code


In [None]:
order = 3  # length of each state run: (order - 1) conditioning states + 1 target state
if order < 2:
    raise ValueError("order must be at least 2 (one conditioning state plus one target state)")

# Drop helper columns from earlier runs. The explicit copy makes sure the new
# columns below are added to an independent frame, not to a slice of the old one.
markov_df = markov_df.loc[:, ~markov_df.columns.str.contains('future_|past_')].copy()

ordered_columns = ['large_up', 'small_up', 'small_down', 'large_down']

# future_state{i} is the state i bars ahead (future_state0 is the current bar).
future_cols = [f'future_state{i}' for i in range(order)]
for i, col in enumerate(future_cols):
    markov_df[col] = markov_df['current_state'].shift(-i)

# Rows: the first (order - 1) states of a run; columns: the state that completed it.
# The target column follows `order` instead of being hard-coded to future_state2.
future_states_df = markov_df[future_cols]
future_transition_counts = (
    future_states_df.groupby(future_cols[:-1])[future_cols[-1]]
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)

# Normalise each row of counts to probabilities.
future_transition_probs = future_transition_counts.div(future_transition_counts.sum(axis=1), axis=0)

# past_state{i} is the state i bars back (past_state0 is the current bar).
past_cols = [f'past_state{i}' for i in range(order)]
for i, col in enumerate(past_cols):
    markov_df[col] = markov_df['current_state'].shift(i)

# Rows: the (order - 1) most recent states; columns: the state (order - 1) bars back.
past_states_df = markov_df[past_cols]
past_transition_counts = (
    past_states_df.groupby(past_cols[:-1])[past_cols[-1]]
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)

past_transition_probs = past_transition_counts.div(past_transition_counts.sum(axis=1), axis=0)

# Fixed column order; reindex (rather than [...]) keeps the cell working when a
# state never occurs as a target, filling that column with 0.
future_transition_probs = future_transition_probs.reindex(columns=ordered_columns, fill_value=0).sort_index()
past_transition_probs = past_transition_probs.reindex(columns=ordered_columns, fill_value=0).sort_index()

# Print the future and past transition matrices
print(future_transition_probs)
print(past_transition_probs)


In [None]:
# Short-only test: current bar is 'large_down' and the bar before it was 'large_up'.
# td_stop=1 with time_delta_format='rows' is intended to close each position one row later.
down_after_up = (
    markov_df['past_state0'].eq('large_down') & markov_df['past_state1'].eq('large_up')
).to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    # entries=np.where(markov_df['current_state']==1,True,False),
    short_entries=down_after_up,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

In [None]:
# order = 3
# # drop all columns with future_ or past_ in the name
# markov_df = markov_df.loc[:,~markov_df.columns.str.contains('future_|past_')]

# # Create future state columns for the current state and future states
# for i in range(order):
#     markov_df[f'future_state{i}'] = markov_df['current_state'].shift(-i)

# # Generate sequences of future states
# future_sequences = [tuple(markov_df[[f'future_state{i}' for i in range(order)]].iloc[i].dropna().tolist()) for i in range(len(markov_df) - order + 1)]

# # Create past state columns for the current state and previous states
# for i in range(order):
#     markov_df[f'past_state{i}'] = markov_df['current_state'].shift(i)

# # Generate sequences of past states
# past_sequences = [tuple(markov_df[[f'past_state{i}' for i in range(order)]].iloc[i].dropna().tolist()) for i in range(len(markov_df) - order + 1)]

# # Count transitions for future sequences
# future_transition_counts = {}
# for seq in future_sequences:
#     from_states = seq[:-1]
#     to_state = seq[-1]
#     from_states_str = ' -> '.join(map(str, from_states))  # Convert to string
#     if from_states_str not in future_transition_counts:
#         future_transition_counts[from_states_str] = {}
#     if to_state not in future_transition_counts[from_states_str]:
#         future_transition_counts[from_states_str][to_state] = 0
#     future_transition_counts[from_states_str][to_state] += 1

# # Calculate transition probabilities for future sequences
# future_transition_probs = {}
# for from_states_str, to_states in future_transition_counts.items():
#     total_counts = sum(to_states.values())
#     future_transition_probs[from_states_str] = {to_state: count / total_counts for to_state, count in to_states.items()}

# # Convert to DataFrame for future sequences
# future_transition_matrix = pd.DataFrame(future_transition_probs).T.fillna(0)

# # Order the columns
# ordered_columns = ['large_up', 'small_up', 'small_down', 'large_down']
# future_transition_matrix = future_transition_matrix[ordered_columns]

# # Sort the future transition matrix
# future_transition_matrix.sort_index(inplace=True)

# # Print the future transition matrix
# print(future_transition_matrix)


# Plot the transition matrix as a heatmap

In [None]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# def plot_transition_matrix_heatmap(transition_matrix, title="Transition Matrix Heatmap"):
#     fig, ax = plt.subplots(figsize=(15, 200))
#     sns.heatmap(
#         transition_matrix.apply(pd.to_numeric, errors='coerce'),  # Convert values to numeric, coercing errors to NaN
#         annot=True,
#         cmap='coolwarm',
#         fmt=".2f",
#         linewidths=.5,
#         ax=ax
#     )
#     ax.set_title(title)

#     # Create a second x-axis at the top of the plot with the same ticks and labels as the main x-axis
#     ax2 = ax.twiny()
#     ax2.set_xlim(ax.get_xlim())
#     ax2.set_xticks(ax.get_xticks())
#     ax2.set_xticklabels(ax.get_xticklabels())
#     ax2.tick_params(axis='x', direction='in', top=True, bottom=False)

#     plt.show()

# plot_transition_matrix_heatmap(transition_matrix, "Transition Matrix Heatmap")


In [None]:
# Count occurrences of each (order - 1)-state prefix.
# `future_sequences` is only built by a commented-out cell, so the prefixes are
# derived here directly from the future_state columns of markov_df.
run_columns = [f'future_state{i}' for i in range(order)]
# Keep only complete runs: every one of the `order` states must be present.
complete_runs = markov_df[run_columns].dropna()

# Join the first (order - 1) states of each run as "state -> state -> ..."
sequence_keys = complete_runs[run_columns[0]].astype(str)
for col in run_columns[1:order - 1]:
    sequence_keys = sequence_keys + ' -> ' + complete_runs[col].astype(str)

# sort=False keeps the keys in order of first appearance.
sequence_occurrences = sequence_keys.groupby(sequence_keys, sort=False).size().to_dict()

# Print the number of occurrences for each prefix
for seq_str, count in sequence_occurrences.items():
    print(f'{seq_str}: {count}')

# Convert the sequence occurrences to a DataFrame
occurrences_df = pd.DataFrame(list(sequence_occurrences.items()), columns=['sequence', 'occurrences'])

# Display the DataFrame
print(occurrences_df)


In [None]:
markov_df

In [None]:
occurrences_df

In [None]:
# `future_transition_matrix` is not assigned by any active cell before this point
# (only by commented-out code), so show the probability table that does exist.
future_transition_probs

In [None]:
# Short-only test: the current bar and the bar before it are both 'large_down'.
two_large_down = (
    markov_df['past_state0'].eq('large_down') & markov_df['past_state1'].eq('large_down')
).to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    # entries=np.where(markov_df['current_state']==1,True,False),
    short_entries=two_large_down,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

In [None]:
# Short-only test: the current bar alone is 'large_down'.
one_large_down = markov_df['past_state0'].eq('large_down').to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    # entries=np.where(markov_df['current_state']==1,True,False),
    short_entries=one_large_down,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

In [None]:
# Short-only test: the current bar and the two bars before it are all 'large_down'.
three_large_down = (
    markov_df['past_state0'].eq('large_down')
    & markov_df['past_state1'].eq('large_down')
    & markov_df['past_state2'].eq('large_down')
).to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    # entries=np.where(markov_df['current_state']==1,True,False),
    short_entries=three_large_down,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

In [None]:
pf.plot().show()

In [None]:
# Build the probability table keyed by "state -> state -> ..." strings.
# `future_transition_matrix` is not assigned by any active cell, so it is derived
# here from future_transition_probs, whose index holds the conditioning states.
future_transition_matrix = future_transition_probs.copy()
future_transition_matrix.index = [
    key if isinstance(key, str) else ' -> '.join(map(str, key))
    for key in future_transition_probs.index
]

# Attach the occurrence count of each conditioning sequence to its probabilities
merged_df = future_transition_matrix.merge(
    occurrences_df,
    left_index=True,
    right_on='sequence',
    how='left'
)

# Sequences without a recorded count get 0 occurrences. Plain assignment is used
# because fillna(inplace=True) on a column selection may act on a temporary copy.
merged_df['occurrences'] = merged_df['occurrences'].fillna(0)
merged_df = merged_df.set_index('sequence')
# Show the resulting dataframe
print(merged_df)


In [None]:
merged_df

In [None]:
# Sequences followed by a down move (small or large) with probability above 0.65
# that were observed more than 100 times
print(merged_df.query('(small_down + large_down) > 0.65 and occurrences > 100'))
# NOTE(review): the heading below announces a "number", but the next line prints the matching rows.
print("\nNumber of up squences with a 40% probability: ")
# Sequences followed by an up move (small or large) with probability above 0.40
# that were observed more than 100 times
print(merged_df.query('(small_up + large_up) > 0.40 and occurrences > 100'))

In [None]:
# Fraction of bars whose pct_change is negative (count of negative rows / count of all non-missing rows)
markov_df.query('pct_change < 0')['pct_change'].count() / markov_df['pct_change'].count()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_transition_matrix_heatmap_with_counts(transition_matrix, title="Transition Matrix Heatmap", figsize=None):
    """Plot transition probabilities and their occurrence counts side by side.

    Parameters
    ----------
    transition_matrix : pandas.DataFrame
        One row per sequence. Must contain an 'occurrences' column; every other
        column that can be read as numeric is plotted as a probability.
        Non-numeric columns (e.g. a text 'signal' column) are ignored.
    title : str
        Title of the probability heatmap.
    figsize : tuple of float, optional
        Figure size in inches. By default the height grows with the number of rows.

    Raises
    ------
    KeyError
        If `transition_matrix` has no 'occurrences' column.
    """
    if 'occurrences' not in transition_matrix.columns:
        raise KeyError("transition_matrix must contain an 'occurrences' column")

    # Select by name instead of position, so the plot does not depend on
    # 'occurrences' being the last column; drop columns that hold no numbers.
    probabilities = (
        transition_matrix.drop(columns='occurrences')
        .apply(pd.to_numeric, errors='coerce')
        .dropna(axis=1, how='all')
    )
    # fmt="d" below needs integers; counts may be floats after a merge / fillna.
    occurrences = transition_matrix[['occurrences']].fillna(0).astype(int)

    if figsize is None:
        figsize = (15, max(4.0, 0.4 * len(transition_matrix) + 1.5))

    # Two separate panels: drawing both heatmaps on twin axes would put the
    # second one on top of the first.
    fig, (ax1, ax2) = plt.subplots(
        1, 2,
        figsize=figsize,
        gridspec_kw={'width_ratios': [max(1, probabilities.shape[1]), 1]},
    )

    # Left panel: probabilities
    sns.heatmap(
        probabilities,
        cmap='coolwarm',
        annot=True,
        fmt=".2f",
        linewidths=.5,
        ax=ax1,
        cbar_kws={'label': 'Probability'}
    )
    ax1.set_title(title)

    # Right panel: occurrence counts; rows are in the same order as on the left,
    # so the row labels are not repeated.
    sns.heatmap(
        occurrences,
        cmap='YlGnBu',
        annot=True,
        fmt="d",  # Integer format
        linewidths=.5,
        ax=ax2,
        yticklabels=False,
        cbar_kws={'label': 'Occurrences'}
    )
    ax2.set_ylabel('')

    fig.tight_layout()
    plt.show()

# Plot the probabilities together with their occurrence counts. merged_df is the
# frame that carries the 'occurrences' column; the plain 4x4 transition_matrix
# does not have it.
plot_transition_matrix_heatmap_with_counts(merged_df, "Transition Matrix Heatmap with Counts")


# Create a buy or sell signal based on probabilities
Identify whether the probability of a large down or a large up move is high, and record `sell` or `buy` (otherwise `hold`) in the `signal` column of the original dataframe.

In [None]:
import numpy as np

# A sequence produces a signal when the following large move is likely enough
# (probability > 0.2) and the sequence was seen often enough (> 100 occurrences).
sell_condition = (merged_df['large_down'] > 0.2) & (merged_df['occurrences'] > 100)
buy_condition = (merged_df['large_up'] > 0.2) & (merged_df['occurrences'] > 100)

print("Number of rows meeting condition for 'sell':", sell_condition.sum())
print("Number of rows meeting condition for 'buy':", buy_condition.sum())

# Remove signal columns from an earlier run so the merge below yields a single
# 'signal' column instead of 'signal_x' / 'signal_y'.
columns_to_drop = ['signal_x', 'signal_y', 'signal']
markov_df = markov_df.drop(columns=columns_to_drop, errors='ignore')

# np.select takes the first condition that holds, so 'sell' wins if both are true.
conditions = [sell_condition, buy_condition]
choices = ['sell', 'buy']
merged_df['signal'] = np.select(conditions, choices, default='hold')

# Determine the order from the lookup keys themselves: each merged_df key is
# "state -> state -> ..." with (order - 1) states. A hard-coded value can point
# at past_state columns that were never created.
if merged_df.empty:
    raise ValueError("merged_df is empty; build the transition table before deriving signals")
chain_length = len(str(merged_df.index[0]).split(' -> '))
order = chain_length + 1
print(f"The Order previously used was {order}")

# merged_df keys list the states oldest first, while past_state{i} is the state
# i bars back, so the key for a bar runs from the largest lag down to
# past_state0 (the current bar). Missing states become '' and simply never match.
lag_columns = [f'past_state{i}' for i in range(chain_length - 1, -1, -1)]
markov_chain = markov_df[lag_columns[0]].fillna('')
for lag_column in lag_columns[1:]:
    markov_chain = markov_chain + ' -> ' + markov_df[lag_column].fillna('')
markov_df['markov_chain'] = markov_chain

# Look up each bar's signal by its chain key
markov_df = markov_df.merge(merged_df[['signal']], left_on='markov_chain', right_index=True, how='left')

# Bars whose chain has no entry in merged_df are treated as 'hold'
markov_df['signal'] = markov_df['signal'].fillna('hold')

# Signal distribution over the sequences
print(merged_df.signal.value_counts())


In [None]:
markov_df

In [None]:
# Sanity check for the merge key: which chain strings occur both in
# markov_df['markov_chain'] and in merged_df's index?
markov_chain_values = set(markov_df['markov_chain'])
index_values = set(merged_df.index)
common_values = markov_chain_values.intersection(index_values)

# Report the overlap size and, when there is one, a few example keys
print("Number of common values:", len(common_values))
if not common_values:
    print("No common values found.")
else:
    print("Sample common values:", list(common_values)[:5])


In [None]:
merged_df

In [None]:
# Only the last expression of a cell is rendered, so print both tables explicitly;
# otherwise the markov_df counts are computed and silently discarded.
print(markov_df.signal.value_counts())
print(merged_df.signal.value_counts())


In [None]:
# Clear whichever signal columns earlier merges left behind ...
columns_to_drop = ['signal', 'signal_x', 'signal_y']
stale_columns = [name for name in columns_to_drop if name in markov_df.columns]
markov_df = markov_df.drop(columns=stale_columns)

# ... then attach a fresh 'signal' column, looked up by each bar's chain key
signal_lookup = merged_df[['signal']]
markov_df = markov_df.merge(signal_lookup, left_on='markov_chain', right_index=True, how='left')

# List the columns present after the merge
print(markov_df.columns)


In [None]:
markov_df.iloc[5].markov_chain

In [None]:
# Number of bars flagged 'buy'
print(markov_df['signal'].eq('buy').to_numpy().sum())

# Number of bars flagged 'sell'
print(markov_df['signal'].eq('sell').to_numpy().sum())

In [None]:
# Chain strings shared by markov_df['markov_chain'] and merged_df's index
chain_keys = set(markov_df['markov_chain'])
lookup_keys = set(merged_df.index)
common_values = chain_keys.intersection(lookup_keys)

# Overlap size and a few example keys
print("Number of common values:", len(common_values))
print("Sample common values:", list(common_values)[:5])


In [None]:
# Compare the dtypes of the two sides of the merge key (chain column vs. lookup index)
print(markov_df['markov_chain'].dtype)
print(merged_df.index.dtype)


In [None]:
# Remove stray leading/trailing whitespace on both sides of the join key
markov_df['markov_chain'] = markov_df['markov_chain'].str.strip()
merged_df.index = merged_df.index.str.strip()

# markov_df already carries a 'signal' column from the earlier merges. Merging
# again without removing it would produce 'signal_x' / 'signal_y', and the
# markov_df['signal'] lookups below would raise KeyError.
markov_df = markov_df.drop(columns=['signal', 'signal_x', 'signal_y'], errors='ignore')

# Perform the merge
markov_df = markov_df.merge(merged_df[['signal']], left_on='markov_chain', right_index=True, how='left')

# Count the 'buy' and 'sell' signals
print((markov_df['signal'] == 'buy').sum())
print((markov_df['signal'] == 'sell').sum())



In [None]:
# Backtest the probability-derived signals: long entry on 'buy', short entry on 'sell'.
# td_stop=1 with time_delta_format='rows' is intended to close each position one row later.
buy_bar = markov_df['signal'].eq('buy').to_numpy()
sell_bar = markov_df['signal'].eq('sell').to_numpy()

pf = vbt.Portfolio.from_signals(
    markov_df['Close'],
    entries=buy_bar,
    short_entries=sell_bar,
    td_stop=1,
    time_delta_format='rows',
    freq='10T',
    # fees = 0.0004,
)

pf.stats()

In [None]:
pf.plot().show()
