In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mplfinance as mpf
from pyclustering.cluster.silhouette import silhouette_ksearch_type, silhouette_ksearch
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from perceptually_important import find_pips
from sklearn.preprocessing import StandardScaler

In [17]:
def load_data(file_path):
    # Load data from CSV file
    data = pd.read_csv(file_path)
    
    # Convert 'date' column to datetime and set it as the index
    data['date'] = pd.to_datetime(data['date'])
    data = data.set_index('date')
    
    # Take the logarithm of the closing prices
    data = np.log(data)
    
    return data

In [18]:
def find_unique_patterns(data, n_pips, lookback):
    unique_pip_indices = []
    unique_pip_patterns = []
    last_pips_x = [0] * n_pips

    for i in range(lookback - 1, len(data) - 6):
        start_i = i - lookback + 1
        window = data['close'].iloc[start_i: i + 1].to_numpy()
        pips_x, pips_y = find_pips(window, n_pips, 3)
        pips_x = [j + start_i for j in pips_x]

        # Check internal pips to see if it is the same as last
        same = all(pips_x[j] == last_pips_x[j] for j in range(1, n_pips - 1))

        if not same:
            # Z-Score normalize pattern
            pips_y = list(StandardScaler().fit_transform(np.array(pips_y).reshape(-1, 1)).flatten())
            unique_pip_patterns.append(pips_y)
            unique_pip_indices.append(i)

        last_pips_x = pips_x

    return unique_pip_indices, unique_pip_patterns


def kmeans_cluster_patterns(unique_pip_patterns, amount_clusters):
    initial_centers = kmeans_plusplus_initializer(unique_pip_patterns, amount_clusters).initialize()
    kmeans_instance = kmeans(unique_pip_patterns, initial_centers)
    kmeans_instance.process()

    pip_clusters = kmeans_instance.get_clusters()
    cluster_centers = kmeans_instance.get_centers()

    return pip_clusters, cluster_centers


def get_martin(rets):
    rsum = np.sum(rets)
    short = False

    if rsum < 0.0:
        rets *= -1
        rsum *= -1
        short = True

    csum = np.cumsum(rets)
    eq = pd.Series(np.exp(csum))
    sumsq = np.sum(((eq / eq.cummax()) - 1) ** 2.0)
    ulcer_index = (sumsq / len(rets)) ** 0.5
    martin = rsum / ulcer_index

    if short:
        martin = -martin

    return martin


def get_cluster_signals(pip_clusters, data, hold_period):
    cluster_signals = []

    for clust in pip_clusters:
        signal = np.zeros(len(data))
        for mem in clust:
            arr_i = mem
            signal[arr_i: arr_i + hold_period] = 1.

        cluster_signals.append(signal)

    return cluster_signals


def assign_clusters(pip_clusters, cluster_signals, returns):
    selected_long = [np.argmax([get_martin(returns * sig) for sig in cluster_signals])]
    selected_short = [np.argmin([get_martin(returns * sig) for sig in cluster_signals])]

    return selected_long, selected_short


def get_total_performance(pip_clusters, selected_long, selected_short, cluster_signals, returns):

    file_path = 'BTCUSDT3600.csv'
    data = load_data(file_path)

    long_signal = np.zeros(len(data))
    short_signal = np.zeros(len(data))

    for clust_i in range(len(pip_clusters)):
        if clust_i in selected_long:
            long_signal[:len(cluster_signals[clust_i])] += cluster_signals[clust_i]
        elif clust_i in selected_short:
            short_signal[:len(cluster_signals[clust_i])] += cluster_signals[clust_i]

    long_signal /= len(selected_long)
    short_signal /= len(selected_short)
    short_signal *= -1

    rets = (long_signal + short_signal) * returns

    martin = get_martin(rets)
    return martin



def plot_cluster_examples(candle_data, pip_clusters, unique_pip_indices, lookback, n_pips):
    plt.style.use('dark_background')
    grid_size = 5
    fig, axs = plt.subplots(grid_size, grid_size)
    flat_axs = axs.flatten()

    for i in range(len(flat_axs)):
        if i >= len(pip_clusters):
            break

        pat_i = unique_pip_indices[pip_clusters[i][0]]
        data_slice = candle_data.iloc[pat_i - lookback + 1: pat_i + 1]
        idx = data_slice.index
        plot_pip_x, plot_pip_y = find_pips(data_slice['close'].to_numpy(), n_pips, 3)

        pip_lines = []
        colors = []

        for line_i in range(n_pips - 1):
            l0 = [(idx[plot_pip_x[line_i]], plot_pip_y[line_i]),
                  (idx[plot_pip_x[line_i + 1]], plot_pip_y[line_i + 1])]
            pip_lines.append(l0)
            colors.append('w')

        mpf.plot(data_slice, type='candle', alines=dict(alines=pip_lines, colors=colors),
                 ax=flat_axs[i], style='charles', update_width_config=dict(candle_linewidth=1.75))
        flat_axs[i].set_yticklabels([])
        flat_axs[i].set_xticklabels([])
        flat_axs[i].set_xticks([])
        flat_axs[i].set_yticks([])
        flat_axs[i].set_ylabel("")

    fig.suptitle("Cluster Examples", fontsize=32)
    plt.show()


def train_and_evaluate(data, n_pips=5, lookback=24, hold_period=6, n_reps=-1):
    returns = pd.Series(data['close']).diff().shift(-1)
    unique_pip_indices, unique_pip_patterns = find_unique_patterns(data, n_pips, lookback)

    search_instance = silhouette_ksearch(
        unique_pip_patterns, 5, 40, algorithm=silhouette_ksearch_type.KMEANS).process()

    amount = search_instance.get_amount()
    pip_clusters, cluster_centers = kmeans_cluster_patterns(unique_pip_patterns, amount)

    cluster_signals = get_cluster_signals(pip_clusters, data, hold_period)
    selected_long, selected_short = assign_clusters(pip_clusters, cluster_signals, returns)

    fit_martin = get_total_performance(pip_clusters, selected_long, selected_short, cluster_signals, returns)
    print("Fit Martin:", fit_martin)

    if n_reps <= 1:
        return

    # Start monte carlo permutation test
    for rep in range(1, n_reps):
        x = np.diff(data['close']).copy()
        np.random.shuffle(x)
        x = np.concatenate([np.array([data['close'].iloc[0]]), x])
        data['close'] = np.cumsum(x)
        returns = pd.Series(data['close']).diff().shift(-1)

        print("rep", rep)
        unique_pip_indices, unique_pip_patterns = find_unique_patterns(data, n_pips, lookback)
        search_instance = silhouette_ksearch(
            unique_pip_patterns, 5, 40, algorithm=silhouette_ksearch_type.KMEANS).process()
        amount = search_instance.get_amount()
        pip_clusters, cluster_centers = kmeans_cluster_patterns(unique_pip_patterns, amount)
        cluster_signals = get_cluster_signals(pip_clusters, data, hold_period)
        selected_long, selected_short = assign_clusters(pip_clusters, cluster_signals, returns)
        perm_martin = get_total_performance(pip_clusters, selected_long, selected_short, cluster_signals, returns)

        print("Permutation Martin:", perm_martin)


In [19]:
# Load data
data = pd.read_csv('BTCUSDT3600.csv')
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data = np.log(data)

# Filter data
data = data[data.index < '01-01-2020']


In [20]:
# Train and evaluate
train_and_evaluate(data, n_pips=5, lookback=24, hold_period=6, n_reps=-1)

ValueError: operands could not be broadcast together with shapes (43654,) (17350,) 