In [None]:
import numpy as np
from scipy.signal import argrelextrema
import ccxt
import pandas as pd
import numba as nb
from sklearn.preprocessing import StandardScaler, RobustScaler
# Route DataFrame/Series `.plot()` calls through plotly.
pd.options.plotting.backend = "plotly"
# Raise pandas' display limits so frames up to 500 rows / 500 columns are
# shown in full, using a display width of 1000 characters.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [None]:
# NOTE: a bare `nb.jit` expression used to precede this function. It was never
# applied as a decorator and therefore had no effect; the function has always
# run as plain Python.
def create_target(df, long, method='polyfit', polyfit_var='close', pct=0.1):
    """Label every full rolling window of ``df[polyfit_var]`` with its linear trend.

    For each window of ``long`` consecutive rows a degree-1 polynomial is
    fitted against the positions ``0 .. long-1``; the slope (rounded to 8
    decimals) and its sign are recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column named by ``polyfit_var``.
    long : int
        Window length in rows. Windows shorter than ``long`` (the leading
        partial windows produced when iterating a rolling object) are skipped.
    method : str
        Only ``'polyfit'`` is implemented.
    polyfit_var : str
        Column whose values are fitted.
    pct : float
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per full window with columns ``trend`` (1, 0 or -1),
        ``slope``, ``start_windows`` and ``end_windows`` (index labels of the
        first and last row of the window).

    Raises
    ------
    ValueError
        If ``method`` is not ``'polyfit'``. Previously this path failed with an
        UnboundLocalError because the result lists were never created.
    """
    if method != 'polyfit':
        raise ValueError(f"Unsupported method {method!r}; only 'polyfit' is implemented.")

    trend_list = []
    slope_list = []
    start_list = []
    end_list = []

    positions = np.arange(long)
    for window in df[polyfit_var].rolling(window=long, min_periods=long):
        # Iterating a rolling object also yields the short leading windows.
        if len(window) < long:
            continue

        # For deg=1 polyfit returns [slope, intercept].
        slope = np.round(np.polyfit(positions, window.values, deg=1)[0], decimals=8)

        if slope > 0:
            trend = 1
        elif slope == 0:
            trend = 0
        else:
            # Also reached for a NaN slope, matching the original np.where chain.
            trend = -1

        slope_list.append(slope)
        trend_list.append(trend)
        start_list.append(window.index[0])
        end_list.append(window.index[long - 1])

    return pd.DataFrame({
        'trend': trend_list,
        'slope': slope_list,
        'start_windows': start_list,
        'end_windows': end_list,
    })

In [None]:
def adjusted_sigmoid(x, k=0.5, x0=0):
    """
    Logistic curve rescaled so its output lies between -1 and 1.

    x  : scalar or array of inputs.
    k  : steepness of the transition.
    x0 : input value at which the output is 0 (the curve's midpoint).
    """
    exponent = -k * (x - x0)
    denominator = 1 + np.exp(exponent)
    # 2 / denominator spans (0, 2); shifting by 1 centres the curve on zero.
    return 2 / denominator - 1

In [None]:
def download_1000_candles(market='ADA/USDT', tf='1h'):
    """Fetch recent OHLCV candles for `market` from Binance through ccxt.

    Requests 1001 candles at timeframe `tf` and discards the final row so that
    only candles assumed to be closed remain.

    Returns a DataFrame with columns timestamp, open, high, low, close and
    volume. `timestamp` is converted from epoch milliseconds to UTC datetimes
    and stays a regular column, so the frame keeps its default integer index.

    NOTE(review): whether the exchange actually honours limit=1001 is not
    visible here; confirm the returned row count if exactly 1000 rows matter.
    """
    exchange = ccxt.binance()
    # One more than needed: the newest candle is dropped below.
    raw_rows = exchange.fetch_ohlcv(market, tf, limit=1001)

    column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    frame = pd.DataFrame(raw_rows, columns=column_names)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms', utc=True)

    # Keep everything except the last row; copy so the result is independent
    # of the intermediate frame.
    closed_candles = frame.iloc[:-1].copy()
    return closed_candles

# download_1000_candles()

In [None]:
# Download hourly BTC/USDT candles into `candles`, then display the frame.
candles = download_1000_candles(market='BTC/USDT', tf='1h')
candles

In [None]:
# Label local extrema on `candles` and smooth the labels into an `extrema` column.
# `kernel` is passed to argrelextrema as `order`: the number of candles on each
# side that a low/high is compared against.
kernel = 6
min_peaks = argrelextrema(candles["low"].values, np.less, order=kernel)
max_peaks = argrelextrema(candles["high"].values, np.greater, order=kernel)

# argrelextrema returns row *positions*, so build the raw labels positionally
# instead of writing them through label-based `.at`: 0 everywhere, -1 at local
# lows, +1 at local highs (a high overrides a low on the same candle, as before).
extrema_labels = np.zeros(len(candles), dtype=int)
extrema_labels[min_peaks[0]] = -1
extrema_labels[max_peaks[0]] = 1

# Smooth with a centred 3-candle gaussian window. The edge rows have no full
# window and come back as NaN; fill them with 0. The result is assigned in a
# single statement because `candles['extrema'].fillna(0, inplace=True)` is a
# chained in-place call, which newer pandas versions warn about and which
# does nothing under Copy-on-Write.
candles["extrema"] = (
    pd.Series(extrema_labels, index=candles.index)
    .rolling(window=3, win_type='gaussian', center=True)
    .mean(std=0.5)
    .fillna(0)
)

In [None]:
# Count how often each distinct value occurs in the `extrema` column.
candles['extrema'].value_counts()

In [None]:
# Plot the `extrema` column against the frame's index.
candles['extrema'].plot()

In [129]:
import numpy as np
import pandas as pd
from scipy.signal import argrelextrema
import scipy.stats as stats

def extrema_analyze_candles(candles, kernel=6):
    """Print statistics for swings from each local low to the next local high.

    Local lows are found on the ``low`` column and local highs on the ``high``
    column with ``scipy.signal.argrelextrema(order=kernel)``. Every low is
    paired with the first high that occurs after it; the move is measured as
    the percentage change of ``close`` between those two rows.

    Parameters
    ----------
    candles : pandas.DataFrame
        Must contain ``low``, ``high`` and ``close`` columns. Rows are
        addressed by position, so any index type is accepted.
    kernel : int
        Number of candles on each side used to decide whether a row is a
        local extremum.

    Returns
    -------
    None
        Results are printed: peak counts, ``describe()`` of the percentage
        moves and of the candle counts between paired peaks, and the Pearson
        correlation between the two when at least two swings exist.
    """
    # Find peaks (arrays of row positions)
    min_peaks = argrelextrema(candles["low"].values, np.less, order=kernel)[0]
    max_peaks = argrelextrema(candles["high"].values, np.greater, order=kernel)[0]

    num_low_peaks = len(min_peaks)
    num_high_peaks = len(max_peaks)
    total_peaks = num_low_peaks + num_high_peaks

    # Peak positions index into this array directly. Label-based `.at` lookups
    # only worked when the frame had a default 0..n-1 index.
    close = candles["close"].to_numpy()

    distances = []
    candles_between_peaks = []
    for low_peak in min_peaks:
        later_highs = max_peaks[max_peaks > low_peak]
        if later_highs.size == 0:
            continue  # no high after this low, nothing to pair with
        high_peak = later_highs[0]
        low_price = close[low_peak]
        high_price = close[high_peak]
        distances.append(((high_price - low_price) / low_price) * 100)
        candles_between_peaks.append(high_peak - low_peak)

    # dtype=float keeps describe() numeric even when no swing was found.
    distances_description = pd.Series(distances, dtype=float).describe()
    candles_between_peaks_description = pd.Series(candles_between_peaks, dtype=float).describe()

    print(f"Low peaks: {num_low_peaks}")
    print(f"High peaks: {num_high_peaks}")
    print(f"Total peaks: {total_peaks}")
    print(f"Distances (%): {distances_description}")
    print(f"Candles between peaks: {candles_between_peaks_description}")

    # Both lists always have the same length, so one length check suffices.
    if len(distances) > 1:
        correlation, p_value = stats.pearsonr(candles_between_peaks, distances)

        print(f"Correlation between number of candles and return price: {correlation}")
        print(f"P-value of the correlation: {p_value}")
    else:
        print("Not enough data points for correlation analysis.")


In [130]:
def extrema_analyze_candles2(candles, kernel=6):
    """Analyse upward (low -> next high) and downward (high -> next low) swings.

    Local lows come from the ``low`` column and local highs from the ``high``
    column via ``scipy.signal.argrelextrema(order=kernel)``. Each peak is
    paired with the first peak of the opposite kind that follows it, and the
    move is the signed percentage change of ``close`` between the two rows
    (so downward moves are normally negative).

    Parameters
    ----------
    candles : pandas.DataFrame
        Must contain ``low``, ``high`` and ``close`` columns. Rows are
        addressed by position, so any index type is accepted.
    kernel : int
        Number of candles on each side used to decide whether a row is a
        local extremum.

    Returns
    -------
    dict
        ``{"upward": {...}, "downward": {...}}``. Both entries hold the
        ``describe()`` summaries under ``distances`` and
        ``candles_between_peaks`` plus ``correlation`` and ``p_value``; the
        upward entry additionally carries the peak counts. ``correlation``
        and ``p_value`` are ``None`` for a direction with fewer than two
        swings (previously that case crashed with UnboundLocalError).
    """
    # Find peaks (arrays of row positions)
    min_peaks = argrelextrema(candles["low"].values, np.less, order=kernel)[0]
    max_peaks = argrelextrema(candles["high"].values, np.greater, order=kernel)[0]

    num_low_peaks = len(min_peaks)
    num_high_peaks = len(max_peaks)
    total_peaks = num_low_peaks + num_high_peaks

    # Peak positions index into this array directly. Label-based `.at` lookups
    # only worked when the frame had a default 0..n-1 index.
    close = candles["close"].to_numpy()

    def _swings(start_peaks, end_peaks):
        """Pair each start peak with the first later end peak; return (% moves, candle gaps)."""
        moves, gaps = [], []
        for start in start_peaks:
            later = end_peaks[end_peaks > start]
            if later.size > 0:
                end = later[0]
                moves.append(((close[end] - close[start]) / close[start]) * 100)
                gaps.append(end - start)
        return moves, gaps

    def _correlate(direction, gaps, moves):
        """Print and return (correlation, p-value), or (None, None) with fewer than two swings."""
        if len(moves) > 1:
            correlation, p_value = stats.pearsonr(gaps, moves)
            print(f"Correlation between number of candles and return price ({direction}): {correlation}, P-value: {p_value}")
            return correlation, p_value
        print("Not enough data points for correlation analysis.")
        return None, None

    distances_upward, candles_between_peaks_upward = _swings(min_peaks, max_peaks)
    distances_downward, candles_between_peaks_downward = _swings(max_peaks, min_peaks)

    # dtype=float keeps describe() numeric even when a direction has no swings.
    distances_upward_description = pd.Series(distances_upward, dtype=float).describe()
    candles_between_peaks_upward_description = pd.Series(candles_between_peaks_upward, dtype=float).describe()
    distances_downward_description = pd.Series(distances_downward, dtype=float).describe()
    candles_between_peaks_downward_description = pd.Series(candles_between_peaks_downward, dtype=float).describe()

    # Print results
    print(f"Low peaks: {num_low_peaks}, High peaks: {num_high_peaks}, Total peaks: {total_peaks}")
    print(f"Upward Distances (%): {distances_upward_description}")
    print(f"Candles Between Peaks Upward: {candles_between_peaks_upward_description}")
    print(f"Downward Distances (%): {distances_downward_description}")
    print(f"Candles Between Peaks Downward: {candles_between_peaks_downward_description}")

    # Each direction is checked and reported independently.
    correlation_upward, p_value_upward = _correlate(
        "upward", candles_between_peaks_upward, distances_upward)
    correlation_downward, p_value_downward = _correlate(
        "downward", candles_between_peaks_downward, distances_downward)

    # Return the data for further analysis if needed
    return {
        "upward": {
            "num_low_peaks": num_low_peaks,
            "num_high_peaks": num_high_peaks,
            "total_peaks": total_peaks,
            "distances": distances_upward_description,
            "candles_between_peaks": candles_between_peaks_upward_description,
            "correlation": correlation_upward,
            "p_value": p_value_upward
        },
        "downward": {
            "distances": distances_downward_description,
            "candles_between_peaks": candles_between_peaks_downward_description,
            "correlation": correlation_downward,
            "p_value": p_value_downward
        }
    }


In [143]:
def extrema_analyze_candles3(candles, kernel=6):
    """Print merged statistics for upward and downward swings between local extrema.

    Local lows come from the ``low`` column and local highs from the ``high``
    column via ``scipy.signal.argrelextrema(order=kernel)``. Each low is paired
    with the first later high (upward swing) and each high with the first
    later low (downward swing). Moves are percentage changes of ``close``;
    downward moves are multiplied by -100 rather than 100, so a price drop is
    recorded as a positive percentage and both directions can be pooled.

    Parameters
    ----------
    candles : pandas.DataFrame
        Must contain ``low``, ``high`` and ``close`` columns. Rows are
        addressed by position, so any index type is accepted.
    kernel : int
        Number of candles on each side used to decide whether a row is a
        local extremum.

    Returns
    -------
    None
        The ``describe()`` summaries of the pooled moves and of the pooled
        candle counts between paired peaks are printed.
    """
    # Find peaks (arrays of row positions)
    min_peaks = argrelextrema(candles["low"].values, np.less, order=kernel)[0]
    max_peaks = argrelextrema(candles["high"].values, np.greater, order=kernel)[0]

    # Peak positions index into this array directly. Label-based `.at` lookups
    # only worked when the frame had a default 0..n-1 index.
    close = candles["close"].to_numpy()

    distances_upward = []
    candles_between_peaks_upward = []
    distances_downward = []
    candles_between_peaks_downward = []

    # Low -> first later high
    for low_peak in min_peaks:
        later_highs = max_peaks[max_peaks > low_peak]
        if later_highs.size > 0:
            high_peak = later_highs[0]
            change = (close[high_peak] - close[low_peak]) / close[low_peak]
            distances_upward.append(change * 100)
            candles_between_peaks_upward.append(high_peak - low_peak)

    # High -> first later low (sign flipped so a drop counts as a positive move)
    for high_peak in max_peaks:
        later_lows = min_peaks[min_peaks > high_peak]
        if later_lows.size > 0:
            low_peak = later_lows[0]
            change = (close[low_peak] - close[high_peak]) / close[high_peak]
            distances_downward.append(change * -100)
            candles_between_peaks_downward.append(low_peak - high_peak)

    # Merge and describe; dtype=float keeps describe() numeric when nothing was paired.
    all_distances = distances_upward + distances_downward
    all_candles_between_peaks = candles_between_peaks_upward + candles_between_peaks_downward
    all_distances_description = pd.Series(all_distances, dtype=float).describe()
    all_candles_between_peaks_description = pd.Series(all_candles_between_peaks, dtype=float).describe()

    # Print merged results
    print(f"Merged Distances (%): {all_distances_description}")
    print(f"Merged Candles Between Peaks: {all_candles_between_peaks_description}")


In [144]:
# Print the low -> next-high swing statistics for `candles`, using kernel=6.
extrema_analyze_candles(candles, 6)

Low peaks: 53
High peaks: 54
Total peaks: 107
Distances (%): count    52.000000
mean      1.763008
std       1.794914
min      -1.162591
25%       0.682252
50%       1.234622
75%       2.527502
max       6.739049
dtype: float64
Candles between peaks: count    52.000000
mean     11.019231
std       7.306968
min       1.000000
25%       5.750000
50%       9.500000
75%      16.250000
max      32.000000
dtype: float64
Correlation between number of candles and return price: 0.35286895459135464
P-value of the correlation: 0.01029264997271907


In [145]:
# Print the merged (upward + downward) swing statistics for `candles`, using kernel=6.
extrema_analyze_candles3(candles, 6)

Merged Distances (%): count    106.000000
mean       1.553823
std        1.604107
min       -1.700418
25%        0.621755
50%        1.219966
75%        2.156576
max        6.739049
dtype: float64
Merged Candles Between Peaks: count    106.000000
mean      10.424528
std        7.179267
min        1.000000
25%        5.250000
50%        8.000000
75%       14.750000
max       32.000000
dtype: float64
