In [None]:
from bot import get_historical_data
import pandas as pd
from binance.client import Client
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, make_scorer, precision_score
from itertools import combinations

In [None]:
# Transaction cost rate expressed as a fraction (0.0005 = 0.05 %).
# NOTE(review): train_decision_tree does not read this constant; it uses its
# own value of 0.0005.
transaction_costs = 0.0005

# The starting capital is denominated in EUR.

In [None]:
# Fetch BTC/EUR candles at a 1-hour interval (Client.KLINE_INTERVAL_1HOUR).
# `days` is handed to get_historical_data as the look-back length; presumably
# it is a number of days - TODO confirm against bot.get_historical_data.
symbol = 'BTCEUR'
days = 10000

bitcoin_data_hour = get_historical_data(symbol, Client.KLINE_INTERVAL_1HOUR, days)
#bitcoin_data_5minute = get_historical_data(symbol, Client.KLINE_INTERVAL_5MINUTE, days)

In [None]:
# Return of the NEXT bar, stored on the current row: pct_change() yields the
# return from t-1 to t, and shift(-1) moves it up one row so that row t holds
# the return from t to t+1. The shift is essential - forgetting it is one of the
# most common mistakes, because signals computed at t would then be paired with
# a return that is already known at t.
bitcoin_data_hour["pct"] = bitcoin_data_hour["Close"].pct_change().shift(-1)
#bitcoin_data_5minute["pct"] = bitcoin_data_5minute["Close"].pct_change().shift(-1)

In [None]:
# Drop, in place, every row that has a NaN in any column. This removes at least
# the last row, whose shifted "pct" value is NaN.
bitcoin_data_hour.dropna(inplace=True)
#bitcoin_data_5minute.dropna(inplace=True)

In [None]:
# Preview the first rows of the cleaned hourly data.
bitcoin_data_hour.head()

In [None]:
# Work on a copy so that the indicator columns added to df_hour later do not
# modify bitcoin_data_hour.
df_hour = bitcoin_data_hour.copy()
#df_5minute = bitcoin_data_5minute
df_hour.head()

In [None]:
def sharpe_ratio(s: pd.Series):
    """Return the mean of ``s`` divided by its standard deviation.

    No risk-free rate is subtracted and no annualisation is applied. The mean
    and the standard deviation are also printed, in that order, on one line.

    Parameters
    ----------
    s : pd.Series
        Series of returns.

    Returns
    -------
    The ratio ``s.mean() / s.std()``.
    """
    mean_return = s.mean()
    volatility = s.std()
    print(mean_return, volatility)
    return mean_return / volatility

In [None]:
def count_consecutive_ones(data: pd.DataFrame, signal_col: str):
    """Count how many times in a row a buy signal (value 1) has occurred.

    Adds the column ``signal_col + "_count"`` to ``data`` in place. For each row
    it holds the length of the current uninterrupted run of rows whose
    ``signal_col`` value equals 1; any other value resets the counter to 0.

    Parameters
    ----------
    data : pd.DataFrame
        Frame containing ``signal_col``; it is modified in place.
    signal_col : str
        Name of the signal column to scan.
    """
    is_one = (data[signal_col] == 1).to_numpy()

    # 1-based row numbers; a row that breaks a run records its own number, so
    # the running maximum is always the number of the most recent break.
    row_numbers = np.arange(1, len(is_one) + 1, dtype=np.int64)
    last_break = np.maximum.accumulate(np.where(is_one, 0, row_numbers))

    # Distance to the most recent break equals the current run length.
    data[signal_col + "_count"] = row_numbers - last_break

### Indikatoren und Indexe berechnen

In [None]:
def OBV_berechnen(data:pd.DataFrame):
    """Add an on-balance-volume column ``"OBV"`` to ``data`` in place.

    Requires the columns ``"Close"`` and ``"Volume"``. Each bar's volume is
    signed with the direction of the close-to-close change (+1, 0 or -1) and
    the signed volumes are accumulated. The first row has no previous close,
    so its OBV value is NaN.
    """
    direction = np.sign(data["Close"].pct_change())
    signed_volume = direction * data["Volume"]
    data["OBV"] = signed_volume.cumsum()
    
def SMA_berechnen(data:pd.DataFrame, intervall:int):
    """Add a simple moving average of ``"Close"`` and its buy signal, in place.

    Columns written for ``intervall = n``:

    * ``SMA_n``           - rolling mean of ``"Close"`` over ``n`` rows.
    * ``SMA_n_Sig``       - 1 where ``Close`` is above ``SMA_n``, otherwise 0
      (rows where the average is still NaN compare as 0).
    * ``SMA_n_Sig_count`` - run length of consecutive 1s, written by
      ``count_consecutive_ones``.

    The mean of the signal column (the share of rows with a buy signal) is
    printed.
    """
    sma_col = f"SMA_{intervall}"
    signal_col = f"{sma_col}_Sig"

    close = data["Close"]
    data[sma_col] = close.rolling(intervall).mean()
    data[signal_col] = close.gt(data[sma_col]).astype(int)

    print(data[signal_col].mean())
    count_consecutive_ones(data, signal_col)
    
def RSI_berechnen(data:pd.DataFrame, intervall:int):
    """Add the column ``RSI_<intervall>`` (relative strength index) in place.

    Gains and losses are the positive and negative parts of the bar-to-bar
    change of ``"Close"``. Both are averaged with a simple rolling mean over
    ``intervall`` rows, and the index is ``100 - 100 / (1 + gain / loss)``.
    Rows without a full window of price changes are NaN.
    """
    # Bar-to-bar price change; the first element (NaN from diff) is dropped.
    price_change = data["Close"].diff()[1:]

    # Split the change into its upward and downward parts (both non-negative).
    gains = price_change.where(price_change > 0, 0)
    losses = -price_change.where(price_change < 0, 0)

    mean_gain = gains.rolling(intervall).mean()
    mean_loss = losses.rolling(intervall).mean()

    relative_strength = mean_gain / mean_loss

    # Assignment aligns on the index, so the dropped first row becomes NaN.
    data[f"RSI_{intervall}"] = 100 - (100 / (1 + relative_strength))
    
def EMA_berechnen(data: pd.DataFrame, intervall: int):
    """Add an exponential moving average of ``"Close"`` and its buy signal, in place.

    Columns written for ``intervall = n``:

    * ``EMA_n``           - ``ewm(span=n, adjust=False)`` mean of ``"Close"``.
    * ``EMA_n_Sig``       - 1 where ``Close`` is above ``EMA_n``, otherwise 0.
    * ``EMA_n_Sig_count`` - run length of consecutive 1s, written by
      ``count_consecutive_ones``.
    """
    ema_col = f"EMA_{intervall}"
    signal_col = f"{ema_col}_Sig"

    close = data["Close"]
    data[ema_col] = close.ewm(span=intervall, adjust=False).mean()
    data[signal_col] = close.gt(data[ema_col]).astype(int)

    count_consecutive_ones(data, signal_col)

def _wma(series: pd.Series, window: int) -> pd.Series:
    """Linearly weighted moving average: weights 1..window, newest value heaviest."""
    values = series.to_numpy(dtype=float)
    result = np.full(len(values), np.nan)
    if 1 <= window <= len(values):
        weights = np.arange(1, window + 1, dtype=float)
        # Row i of the view is values[i:i + window]; its last element is the
        # newest observation and therefore receives the largest weight.
        windows = np.lib.stride_tricks.sliding_window_view(values, window)
        result[window - 1:] = windows @ weights / weights.sum()
    return pd.Series(result, index=series.index)


def HMA_berechnen(data: pd.DataFrame, intervall: int):
    """Add a Hull moving average of ``"Close"`` and its buy signal, in place.

    The Hull moving average is ``WMA(2 * WMA(n / 2) - WMA(n), sqrt(n))`` where
    WMA is a linearly *weighted* moving average. The previous implementation
    used plain rolling means in all three places, which yields a different
    (non-Hull) series.

    Columns written for ``intervall = n``:

    * ``HMA_n``           - the Hull moving average; rows without enough
      history are NaN.
    * ``HMA_n_Sig``       - 1 where ``Close`` is above ``HMA_n``, otherwise 0.
    * ``HMA_n_Sig_count`` - run length of consecutive 1s, written by
      ``count_consecutive_ones``.

    ``n / 2`` and ``sqrt(n)`` are truncated to integers, as before.
    """
    spalten_name = "HMA_" + str(intervall)
    half_length = int(intervall / 2)
    sqrt_length = int(np.sqrt(intervall))

    close = data["Close"]
    wma_half = _wma(close, half_length)
    wma_full = _wma(close, intervall)

    raw_hma = 2 * wma_half - wma_full
    data[spalten_name] = _wma(raw_hma, sqrt_length)

    spalten_name_sig = "HMA_" + str(intervall) + "_Sig"
    data[spalten_name_sig] = (data["Close"] > data[spalten_name]).astype(int)
    count_consecutive_ones(data, spalten_name_sig)

def MACD_berechnen(data: pd.DataFrame, fast_period: int = 12, slow_period: int = 26, signal_period: int = 9):
    """Add the columns ``"MACD"`` and ``"MACD_Signal"`` to ``data`` in place.

    ``MACD`` is the fast EMA of ``"Close"`` minus the slow EMA; ``MACD_Signal``
    is an EMA of ``MACD``. All three averages use ``ewm(span=..., adjust=False)``.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with a ``"Close"`` column; modified in place.
    fast_period, slow_period, signal_period : int
        Spans of the fast EMA, the slow EMA and the signal-line EMA.
    """
    close = data["Close"]
    fast_ema = close.ewm(span=fast_period, adjust=False).mean()
    slow_ema = close.ewm(span=slow_period, adjust=False).mean()

    data["MACD"] = fast_ema - slow_ema
    data["MACD_Signal"] = data["MACD"].ewm(span=signal_period, adjust=False).mean()

def Momentum_berechnen(data: pd.DataFrame, intervall: int):
    """Add the column ``Momentum_<intervall>`` to ``data`` in place.

    The value is the absolute change of ``"Close"`` relative to ``intervall``
    rows earlier; the first ``intervall`` rows are NaN.
    """
    close = data["Close"]
    data[f"Momentum_{intervall}"] = close - close.shift(intervall)

def Stochastic_RSI_berechnen(data: pd.DataFrame, intervall: int):
    """Add the column ``StochRSI_<intervall>`` to ``data`` in place.

    First an RSI is built from simple rolling means (window ``intervall``) of
    the positive and negative parts of the bar-to-bar change of ``"Close"``.
    The stochastic RSI then locates the current RSI inside its own rolling
    minimum/maximum range over the same window:
    ``(RSI - min) / (max - min)``. Where the rolling maximum equals the
    rolling minimum the result is NaN (0 / 0).
    """
    price_change = data["Close"].diff()

    # Upward and downward parts of the change, averaged over the window.
    mean_gain = price_change.where(price_change > 0, 0).rolling(window=intervall).mean()
    mean_loss = (-price_change.where(price_change < 0, 0)).rolling(window=intervall).mean()

    rsi = 100 - (100 / (1 + mean_gain / mean_loss))

    rsi_window = rsi.rolling(window=intervall)
    lowest_rsi = rsi_window.min()
    highest_rsi = rsi_window.max()

    data[f"StochRSI_{intervall}"] = (rsi - lowest_rsi) / (highest_rsi - lowest_rsi)

In [None]:
# Registries of the column names created below:
#   indicators    - raw indicator columns per window
#   dt_indicators - columns intended as decision-tree inputs
indicators = []
dt_indicators = ["MACD_Signal"]

# Window-independent indicators.
OBV_berechnen(df_hour)
MACD_berechnen(df_hour)

# Window-dependent indicators, one set of columns per window length (in rows).
for i in [5, 10, 24, 50, 100, 500, 1000]:
    for berechnen in (SMA_berechnen, RSI_berechnen, EMA_berechnen,
                      HMA_berechnen, Momentum_berechnen, Stochastic_RSI_berechnen):
        berechnen(df_hour, i)

    indicators += [f"RSI_{i}", f"EMA_{i}", f"HMA_{i}", f"Momentum_{i}", f"StochRSI_{i}"]
    # The HMA signal and StochRSI are deliberately left out of the tree inputs.
    dt_indicators += [f"RSI_{i}", f"SMA_{i}_Sig", f"EMA_{i}_Sig", f"Momentum_{i}"]

In [None]:
def create_labels(data: pd.DataFrame, window: int):
    """Compute the forward return over ``window`` rows and a binary label.

    Adds two columns to ``data`` in place:

    * ``future_return`` - ``Close`` ``window`` rows ahead divided by the
      current ``Close``, minus 1.
    * ``label``         - 1 where ``future_return`` is strictly positive,
      otherwise 0.

    Afterwards every row containing a NaN in ANY column is dropped in place.
    This removes the last ``window`` rows and also any row where another
    column of ``data`` is NaN.
    """
    future_close = data["Close"].shift(-window)
    data["future_return"] = future_close / data["Close"] - 1
    data["label"] = data["future_return"].gt(0.00).astype(int)
    data.dropna(inplace=True)

In [None]:
from sklearn import tree
def train_decision_tree(data: pd.DataFrame, features: list, target: str, threshold: float,
                        transaction_cost: float = 0.0005):
    """Tune a decision tree on the first 80 % of ``data`` and back-test it on the rest.

    Steps:

    1. Split ``data`` chronologically (first 80 % of rows for training, last
       20 % for testing; no shuffling).
    2. Grid-search ``max_depth`` of a ``DecisionTreeClassifier`` on the training
       rows, scored by precision of class 1. Cross-validation uses
       ``TimeSeriesSplit`` so each fold validates on rows that come AFTER its
       training rows; the default classifier folds would train on later rows
       and validate on earlier ones.
    3. On the test rows, go long (position 1) where the predicted probability
       of class 1 exceeds ``threshold``, otherwise stay flat (position 0).
    4. Print the Sharpe ratio of the raw test returns, and of the strategy
       before and after transaction costs, then plot cumulative strategy and
       benchmark returns with a marker at every position change.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the ``features`` columns, the ``target`` column and a
        ``"pct"`` column holding the return earned over the following bar.
    features : list
        Names of the feature columns.
    target : str
        Name of the binary label column (1 = go long).
    threshold : float
        Minimum predicted probability of class 1 required to take a position.
    transaction_cost : float, default 0.0005
        Cost deducted from the strategy return on every bar where the position
        changes (entry or exit).

    Returns
    -------
    tuple
        ``(best_clf, sharpe)`` - the fitted best estimator and the Sharpe ratio
        of the strategy returns after transaction costs.
    """
    # Imported locally so this function does not depend on the import cell.
    from sklearn.model_selection import TimeSeriesSplit

    # A 0..n-1 index makes index labels and row positions coincide below.
    data = data.reset_index(drop=True)

    # Chronological train/test split.
    train_size = int(len(data) * 0.8)
    train_data = data.iloc[:train_size]
    test_data = data.iloc[train_size:]
    X_train, y_train = train_data[features], train_data[target]
    X_test = test_data[features]

    # Hyper-parameter grid and scoring (precision of the "buy" class).
    param_grid = {'max_depth': [5, 10, 20, 30]}
    precision_scorer = make_scorer(precision_score, pos_label=1)

    grid_search = GridSearchCV(
        estimator=DecisionTreeClassifier(random_state=42),
        param_grid=param_grid,
        cv=TimeSeriesSplit(n_splits=3),  # folds respect time order
        scoring=precision_scorer,
        n_jobs=-1,  # use all available CPU cores
        verbose=2,  # print progress information
    )
    grid_search.fit(X_train, y_train)

    print("Beste Parameter:", grid_search.best_params_)
    best_clf = grid_search.best_estimator_

    # Probability of class 1. Look the column up via classes_ because
    # predict_proba has a single column when training saw only one class.
    y_pred_proba = best_clf.predict_proba(X_test)
    seen_classes = list(best_clf.classes_)
    if 1 in seen_classes:
        proba_long = y_pred_proba[:, seen_classes.index(1)]
    else:
        proba_long = np.zeros(len(X_test))
    y_pred = (proba_long > threshold).astype(int)

    # Benchmark: always invested over the test period.
    test_returns = data.loc[X_test.index, "pct"]
    print(f'sharpe test {sharpe_ratio(test_returns)}')

    # Strategy: earn the bar's return only while holding a position.
    strat_returns = test_returns.copy()
    strat_returns[y_pred == 0] = 0

    # A cost is incurred whenever the position changes; the strategy starts flat.
    positions = np.diff(np.concatenate([[0], y_pred]))
    trade_costs = np.abs(positions) * transaction_cost

    sharpe = sharpe_ratio(strat_returns)
    print(f"Sharpe Ratio without costs: {sharpe}")

    strat_returns -= trade_costs
    print(f"number of transactions {np.sum(np.abs(positions))}")
    sharpe = sharpe_ratio(strat_returns)
    print(f"Sharpe Ratio with costs: {sharpe}")

    # Performance frame for plotting. The plotted strategy curve is gross of
    # transaction costs; only the returned Sharpe ratio includes them.
    PF = test_returns.to_frame()
    PF["pred"] = y_pred
    PF["strategy_returns"] = PF["pred"] * PF["pct"]
    PF["cumulative_strategy_returns"] = (PF["strategy_returns"] + 1).cumprod()
    PF["benchmark_returns"] = PF["pct"]
    PF["cumulative_benchmark_returns"] = (PF["benchmark_returns"] + 1).cumprod()

    # Row positions (within the test period) where the position changed.
    transaction_indices = np.flatnonzero(positions)

    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(PF["cumulative_strategy_returns"], label="Strategy Returns")
    ax.plot(PF["cumulative_benchmark_returns"], label="Benchmark Returns")
    ax.scatter(PF.index[transaction_indices],
               PF["cumulative_strategy_returns"].iloc[transaction_indices],
               color='red', marker='o', label='Transactions')
    ax.set_title("Cumulative Returns with Transactions")
    ax.set_xlabel("Time")
    ax.set_ylabel("Cumulative Returns")
    ax.legend()
    plt.show()
    # This function is called once per feature combination; release the figure.
    plt.close(fig)

    return best_clf, sharpe

In [None]:
def find_best_indicators(data: pd.DataFrame, indicators: list, target: str, threshold: float,
                         min_features: int = 5):
    """Exhaustively search feature subsets for the best after-cost Sharpe ratio.

    Every combination of at least ``min_features`` entries of ``indicators`` is
    passed to ``train_decision_tree``; the combination with the highest
    returned Sharpe ratio wins. The number of combinations grows exponentially
    with ``len(indicators)`` and each one runs a grid search and draws a plot.

    Parameters
    ----------
    data : pd.DataFrame
        Frame forwarded unchanged to ``train_decision_tree``.
    indicators : list
        Candidate feature column names.
    target : str
        Name of the label column.
    threshold : float
        Probability threshold forwarded to ``train_decision_tree``.
    min_features : int, default 5
        Smallest subset size that is evaluated. Values below 1 are treated
        as 1.

    Returns
    -------
    tuple
        ``(best_model, best_combination, best_sharpe)``. If no combination was
        evaluated, or every Sharpe ratio was NaN, this is
        ``(None, None, -inf)``.
    """
    best_sharpe = -np.inf
    best_combination = None
    best_model = None

    # Start directly at the smallest admissible subset size instead of
    # generating every smaller combination only to discard it.
    smallest_size = max(min_features, 1)
    for r in range(smallest_size, len(indicators) + 1):
        for combo in combinations(indicators, r):
            print(combo)
            clf, sharpe = train_decision_tree(data, list(combo), target, threshold)
            # A NaN Sharpe ratio (e.g. a strategy that never trades) compares
            # False here and is therefore never selected.
            if sharpe > best_sharpe:
                best_sharpe = sharpe
                best_combination = combo
                best_model = clf

    print(f"Best Sharpe Ratio: {best_sharpe}")
    print(f"Best Indicator Combination: {best_combination}")

    return best_model, best_combination, best_sharpe

In [None]:
# Create the labels for a one-bar horizon. create_labels also drops, in place,
# every row that contains a NaN (indicator warm-up rows and the last row), so
# no separate dropna call is needed afterwards.
create_labels(df_hour, window=1)

df_hour.head()

### threshold = 0.5

In [None]:
# Candidate features; find_best_indicators evaluates the subsets of them.
features = [
    "SMA_100_Sig",
    "EMA_100",
    "SMA_500",
    "EMA_500",
    "Momentum_100",
    "Momentum_500",
    "RSI_100",
    "RSI_500",
    "SMA_100_Sig_count",
    "EMA_100_Sig_count",
]

# Find the best indicator combination at a probability threshold of 0.5.
best_model, best_combination, best_sharpe = find_best_indicators(
    df_hour, features, "label", threshold=0.5
)

In [None]:
# Best Sharpe Ratio: 0.03856916901080272
# Best Indicator Combination: ('SMA_100_Sig', 'EMA_100', 'Momentum_100', 'Momentum_500', 'RSI_500')
# Beste Parameter: {'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 10}
# 9.206389028191728e-05 0.005648386026191348
# sharpe test 0.01629914985537826
# 5.4330651286781826e-05 0.003247057379067564
# Sharpe Ratio: 0.016732273238233814

In [None]:
# Candidate features; find_best_indicators evaluates the subsets of them.
features = [
    "SMA_100_Sig",
    "EMA_100",
    "SMA_500",
    "EMA_500",
    "SMA_100_Sig_count",
    "EMA_100_Sig_count",
    "Momentum_100",
    "Momentum_500",
    "RSI_500",
]

# Find the best indicator combination at a probability threshold of 0.5.
best_model, best_combination, best_sharpe = find_best_indicators(
    df_hour, features, "label", threshold=0.5
)

In [None]:
# Best Sharpe Ratio: 0.038928980946217204
# Best Indicator Combination: ('SMA_100_Sig', 'EMA_100', 'SMA_500', 'EMA_500', 'SMA_100_Sig_count', 'RSI_500')

### threshold = 0.6

In [None]:
# Candidate features; find_best_indicators evaluates the subsets of them.
features = [
    "SMA_100_Sig",
    "EMA_100",
    "SMA_500",
    "EMA_500",
    "SMA_100_Sig_count",
    "EMA_100_Sig_count",
    "Momentum_100",
    "Momentum_500",
    "RSI_500",
]

# Find the best indicator combination at a probability threshold of 0.6.
best_model, best_combination, best_sharpe = find_best_indicators(
    df_hour, features, "label", threshold=0.6
)

In [None]:
# Best Sharpe Ratio: 0.02696321379463089
# Best Indicator Combination: ('SMA_100_Sig', 'EMA_100', 'SMA_500', 'EMA_500', 'SMA_100_Sig_count', 'RSI_500')

Man könnte probieren zu zählen, wie häufig die Buy-Signale bei den Averages hintereinander auftreten, und das dann an den DT (Decision Tree) übergeben.

In [None]:
# Combined buy signal: 1 only on rows where Close is above every one of the
# SMA, EMA and HMA averages for the windows 5, 10, 50 and 100; otherwise 0.
trend_signal_columns = [
    f"{average}_{window}_Sig"
    for average in ("SMA", "EMA", "HMA")
    for window in (5, 10, 50, 100)
]
df_hour["Signals"] = df_hour[trend_signal_columns].all(axis=1).astype(int)

In [None]:
# The indicator loop does not cover a 1680-row window (1680 hourly bars =
# 70 days), so the signal column is computed on demand. Without this the cell
# raises KeyError on a fresh kernel.
if "SMA_1680_Sig" not in df_hour.columns:
    SMA_berechnen(df_hour, 1680)

# Strategy: hold the asset only while Close is above its 1680-bar SMA.
# "pct" is the return of the following bar, so signal and return line up.
df_hour["Signals_pct"] = df_hour["SMA_1680_Sig"] * df_hour["pct"]
df_hour["Signals_PF"] = (df_hour["Signals_pct"] + 1).cumprod()

# Benchmark: always invested.
df_hour["PF_BM"] = df_hour["pct"]
df_hour["myPF_BM"] = (df_hour["PF_BM"] + 1).cumprod()

df_hour[["myPF_BM", "Signals_PF"]].plot(figsize=(16, 6))

In [None]:
# Chronological 80/20 train/test split of the labelled hourly frame, using the
# same rule as train_decision_tree. At module level there is no `data` frame or
# `target` name at this point, so the split is bound explicitly to df_hour and
# its "label" column; `features` is the list defined in the cells above.
train_size = int(len(df_hour) * 0.8)
train_data = df_hour.iloc[:train_size]
test_data = df_hour.iloc[train_size:]
X_train = train_data[features]
X_test = test_data[features]
y_train = train_data["label"]
y_test = test_data["label"]

In [None]:
# The indicator loop does not create a 1680-row SMA; compute it on demand so
# the plot does not fail with KeyError on a fresh kernel.
if "SMA_1680" not in df_hour.columns:
    SMA_berechnen(df_hour, 1680)

df_hour[["Close", "SMA_1680", "SMA_100"]].plot(figsize=(16, 6))

In [None]:
# Use the candle close time as a datetime index.
# The guard makes the cell re-runnable: after the first run "Close time" is the
# index and no longer a column, so repeating the conversion would raise KeyError.
# NOTE(review): pd.to_datetime treats plain integers as nanoseconds; if
# get_historical_data returns millisecond epoch values, unit="ms" is needed -
# confirm against the helper.
if df_hour.index.name != "Close time":
    df_hour["Close time"] = pd.to_datetime(df_hour["Close time"])
    df_hour.set_index("Close time", inplace=True)

In [None]:
# Plot Close and selected moving averages in consecutive 3-month windows.
# Expects df_hour to carry a datetime index.

# The indicator loop does not create a 1680-row SMA; compute it on demand so
# the column selection below does not fail with KeyError on a fresh kernel.
if "SMA_1680" not in df_hour.columns:
    SMA_berechnen(df_hour, 1680)

plot_columns = ["Close", "SMA_1680", "SMA_100", "EMA_100", "EMA_500", "SMA_50", "EMA_50"]

start_date = df_hour.index.min()
end_date = df_hour.index.max()

while start_date < end_date:
    # Half-open window [start_date, current_end_date).
    current_end_date = start_date + pd.DateOffset(months=3)
    df_slice = df_hour[(df_hour.index >= start_date) & (df_hour.index < current_end_date)]

    # A window without any rows (gap in the data) has nothing to plot.
    if df_slice.empty:
        start_date = current_end_date
        continue

    fig, ax = plt.subplots(figsize=(16, 6))
    df_slice[plot_columns].plot(ax=ax)
    ax.set_title(f'Close Price and SMAs from {start_date.date()} to {current_end_date.date()}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    ax.grid(True)
    plt.show()
    # One figure per window: release it so figures do not pile up in memory.
    plt.close(fig)

    # Move to the next 3-month period.
    start_date = current_end_date

In [None]:
import yfinance as yf

# Define the VIX ticker symbol
ticker = '^VIX'

# Download VIX data from Yahoo Finance at a 1-hour interval for the window
# 2024-08-01 to 2024-08-10 (requires network access).
# NOTE(review): `ticker` and `data` are rebound by the BTC-EUR download in the
# following cell, so the VIX frame is not kept.
data = yf.download(ticker, interval='1h', start='2024-08-01', end='2024-08-10')

# Print the first few rows of the data
print(data.head())

In [None]:
# Define the Bitcoin ticker symbol
ticker = 'BTC-EUR'

# Download BTC-EUR data from Yahoo Finance at a 1-hour interval for the window
# 2024-08-01 to 2024-08-10 (requires network access).
data = yf.download(ticker, interval='1h', start='2024-08-01', end='2024-08-10')

# Bar-to-bar return of the close price
data['Return'] = data['Close'].pct_change()

# Rolling standard deviation of the hourly returns over 24 bars, multiplied by
# sqrt(24), i.e. scaled from an hourly to a 24-hour horizon.
# NOTE(review): because of this scaling the plotted series is a daily-scaled
# volatility, although the plot title below says "Hourly".
data['Volatility'] = data['Return'].rolling(window=24).std() * np.sqrt(24)

# Plot the volatility
data['Volatility'].plot(title='Hourly Bitcoin Volatility')
plt.show()