In [1]:
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
#from config import *

In [2]:
def describe_dfs(dictionary):
    """
    Print a broad overview of the structure of several dataframes so they can be compared.

    For every entry the key is printed first, followed by the report of
    ``info()``, the table returned by ``describe()``, a separator line of
    100 dashes and an empty line.

    Parameters:
    dictionary (dict): dictionary containing dataframes

    Returns:
    no return value, this is just a print function
    """

    # loop over the dataframes in the dictionary and print their description
    for name, frame in dictionary.items():
        print(name)
        # info() writes its report itself and returns None; wrapping it in
        # print() would append a stray "None" line after every report.
        frame.info()
        print(frame.describe())
        print("-" * 100)
        print("")

In [3]:
def visualize_price_development(currency_df):
    """
    Plot the "Price" column of a crypto currency against its "Date" column.

    The figure is created with a size of 150x50 inches at 10 dpi, the line is
    drawn in red, the axes are labelled "Time" and "Price", the title is left
    empty, and the figure is shown with ``plt.show()``.

    Parameters:
    currency_df (dataframe): dataframe of the crypto currency; its "Date" and
                             "Price" columns are read

    Returns:
    no return value, the plot is displayed
    """

    # open a new figure and draw the price line on its axes
    plt.figure(figsize=(150, 50), dpi=10)
    axes = plt.gca()
    axes.plot(currency_df["Date"], currency_df["Price"], color='tab:red')

    # label the axes; the title is intentionally an empty string
    axes.set(title="", xlabel="Time", ylabel="Price")
    plt.show()

In [4]:
def adf_test(timeseries_df):
    """
    Run the Augmented Dickey-Fuller test on the "Price" column of a time series and print the outcome.

    Parameters:
    timeseries_df (dataframe): timeseries that should be tested; its "Price" column is read

    Returns:
    no return value, this is a print function
    """

    # NOTE(review): adfuller is not imported in the visible part of this file;
    # presumably statsmodels.tsa.stattools.adfuller - confirm it is in scope.
    outcome = adfuller(timeseries_df["Price"], autolag='AIC', regression='ct')

    print("ADF Test Results")
    print("Null Hypothesis: The series has a unit root (non-stationary)")

    # positions 0 to 4 of the result are reported, in order, under these captions
    captions = (
        "ADF-Statistic:",
        "P-Value:",
        "Number of lags:",
        "Number of observations:",
        "Critical Values:",
    )
    for position, caption in enumerate(captions):
        print(caption, outcome[position])

    print("Note: If P-Value is smaller than 0.05, we reject the null hypothesis and the series is stationary")

In [5]:
def check_stationarity(df, tolerance=0.1):
    """
    Check the stationarity of each numeric column in the given DataFrame.

    This is a simple split-half heuristic, not a statistical test: after
    dropping missing values, each column is cut into a first and a second
    half, and the column is reported as stationary when both the means and
    the sample variances of the two halves differ by less than ``tolerance``
    relative to their average magnitude.

    Parameters:
    df (pd.DataFrame): The input data frame with time series data.
    tolerance (float): Maximum relative difference (for mean and variance alike)
                       that still counts as stationary. Defaults to 0.1.

    Prints the stationarity status of each column.
    """

    def relative_gap(first, second):
        # Relative difference of two statistics. When both are zero there is
        # no difference at all, so report 0 instead of dividing 0 by 0.
        scale = (abs(first) + abs(second)) / 2
        if scale == 0:
            return 0.0
        return abs(first - second) / scale

    for column in df.select_dtypes(include=[np.number]).columns:
        series = df[column].dropna()

        # Each half needs at least two observations, otherwise its sample
        # variance is undefined (NaN) and the comparison is meaningless.
        if len(series) < 4:
            print(f"{column}: not enough data to determine stationarity")
            continue

        # Split data into two halves by position, independent of the index labels
        split = len(series) // 2
        first_half, second_half = series.iloc[:split], series.iloc[split:]

        mean_gap = relative_gap(first_half.mean(), second_half.mean())
        var_gap = relative_gap(first_half.var(), second_half.var())

        if mean_gap < tolerance and var_gap < tolerance:
            print(f"{column}: stationary")
        else:
            print(f"{column}: non-stationary")

In [6]:
def determine_timeframe(df):
    """
    This function prints the lowest and highest date in a dataframe

    Parameters:
    df (dataframe): timeseries that should be analyzed; its "Date" column is read

    Returns:
    no return value, this is a print function
    """

    start = df["Date"].min()
    end = df["Date"].max()

    # Format instead of concatenating: "+" only works when the column holds
    # strings and raises TypeError for datetime values.
    print(f"Start date: {start}")
    print(f"End date: {end}")

In [7]:
def determine_currency_timeframe(dictionary):
    """
    Print, for every crypto currency, its name followed by the first and last date of its data.

    Each entry is handed to ``determine_timeframe`` and closed off with a
    separator line of 30 dashes.

    Parameters:
    dictionary (dictionary): crypto currency names mapped to their dataframes

    Returns:
    no return value, this is a print function
    """

    separator = "-" * 30

    # report one currency after the other
    for currency, frame in dictionary.items():
        print(currency)
        determine_timeframe(frame)
        print(separator)

In [8]:
def visualize_price_development_for_timeframe(df, periods, save_path=None):
    """
    This function visualizes the price development over a specified timeframe.

    Parameters:
    df (DataFrame): The dataframe containing the price data; the "Price" column is plotted
                    against the index, whose tick labels are formatted as %Y-%m-%d.
    periods (int): The number of trailing periods (rows) to visualize from the dataframe.
                   The rows are taken with the slice [-periods:], so 0 selects every row.
    save_path (str): The file path to save the plot. If None (the default) or otherwise
                     falsy, the plot will be displayed instead.

    Returns:
    No return value, this function generates and saves/displays a plot of price development over the specified timeframe.
    """

    # Extract relevant data
    price_data = df['Price'][-periods:].values
    dates = df.index[-periods:].values

    # Plotting
    plt.figure(figsize=(12, 6))
    plt.plot(dates, price_data, color='blue', linewidth=2, marker='o', markersize=6)
    ax = plt.gca()

    # Customize the x-axis formatting. Only one major locator can be active;
    # MaxNLocator is the one in effect, so no date locator is installed first.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))  # Set maximum number of ticks on x-axis

    # Mark every 5th tick label as visible. This hides nothing: the remaining
    # labels are left in whatever state they already have.
    for label in ax.xaxis.get_ticklabels()[::5]:
        label.set_visible(True)

    # Customize the plot
    plt.title('Price Development Over Time', fontsize=16)
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Price', fontsize=14)
    plt.grid(True)
    plt.tight_layout()

    # Remove the header: the title is hidden only after tight_layout() has run
    ax.title.set_visible(False)

    # Save or show the plot
    if save_path:
        plt.savefig(save_path)
        print(f"Plot saved at {save_path}")
    else:
        plt.show()

In [9]:
def calculate_average_percentage_change(dictionary):
    """
    This function calculates the average percentage change from period to period in the "Price" column
    for all currencies in the provided dictionary.

    The result is the unweighted mean of the per-currency mean changes, so every
    currency counts equally regardless of how many rows it has.

    Side effect: a "Pct_Change" column holding the period-to-period change (as a
    fraction, first row NaN) is written into every dataframe of the dictionary.

    Parameters:
    dictionary (dict): Dictionary containing the dataframes for each currency.

    Returns:
    float: The overall average percentage change in price across all currencies,
           or NaN when the dictionary is empty.
    """

    per_currency_means = []

    # Loop over currencies
    for data in dictionary.values():
        # No dropna() here: assigning to a column aligns on the index, so the
        # leading NaN would reappear anyway; mean() skips it on its own.
        data['Pct_Change'] = data['Price'].pct_change()
        per_currency_means.append(data['Pct_Change'].mean())

    # Nothing to average: report NaN instead of dividing by zero
    if not per_currency_means:
        return float("nan")

    # Calculate the overall average and convert the fraction to a percentage
    return (sum(per_currency_means) / len(per_currency_means)) * 100

In [10]:
import pandas as pd

def calculate_outlier_profits(daily_dict, weekly_dict):
    """
    Summarise how much of each cryptocurrency's price movement comes from outlier periods.

    A period counts as an outlier when its absolute period-to-period return is
    greater than 50%. Daily and weekly datasets are processed the same way and
    their rows are combined, daily first, into one DataFrame that is also
    printed as a LaTeX table.

    Side effect: the columns "Return" and "Absolute Return" (both in percent)
    are written into every dataframe of both dictionaries.

    Parameters:
    daily_dict (dict): A dictionary where keys are cryptocurrency names and values are DataFrames
                       containing daily price data in a "Price" column.
    weekly_dict (dict): A dictionary where keys are cryptocurrency names and values are DataFrames
                        containing weekly price data in a "Price" column.

    Returns:
    pd.DataFrame: One row per dataset and cryptocurrency with the columns "Dataset", "Currency",
                  "Maximum return" (sum of all absolute returns), "Ratio outliers" (percentage of
                  that sum contributed by outlier periods) and "Number" (count of outlier periods).
    """

    def summarise(label, crypto, frame):
        """
        Build the summary row for one cryptocurrency.

        Parameters:
        label (str): Name of the dataset the frame belongs to, 'daily' or 'weekly'.
        crypto (str): Name of the cryptocurrency.
        frame (pd.DataFrame): Price data of the cryptocurrency; gains the two return columns.

        Returns:
        dict: The summary row for this cryptocurrency.
        """
        # Period-to-period return in percent, and its magnitude
        frame['Return'] = frame['Price'].pct_change() * 100
        frame['Absolute Return'] = frame['Return'].abs()

        magnitudes = frame['Absolute Return']
        overall = magnitudes.sum()

        # Outliers are the periods that moved by more than 50%
        outliers = magnitudes[magnitudes > 50]
        outlier_sum = outliers.sum()
        outlier_count = outliers.count()

        # Without any movement there is nothing to take a share of
        if overall != 0:
            outlier_share = (outlier_sum / overall) * 100
        else:
            outlier_share = 0

        return {
            'Dataset': label,
            'Currency': crypto,
            'Maximum return': overall,
            'Ratio outliers': outlier_share,
            'Number': outlier_count
        }

    # Daily rows first, weekly rows after them
    rows = [summarise('daily', crypto, frame) for crypto, frame in daily_dict.items()]
    rows.extend(summarise('weekly', crypto, frame) for crypto, frame in weekly_dict.items())

    result_df = pd.DataFrame(rows)

    # Print DataFrame to LaTeX
    print(result_df.to_latex(index=False))

    return result_df