In [3]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots


# Function to load data and fix its structure
def fix_data(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

# Function to perform an exploratory check of the data
def exploratory_check(df):
    print(df.head())  # Print the first few rows
    print(df.dtypes)  # Print the data types of the columns

# Function to process a cryptocurrency data file
def process_crypto_data(file_path):
    df = pd.read_csv(file_path)
    fix_data(df)
    exploratory_check(df)
    return df

# File paths for the cryptocurrency data
crypto_files = {
    'BTC': '../historical_data/BTC-USD.csv',
    'ETH': '../historical_data/ETH-USD.csv',
    'DOGE': '../historical_data/DOGE-USD.csv'
}



In [4]:
# Function to prepare monthly data from daily data
def prepare_monthly_data(df):
    monthly_data = df.resample('M').agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    })
    return monthly_data

# Function to plot a candlestick chart from monthly data
def plot_monthly_candlestick(df, title='Monthly Candlestick Chart'):
    fig = go.Figure(data=[go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close']
    )])

    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Price (USD)',
        xaxis=dict(type='date'),
        yaxis=dict(fixedrange=False)  # allows user to change the y-axis view
    )

    fig.show()

# Loop to process and plot each cryptocurrency dataset
for crypto, file_path in crypto_files.items():
    print(f"Processing {crypto} data...")
    df_crypto = process_crypto_data(file_path)

    # Prepare monthly data for the cryptocurrency
    crypto_monthly_data = prepare_monthly_data(df_crypto)

    # Plot the monthly candlestick chart for the cryptocurrency
    plot_monthly_candlestick(crypto_monthly_data, title=f'{crypto} Monthly Candlestick Chart')


Processing BTC data...
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-09-17  465.864014  468.174011  452.421997  457.334015  457.334015   
2014-09-18  456.859985  456.859985  413.104004  424.440002  424.440002   
2014-09-19  424.102997  427.834991  384.532013  394.795990  394.795990   
2014-09-20  394.673004  423.295990  389.882996  408.903992  408.903992   
2014-09-21  408.084991  412.425995  393.181000  398.821014  398.821014   

              Volume  
Date                  
2014-09-17  21056800  
2014-09-18  34483200  
2014-09-19  37919700  
2014-09-20  36863600  
2014-09-21  26580100  
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object


Processing ETH data...
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2017-11-09  308.644989  329.451996  307.056000  320.884003  320.884003   
2017-11-10  320.670990  324.717987  294.541992  299.252991  299.252991   
2017-11-11  298.585999  319.453003  298.191986  314.681000  314.681000   
2017-11-12  314.690002  319.153015  298.513000  307.907990  307.907990   
2017-11-13  307.024994  328.415009  307.024994  316.716003  316.716003   

                  Volume  
Date                      
2017-11-09  8.932500e+08  
2017-11-10  8.859860e+08  
2017-11-11  8.423010e+08  
2017-11-12  1.613480e+09  
2017-11-13  1.041890e+09  
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume       float64
dtype: object


Processing DOGE data...
                Open      High       Low     Close  Adj Close     Volume
Date                                                                    
2017-11-09  0.001207  0.001415  0.001181  0.001415   0.001415  6259550.0
2017-11-10  0.001421  0.001431  0.001125  0.001163   0.001163  4246520.0
2017-11-11  0.001146  0.001257  0.001141  0.001201   0.001201  2231080.0
2017-11-12  0.001189  0.001210  0.001002  0.001038   0.001038  3288960.0
2017-11-13  0.001046  0.001212  0.001019  0.001211   0.001211  2481270.0
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume       float64
dtype: object


In [5]:
import pandas as pd
import plotly.express as px

def load_and_preprocess_data(file_path):
    # Assuming the file is in CSV format with columns 'Date', 'Open', 'Close'
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    return df

def calculate_monthly_deltas(df):
    # Calculate the monthly delta between open and close as a percentage
    monthly_data = df.resample('M').agg({'Open': 'first', 'Close': 'last'})
    monthly_data['Monthly_Delta'] = (monthly_data['Close'] - monthly_data['Open']) / monthly_data['Open'] * 100
    monthly_data.reset_index(inplace=True)
    monthly_data['Year'] = monthly_data['Date'].dt.year
    monthly_data['Month'] = monthly_data['Date'].dt.strftime('%b')
    monthly_data.drop('Date', axis=1, inplace=True)
    monthly_data.drop_duplicates(subset=['Year', 'Month'], inplace=True)
    pivot_table = monthly_data.pivot(index='Year', columns='Month', values='Monthly_Delta')
    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    pivot_table = pivot_table.reindex(month_order, axis=1)
    return pivot_table

def plot_box_and_whisker_plotly(df, title='Monthly Deltas Box Plot'):
    long_df = df.reset_index().melt(id_vars='Year', var_name='Month', value_name='Monthly Delta')
    
    # Calculate the max absolute value for symmetric y-axis limits
    max_delta = long_df['Monthly Delta'].abs().max()
    
    fig = px.box(long_df, x='Month', y='Monthly Delta', title=title)
    # Set y-axis range to be symmetric
    fig.update_layout(
        yaxis_range=[-max_delta, max_delta],
        width=1200,
        height=600,
    )
    fig.show()


def calculate_color_scale_bounds(pivot_table):
    min_val = pivot_table.min().min()
    max_val = pivot_table.max().max()
    return -max(abs(min_val), abs(max_val)), max(abs(min_val), abs(max_val))

def plot_heatmap(pivot_table, title='Monthly Deltas Heatmap'):
    min_val, max_val = calculate_color_scale_bounds(pivot_table)
    color_scale = [(0, 'red'), (abs(min_val) / (abs(min_val) + max_val), 'white'), (1, 'green')]
    pivot_table.reset_index(inplace=True)
    fig = px.imshow(pivot_table.drop(columns=['Year']), x=pivot_table.columns[1:], y=pivot_table['Year'],
                    color_continuous_scale=color_scale, range_color=(min_val, max_val), title=title,
                    labels={'x': 'Month', 'y': 'Year', 'color': 'Monthly Delta (%)'})
    fig.update_xaxes(side="top")
    fig.update_layout(
        width=1200,
        height=600
    )
    fig.show()

# Example paths (replace with actual paths to your data files)
btc_file_path = crypto_files['BTC']
eth_file_path = crypto_files['ETH']
doge_file_path = crypto_files['DOGE']

# Process and plot for BTC
btc_data = load_and_preprocess_data(btc_file_path)
btc_monthly_deltas = calculate_monthly_deltas(btc_data)
plot_heatmap(btc_monthly_deltas, title='BTC Monthly Deltas Heatmap')
plot_box_and_whisker_plotly(btc_monthly_deltas, title='BTC Monthly Deltas Box Plot')

# Process and plot for ETH
eth_data = load_and_preprocess_data(eth_file_path)
eth_monthly_deltas = calculate_monthly_deltas(eth_data)
plot_heatmap(eth_monthly_deltas, title='ETH Monthly Deltas Heatmap')
plot_box_and_whisker_plotly(eth_monthly_deltas, title='ETH Monthly Deltas Box Plot')

# Process and plot for DOGE
doge_data = load_and_preprocess_data(doge_file_path)
doge_monthly_deltas = calculate_monthly_deltas(doge_data)
plot_heatmap(doge_monthly_deltas, title='DOGE Monthly Deltas Heatmap')
plot_box_and_whisker_plotly(doge_monthly_deltas, title='DOGE Monthly Deltas Box Plot')


In [6]:
# Define a function to plot candlestick charts and mark halving dates
def plot_candlestick_with_halving(df, halving_dates, title):
    fig = go.Figure(data=[go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close']
    )])

    # Add halving date lines for all cryptocurrencies
    for halving_date in halving_dates:
        fig.add_shape(
            type="line",
            x0=halving_date, y0=df['Low'].min(),
            x1=halving_date, y1=df['High'].max(),
            line=dict(color="red", width=2, dash="dashdot"),
        )

    fig.update_layout(
        title=title,
        xaxis_title='Date',
        yaxis_title='Price (USD)',
        xaxis=dict(
            type='date',
            rangeslider_visible=False  # Hide the range slider
        ),
        yaxis=dict(fixedrange=False),
        width=1200,
        height=600,
        legend=dict(x=0, y=1)
    )

    fig.show()

# Bitcoin halving dates, assumed to impact all cryptocurrencies
btc_halving_dates = ['2012-11-28', '2016-07-09', '2020-05-11']

# Process each cryptocurrency data file and plot the candlestick chart
for crypto, file_path in crypto_files.items():
    print(f"Processing {crypto} data...")
    df_crypto = pd.read_csv(file_path)
    df_crypto['Date'] = pd.to_datetime(df_crypto['Date'])
    df_crypto.set_index('Date', inplace=True)
    
    # Plot the candlestick chart with Bitcoin halving dates
    plot_candlestick_with_halving(df_crypto, btc_halving_dates, title=f'{crypto} Daily Candlestick Chart with Bitcoin Halving Dates')


Processing BTC data...


Processing ETH data...


Processing DOGE data...


In [7]:
# Function to plot price ratio since halvings
def plot_price_ratio_since_halvings(df, halving_dates, title):
    # Convert halving dates to datetime
    halving_dates = pd.to_datetime(halving_dates)

    # Initialize Plotly figure
    fig = go.Figure()

    # Process each halving period
    for i, halving_date in enumerate(halving_dates):
        if i < len(halving_dates) - 1:
            # Select data between two halvings
            period_data = df[(df.index > halving_date) & (df.index <= halving_dates[i + 1])].copy()
        else:
            # Select data after the last halving
            period_data = df[df.index > halving_date].copy()

        # Calculate days since halving and price ratio
        period_data['Days_Since_Halving'] = (period_data.index - halving_date).days
        halving_price = df.loc[halving_date, 'Close'] if halving_date in df.index else None
        if halving_price is not None:
            period_data['Price_Ratio'] = period_data['Close'] / halving_price
            # Add trace to the figure
            fig.add_trace(
                go.Scatter(
                    x=period_data['Days_Since_Halving'],
                    y=period_data['Price_Ratio'],
                    mode='lines',
                    name=f'{title} Since {halving_date.year} Halving'
                )
            )

    # Update layout
    fig.update_layout(
        title=f'{title} Price Ratio for Each Halving Period',
        xaxis_title='Days Since Halving',
        yaxis_title='Price Ratio',
        legend_title='Halving Periods', width=1200, height=600, legend=dict(x=0, y=1)
    )

    fig.show()

# Bitcoin halving dates, which we will apply to all cryptocurrencies
btc_halving_dates = ['2012-11-28', '2016-07-09', '2020-05-11']

# Process each cryptocurrency data file and plot the price ratio since halvings
for crypto, file_path in crypto_files.items():
    print(f"Processing {crypto} data...")
    df_crypto = pd.read_csv(file_path)
    df_crypto['Date'] = pd.to_datetime(df_crypto['Date'])
    df_crypto.set_index('Date', inplace=True)
    
    # Plot the price ratio since halvings
    plot_price_ratio_since_halvings(df_crypto, btc_halving_dates, title=crypto)


Processing BTC data...


Processing ETH data...


Processing DOGE data...


In [8]:
# Function to calculate the weekly price movement and apply a 10-week moving average
def calculate_weekly_price_movement_with_moving_avg(df):
    # Calculate the percentage change between consecutive weeks
    weekly_price_movement = df['Close'].resample('W').last().pct_change() * 100
    # Apply a 10-week moving average
    moving_avg_10_weeks = weekly_price_movement.rolling(window=1).mean()
    return moving_avg_10_weeks

# Initialize Plotly figure
fig = go.Figure()

# Process each cryptocurrency data file and add to the plot
for crypto, file_path in crypto_files.items():
    print(f"Processing {crypto} data...")
    df_crypto = pd.read_csv(file_path)
    df_crypto['Date'] = pd.to_datetime(df_crypto['Date'])
    df_crypto.set_index('Date', inplace=True)
    
    # Calculate the weekly price movement with a 10-week moving average
    crypto_weekly_movement_moving_avg = calculate_weekly_price_movement_with_moving_avg(df_crypto)
    
    # Add a trace to the figure for each cryptocurrency
    fig.add_trace(
        go.Scatter(
            x=crypto_weekly_movement_moving_avg.index,
            y=crypto_weekly_movement_moving_avg,
            mode='lines',
            name=f'{crypto} Price Movement'
        )
    )

# Update layout
fig.update_layout(
    title='Weekly Price Movement Comparison',
    xaxis_title='Date',
    yaxis_title='Weekly Price Movement (%)',
    legend_title='Cryptocurrency',
        width=1200,
        height=600,
        legend=dict(x=0, y=1)
)

# Show the figure
fig.show()


Processing BTC data...
Processing ETH data...
Processing DOGE data...


In [9]:
import pandas as pd
import plotly.express as px

def plot_price_movement_correlation(btc_file_path, eth_file_path, doge_file_path):
    # Load the data
    df_btc = pd.read_csv(btc_file_path)
    df_eth = pd.read_csv(eth_file_path)
    df_doge = pd.read_csv(doge_file_path)

    # Preprocess the data
    df_btc['Date'] = pd.to_datetime(df_btc['Date'])
    df_eth['Date'] = pd.to_datetime(df_eth['Date'])
    df_doge['Date'] = pd.to_datetime(df_doge['Date'])
    df_btc.set_index('Date', inplace=True)
    df_eth.set_index('Date', inplace=True)
    df_doge.set_index('Date', inplace=True)

    # Calculate daily price movement percentage
    df_btc['BTC_Price_Movement'] = df_btc['Close'].pct_change() * 100
    df_eth['ETH_Price_Movement'] = df_eth['Close'].pct_change() * 100
    df_doge['DOGE_Price_Movement'] = df_doge['Close'].pct_change() * 100

    # Merge the DataFrames on the Date index
    combined_df = pd.merge(df_btc[['BTC_Price_Movement']], df_eth[['ETH_Price_Movement']], left_index=True, right_index=True)
    combined_df = pd.merge(combined_df, df_doge[['DOGE_Price_Movement']], left_index=True, right_index=True)

    # Calculate and print the correlation coefficients
    print("Correlation Coefficients:")
    print(combined_df.corr())

    # Create scatter plots
    fig1 = px.scatter(
        combined_df, 
        x='BTC_Price_Movement', 
        y='ETH_Price_Movement',
        labels={'BTC_Price_Movement': 'BTC Daily Price Movement (%)', 'ETH_Price_Movement': 'ETH Daily Price Movement (%)'},
        title='BTC vs ETH Daily Price Movement Correlation'
    )
    fig2 = px.scatter(
        combined_df, 
        x='BTC_Price_Movement', 
        y='DOGE_Price_Movement',
        labels={'BTC_Price_Movement': 'BTC Daily Price Movement (%)', 'DOGE_Price_Movement': 'DOGE Daily Price Movement (%)'},
        title='BTC vs DOGE Daily Price Movement Correlation'
    )
    fig3 = px.scatter(
        combined_df, 
        x='ETH_Price_Movement', 
        y='DOGE_Price_Movement',
        labels={'ETH_Price_Movement': 'ETH Daily Price Movement (%)', 'DOGE_Price_Movement': 'DOGE Daily Price Movement (%)'},
        title='ETH vs DOGE Daily Price Movement Correlation'
    )

    # Update layout for each figure
    for fig in [fig1, fig2, fig3]:
        fig.update_layout(width=800, height=400)
        fig.show()

# Example usage
btc_file_path = '../historical_data/BTC-USD.csv'
eth_file_path = '../historical_data/ETH-USD.csv'
doge_file_path = '../historical_data/DOGE-USD.csv'
plot_price_movement_correlation(btc_file_path, eth_file_path, doge_file_path)


Correlation Coefficients:
                     BTC_Price_Movement  ETH_Price_Movement  \
BTC_Price_Movement             1.000000            0.775279   
ETH_Price_Movement             0.775279            1.000000   
DOGE_Price_Movement            0.366977            0.339837   

                     DOGE_Price_Movement  
BTC_Price_Movement              0.366977  
ETH_Price_Movement              0.339837  
DOGE_Price_Movement             1.000000  



The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.



In [10]:

def plot_price_movement_correlation_and_volatility(btc_file_path, eth_file_path, doge_file_path):
    # Load the data
    df_btc = pd.read_csv(btc_file_path)
    df_eth = pd.read_csv(eth_file_path)
    df_doge = pd.read_csv(doge_file_path)

    # Preprocess the data
    df_btc['Date'] = pd.to_datetime(df_btc['Date'])
    df_eth['Date'] = pd.to_datetime(df_eth['Date'])
    df_doge['Date'] = pd.to_datetime(df_doge['Date'])
    df_btc.set_index('Date', inplace=True)
    df_eth.set_index('Date', inplace=True)
    df_doge.set_index('Date', inplace=True)

    # Calculate daily price movement percentage
    df_btc['BTC_Price_Movement'] = df_btc['Close'].pct_change() * 100
    df_eth['ETH_Price_Movement'] = df_eth['Close'].pct_change() * 100
    df_doge['DOGE_Price_Movement'] = df_doge['Close'].pct_change() * 100

    # Calculate volatility (standard deviation of daily price movements)
    btc_volatility = df_btc['BTC_Price_Movement'].std()
    eth_volatility = df_eth['ETH_Price_Movement'].std()
    doge_volatility = df_doge['DOGE_Price_Movement'].std()

    print(f"Volatility (Standard Deviation of Daily Price Movements):")
    print(f"BTC: {btc_volatility:.2f}%")
    print(f"ETH: {eth_volatility:.2f}%")
    print(f"DOGE: {doge_volatility:.2f}%")

btc_file_path = '../historical_data/BTC-USD.csv'
eth_file_path = '../historical_data/ETH-USD.csv'
doge_file_path = '../historical_data/DOGE-USD.csv'
plot_price_movement_correlation_and_volatility(btc_file_path, eth_file_path, doge_file_path)

Volatility (Standard Deviation of Daily Price Movements):
BTC: 3.71%
ETH: 4.76%
DOGE: 10.33%



The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.


The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.

