In [8]:
# Import modules
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from pycoingecko import CoinGeckoAPI
from utils import Search

In [9]:
# Create a client
cg = CoinGeckoAPI()

# Confirm connection
cg.ping()

{'gecko_says': '(V3) To the Moon!'}

In [54]:
# Retrieve the list of all coins from CoinGecko API.
coin_list = cg.get_coins_list()

# Convert the list of coins to a DataFrame, sort by 'id', and reset the index.
coin_list_df = pd.DataFrame(coin_list).sort_values(by='id').reset_index(drop=True)

# Display the resulting DataFrame containing all coins.
coin_list_df

Unnamed: 0,id,symbol,name
0,-10,loong,龙
1,-3,meow,Meow Meow Coin
2,-5,🟥🟩,🟥🟪🟦🟩🟨🟧
3,-6,"""　""","""　"""
4,-7,∅,∅
...,...,...,...
18265,zyfi,zfi,ZyfAI
18266,zygo-the-frog,zygo,Zygo The Frog
18267,zyncoin-2,zyn,ZynCoin
18268,zynecoin,zyn,Zynecoin


In [49]:
'''

CoinGecko API Wrapper Function

This function retrieves market chart data for a given coin using the CoinGecko API.

args:
id (required)
vs_currency (required)
days (required)
interval
precision

Personal Notes:
- Method works best when all parameters are specified.

'''
btc_raw = cg.get_coin_market_chart_by_id(id = 'bitcoin', 
                                        vs_currency = 'usd', 
                                        days = 360, 
                                        interval = 'daily')

btc_raw

{'prices': [[1724889600000, 59015.29633887944],
  [1724976000000, 59351.5794909728],
  [1725062400000, 59156.246036463555],
  [1725148800000, 58960.23658664587],
  [1725235200000, 57357.71617282246],
  [1725321600000, 59108.73235863689],
  [1725408000000, 57504.538586220435],
  [1725494400000, 57987.84601410068],
  [1725580800000, 56132.404732088595],
  [1725667200000, 53923.35611071905],
  [1725753600000, 54150.9383589072],
  [1725840000000, 54792.40755768575],
  [1725926400000, 57049.118751143265],
  [1726012800000, 57624.16178336144],
  [1726099200000, 57381.764445000474],
  [1726185600000, 58106.951229815284],
  [1726272000000, 60620.62747295754],
  [1726358400000, 60003.29940083042],
  [1726444800000, 59214.80226801806],
  [1726531200000, 58211.123231286256],
  [1726617600000, 60317.0319794625],
  [1726704000000, 61440.41208494509],
  [1726790400000, 62966.52931910512],
  [1726876800000, 63128.224540748255],
  [1726963200000, 63403.402349259355],
  [1727049600000, 63582.5995692482

In [3]:
# Class to retrieve and visualize coin market chart data from CoinGecko as a DataFrame and plot.

# Import modules
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from pycoingecko import CoinGeckoAPI
from itertools import combinations

class analyze_coin_market_chart:
    """
    A class to retrieve historical market chart data for a specified cryptocurrency from the CoinGecko API,
    convert it into a pandas DataFrame, and provide plotting functionality.

    Attributes
    ----------
    id : str or list of str
        The CoinGecko coin ID(s) (default: 'bitcoin').
    vs_currency : str
        The target currency of market data (default: 'usd').
    days : int
        Number of days to retrieve data for (default: 360).
    interval : str
        Data interval. Only 'daily' is supported (forced).

    Methods
    -------
    reformat():
        Returns the market chart data as a pandas DataFrame indexed by date.
    plot():
        Plots the price data as a time series chart.
    """

    def __init__(self, id = 'bitcoin', vs_currency = 'usd', days = 364):
        """
        Initializes the get_coin_market_chart_as_df class.

        Parameters
        ----------
        id : str or list of str, optional
            The CoinGecko coin ID or list of IDs (default is 'bitcoin').
        vs_currency : str, optional
            The target currency (default is 'usd').
        days : int, optional
            Number of days of data to retrieve (default is 360).
        interval : str, optional
            Data interval. Only 'daily' is supported and enforced.
        """

        # Only allow daily interval since hourly is for enterprise users
        self.id = id
        self.vs_currency = vs_currency
        self.days = days
        self.interval = 'daily'
        self.precision = 2
        self.is_saved = False
        self.cg = CoinGeckoAPI()

        # If the id is a list, save the tables
        if isinstance(self.id, list):
            self.save_tables(self.id)

        # If the id is a string, retrieve the raw chart data
        else:
            self.raw_chart = self.cg.get_coin_market_chart_by_id(id = self.id,
                                                                vs_currency = self.vs_currency, 
                                                                # Add 1 day since pct_change is null for first day
                                                                days = self.days+1,
                                                                interval = self.interval,
                                                                precision = self.precision)
            self.is_saved = True

    def reformat_data(self, chart_data=None):
        """
        Converts the raw market chart data into a pandas DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame indexed by date with a 'price' column.
        """

        if chart_data is not None:
            # Extract prices
            prices = [pair[1] for pair in chart_data['prices']]

        else:
            prices = [pair[1] for pair in self.raw_chart['prices']]

        # Assign dates
        current_date = dt.date.today()

        # Determine timedelta (always daily)
        timedelta = dt.timedelta(days=1)

        # Dictionary creation loop
        date_price = {}
        for i in range(len(prices)):
            index = -(i+1)
            date_price[current_date] = prices[index]
            current_date -= timedelta

        # Convert to df
        df = pd.DataFrame(list(date_price.items()), columns=['date', 'daily_close'])
        df['date'] = pd.to_datetime(df['date'])
        df.set_index('date', inplace=True)
        df.sort_index(inplace=True)

        # Add percent change column
        df['percent_change'] = df['daily_close'].pct_change() * 100
        df.dropna(inplace=True, subset=['percent_change'])

        return df

    def save_tables(self, coin_list):
        """
        Saves reformatted market chart data tables for a list of coins.

        For each coin in the provided coin_list, this method reformats the raw market chart data
        into a pandas DataFrame and stores it in the self.tables dictionary, with the coin's name
        or ID as the key. After saving the tables, it sets the is_cached attribute to True to indicate
        that the tables have been cached.

        Parameters
        ----------
        coin_list : list
            A list of coin names or IDs for which to save the reformatted data tables.

        Returns
        -------
        None
        """

        # Check if the tables are already cached
        if self.is_saved:
            print("Tables already cached. Skipping save.")
            return

        # Create a dictionary to store the tables
        self.saved_tables = {}

        # Create a table for each coin
        for coin in coin_list:
            raw_chart = self.cg.get_coin_market_chart_by_id(id = coin,
                                                            vs_currency = self.vs_currency, 
                                                            # Add 1 day since pct_change is null for first day
                                                            days = self.days+1,
                                                            interval = self.interval,
                                                            precision = self.precision)
            self.saved_tables[coin] = self.reformat_data(raw_chart)

        # Save global is_cached variable
        self.is_saved = True

        print(f"{len(coin_list)} tables saved")

    def correlation_analysis(self):
        """
        Computes the correlation of percent changes between all pairs of coins.

        If self.id is a list, uses the saved_tables for each coin.
        If self.id is a string, returns None (not enough data for correlation).

        Returns
        -------
        pd.DataFrame
            DataFrame with columns ['coin1', 'coin2', 'correlation'] for each coin pair.
        """
        # If only one coin, correlation is not defined
        if not isinstance(self.id, list) or len(self.id) < 2:
            print("Correlation analysis requires at least two coins.")
            return None

        # Use saved_tables to build percent change DataFrame
        coin_comparison = pd.DataFrame()
        for coin in self.id:
            df = self.saved_tables[coin]
            # Align on index, dropna to ensure matching dates
            coin_comparison[coin] = df['percent_change']

        # Drop rows with any NaN to ensure proper alignment
        coin_comparison = coin_comparison.dropna()

        # Calculate the correlation matrix
        corr_matrix = coin_comparison.corr()

        # Save all unique combinations of coin ids
        pairs = combinations(self.id, 2)

        # Prepare data for DataFrame
        data = []
        for coin1, coin2 in pairs:
            corr = corr_matrix.at[coin1, coin2]
            data.append({'coin1': coin1, 'coin2': coin2, 'correlation': corr})

        return pd.DataFrame(data, columns=['coin1', 'coin2', 'correlation']).sort_values(by=['correlation'], ascending=False)

    def plot(self):
        """
        Plots the price chart for the coin or coins using the reformatted market chart data.

        If self.id is a list, plots each coin's price chart in a separate subplot.
        If self.id is a string, plots a single price chart.

        Returns
        -------
        None
            Displays the plot(s) using matplotlib.
        """
        # If id is a list, plot each coin in a separate subplot
        if isinstance(self.id, list):
            num_coins = len(self.id)
            fig, axes = plt.subplots(num_coins, 1, figsize=(12, 5 * num_coins), sharex=False)
            if num_coins == 1:
                axes = [axes]  # Make axes iterable if only one coin

            for ax, coin in zip(axes, self.id):
                df = self.saved_tables[coin]
                ax.plot(df.index, df['daily_close'], marker='o')
                ax.set_title(f"{coin.capitalize()} Price Chart")
                ax.set_xlabel("Date")
                ax.set_ylabel("Price (USD)")
                ax.grid()

            plt.tight_layout()
            plt.show()
        else:
            # Single coin plot (original logic)
            df = self.reformat_data()
            plt.figure(figsize=(12, 6))
            plt.plot(df.index, df['daily_close'], marker='o')
            plt.title(f"{self.id.capitalize()} Price Chart")
            plt.xlabel("Date")
            plt.ylabel("Price (USD)")
            plt.grid()
            plt.show()

In [4]:
coin_list = ['bitcoin', 'ethereum', 'solana', 'dogecoin']

coin_analysis = analyze_coin_market_chart(id = coin_list)

coin_analysis.correlation_analysis()

4 tables saved


Unnamed: 0,coin1,coin2,correlation
0,bitcoin,ethereum,0.774834
1,bitcoin,solana,0.755149
3,ethereum,solana,0.741914
2,bitcoin,dogecoin,0.723957
4,ethereum,dogecoin,0.711263
5,solana,dogecoin,0.663132


In [12]:
# Configuration
coin_id = 'bitcoin'

# Save bitcoin price table and save as df
chart_retrieval = get_coin_market_chart_as_df(id = coin_id)
df = chart_retrieval.reformat_data()

# Calculate average percent change
mean_pct_change = df['percent_change'].mean()
std_pct_change = df['percent_change'].std()

# Display average percent change
print(f'Using data from the last {chart_retrieval.days} days', '\n')
print('Bitcoin price table:', '\n')
print(df.tail(), '\n')
print(f"Average percent change: {mean_pct_change:.2f}%")
print(f"Standard deviation of percent change: {std_pct_change:.2f}%")

NameError: name 'get_coin_market_chart_as_df' is not defined

In [40]:
from itertools import combinations

# Create a class to compare the correlation between all coin pairs in a given list

coin_list = ['bitcoin', 'ethereum', 'solana', 'dogecoin']



def coin_correlation(coin_list):

    # Create a dataframe to store the percent change for each coin
    coin_comparison = pd.DataFrame()   

    for coin in coin_list:

        # Retrieve the percent change for each coin
        chart_retrieval = get_coin_market_chart_as_df(id = coin.lower())
        df = chart_retrieval.reformat_data()

        # Add the percent change for each coin to the dataframe
        coin_comparison[coin] = df['percent_change']

    # Calculate the correlation between each coin
    corr_matrix = coin_comparison.corr()

    # Save all unique combinations of coin ids
    pairs = combinations(coin_list, 2)

    # Create empty dictionary
    correlations = {}

    for pair in pairs:
        corr = corr_matrix.at[pair[0], pair[1]]
        correlations[pair] = corr
        
    return correlations

coin_correlation(coin_list)
    

        


{('bitcoin', 'ethereum'): np.float64(0.7749286569972825),
 ('bitcoin', 'solana'): np.float64(0.7568204374629542),
 ('bitcoin', 'dogecoin'): np.float64(0.7243480211117626),
 ('ethereum', 'solana'): np.float64(0.7437929198966233),
 ('ethereum', 'dogecoin'): np.float64(0.7119262732665762),
 ('solana', 'dogecoin'): np.float64(0.6637492466486986)}

In [139]:
items = ['bitcoin', 'ethereum', 'solana', 'dogecoin']          # input list (already unique)
pairs = combinations(items, 2) # every unordered pair

for pair in pairs:
    print(pair)

('bitcoin', 'ethereum')
('bitcoin', 'solana')
('bitcoin', 'dogecoin')
('ethereum', 'solana')
('ethereum', 'dogecoin')
('solana', 'dogecoin')


In [29]:
type(12)

int