In [5]:
# Import modules
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from pycoingecko import CoinGeckoAPI
from utils import Search

In [None]:
# Test API

cg = CoinGeckoAPI()
btc = cg.get_coin_market_chart_by_id(id = 'bitcoin',
                                vs_currency = 'usd',
                                days = 365,
                                interval = 'daily',
                                precision = 2)
btc

366

In [None]:
# Top 50 non-wrapped / stable coins

top_50_coins = [
    "bitcoin",
    "ethereum",
    "binancecoin",
    "solana",
    "ripple",
    "dogecoin",
    "toncoin",
    "cardano",
    "shiba-inu",
    "avalanche-2",
    "bitcoin-cash",
    "polkadot",
    "tron",
    "chainlink",
    "polygon",
    "litecoin",
    "internet-computer",
    "uniswap",
    "leo-token",
    "ethereum-classic",
    "cosmos",
    "filecoin",
    "aptos",
    "lido-dao",
    "crypto-com-chain",
    "mantle",
    "render-token",
    "near",
    "monero",
    "hedera-hashgraph",
    "arbitrum",
    "the-graph",
    "quant-network",
    "vechain",
    "maker",
    "kaspa",
    "algorand",
    "optimism",
    "stellar",
    "bsv",                 # Bitcoin SV
    "decentraland",
    "flow",
    "tezos",
    "apecoin",
    "eos",
    "immutable-x",
    "multiversx",          # EGLD (formerly Elrond)
    "axie-infinity",
    "thorchain",
]

In [6]:
import pandas as pd
from pycoingecko import CoinGeckoAPI

cg = CoinGeckoAPI()

# Retrieve the list of all coins from CoinGecko API.
coin_list = cg.get_coins_list()

# Convert the list of coins to a DataFrame, sort by 'id', and reset the index.
coin_list_df = pd.DataFrame(coin_list).sort_values(by='id').reset_index(drop=True)

# Display the resulting DataFrame containing all coins.
coin_list_df

Unnamed: 0,id,symbol,name
0,-10,loong,龙
1,-3,meow,Meow Meow Coin
2,-5,🟥🟩,🟥🟪🟦🟩🟨🟧
3,-6,"""　""","""　"""
4,-7,∅,∅
...,...,...,...
18284,zyfi,zfi,ZyfAI
18285,zygo-the-frog,zygo,Zygo The Frog
18286,zyncoin-2,zyn,ZynCoin
18287,zynecoin,zyn,Zynecoin


In [None]:
# Main class for chart analysis

# Import modules
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from pycoingecko import CoinGeckoAPI
from itertools import combinations
import os

class analyze_coin_market_chart:
    """
    A class to retrieve, process, and analyze historical market chart data for one or more cryptocurrencies
    from the CoinGecko API. Provides methods for data conversion, correlation analysis, and plotting.

    Attributes
    ----------
    id : str or list of str
        The CoinGecko coin ID or list of IDs (default: 'bitcoin').
    vs_currency : str
        The target currency for market data (default: 'usd').
    days : int
        Number of days to retrieve data for (default: 364).
    interval : str
        Data interval. Only 'daily' is supported (enforced).
    precision : int
        Number of decimal places for price data (default: 2).
    is_saved : bool
        Indicates whether data tables have been saved/cached.
    cg : CoinGeckoAPI
        Instance of the CoinGeckoAPI client.
    saved_tables : dict
        Dictionary of DataFrames for each coin (only if id is a list).
    raw_chart : dict
        Raw chart data for a single coin (only if id is a string).

    Methods
    -------
    reformat_data(chart_data=None):
        Converts raw market chart data into a pandas DataFrame indexed by date, with daily close and percent change.
    save_tables(coin_list):
        Saves reformatted market chart data tables for a list of coins to disk and memory.
    correlation_analysis():
        Computes the correlation of percent changes between all pairs of coins (if multiple coins).
    plot():
        Plots the price chart(s) for the coin(s) using the reformatted market chart data.
    """

    def __init__(self, id = 'bitcoin', vs_currency = 'usd', days = 364):
        """
        Initializes the analyze_coin_market_chart class.

        Parameters
        ----------
        id : str or list of str, optional
            The CoinGecko coin ID or list of IDs (default is 'bitcoin').
        vs_currency : str, optional
            The target currency (default is 'usd').
        days : int, optional
            Number of days of data to retrieve (default is 364).
            Due to free API limitations, 364 is the max possible value.
        """
        # Only allow daily interval since hourly is for enterprise users
        self.id = id
        self.vs_currency = vs_currency
        self.days = days
        self.interval = 'daily'
        self.precision = 2
        self.is_saved = False
        self.cg = CoinGeckoAPI()

        # If the id is a list, save the tables
        if isinstance(self.id, list):
            self.save_tables(self.id)

        # If the id is a string, retrieve the raw chart data
        else:
            self.raw_chart = self.cg.get_coin_market_chart_by_id(
                id=self.id,
                vs_currency=self.vs_currency, 
                # Add 1 day since pct_change is null for first day
                days=self.days+1,
                interval=self.interval,
                precision=self.precision
            )
            self.is_saved = True

    def reformat_data(self, chart_data=None):
        """
        Converts the raw market chart data into a pandas DataFrame indexed by date.

        Parameters
        ----------
        chart_data : dict, optional
            Raw chart data as returned by CoinGecko API. If None, uses self.raw_chart.

        Returns
        -------
        pd.DataFrame
            DataFrame indexed by date with columns:
            - 'daily_close': closing price for the day
            - 'percent_change': daily percent change in price
        """
        if chart_data is not None:
            # Extract prices
            prices = [pair[1] for pair in chart_data['prices']]
        else:
            prices = [pair[1] for pair in self.raw_chart['prices']]

        # Assign dates (most recent date is today, going backwards)
        current_date = dt.date.today()
        timedelta = dt.timedelta(days=1)

        date_price = {}
        for i in range(len(prices)):
            index = -(i+1)
            date_price[current_date] = prices[index]
            current_date -= timedelta

        # Convert to DataFrame
        df = pd.DataFrame(list(date_price.items()), columns=['date', 'daily_close'])
        df['date'] = pd.to_datetime(df['date'])
        df.set_index('date', inplace=True)
        df.sort_index(inplace=True)

        # Add percent change column
        df['percent_change'] = df['daily_close'].pct_change() * 100
        df.dropna(inplace=True, subset=['percent_change'])

        return df

    def save_tables(self, coin_list):
        """
        Saves reformatted market chart data tables for a list of coins.

        For each coin in the provided coin_list, this method reformats the raw market chart data
        into a pandas DataFrame and stores it in the self.saved_tables dictionary, with the coin's ID as the key.
        If a table already exists on disk, it is loaded from file instead of calling the API.
        After saving the tables, sets the is_saved attribute to True.

        Parameters
        ----------
        coin_list : list of str
            A list of coin IDs for which to save the reformatted data tables.

        Returns
        -------
        None
        """
        # Check if the tables are already cached
        if self.is_saved:
            print("Tables already cached. Skipping save.")
            return

        # Create a dictionary to store the tables
        self.saved_tables = {}
        failed_coins = []

        # Create a table for each coin
        for coin in coin_list:
            # Create a file name for the table
            file_name = f'datasets/{coin}_{self.vs_currency}_{self.days}days.csv'

            if os.path.exists(file_name):
                # Load from CSV if file exists
                self.saved_tables[coin] = pd.read_csv(file_name, index_col='date', parse_dates=True)
                print(f"Loaded chart for {coin} from file.")
            else:
                try:
                    # Only call API if file does not exist
                    raw_chart = self.cg.get_coin_market_chart_by_id(
                        id=coin,
                        vs_currency=self.vs_currency,
                        days=self.days + 1,  # Add 1 day since pct_change is null for first day
                        interval=self.interval
                    )

                    # Reformat data into a dataframe and save it
                    if len(raw_chart['prices']) != 0:
                        self.saved_tables[coin] = self.reformat_data(raw_chart)
                        self.saved_tables[coin].to_csv(file_name)
                        print(f"Saved chart for {coin}...")

                    # Handle empty dataframe    
                    else:
                        print(f"No data found for {coin}")
                        failed_coins.append(coin)
                        continue

                # Handle API call errors
                except Exception as e:
                    print(f"Error retrieving data for {coin}: {e}")
                    failed_coins.append(coin)
                    continue

        # Remove failed coins from coin_list and update self.id
        self.id = [coin for coin in coin_list if coin not in failed_coins]

        # Save global is_saved variable
        self.is_saved = True

        print(f"\n{len(self.id)} tables saved!\n")

    def correlation_analysis(self):
        """
        Computes the correlation of percent changes between all pairs of coins.

        If self.id is a list, uses the saved_tables for each coin.
        If self.id is a string, returns None (not enough data for correlation).

        Returns
        -------
        pd.DataFrame or None
            DataFrame with columns ['coin1', 'coin2', 'correlation'] for each coin pair,
            sorted by correlation (descending). Returns None if only one coin is provided.
        """
        # If only one coin, correlation is not defined
        if isinstance(self.id, str):
            print("Correlation analysis requires at least two coins.")
            return None

        # Use saved_tables to build percent change DataFrame
        first = True
        for id, table in self.saved_tables.items():
            # First iteration creates initial dataframe
            if first:
                coin_comparison = table[['percent_change']].rename(columns={'percent_change': id})
                first = False
            else:
                # Subsequent iterations join the new table to the existing dataframe on index
                slice = table[['percent_change']].rename(columns={'percent_change': id})
                coin_comparison = coin_comparison.join(slice)

        # Drop rows with any NaN to ensure proper alignment
        coin_comparison = coin_comparison.dropna()

        # Calculate the correlation matrix
        corr_matrix = coin_comparison.corr()

        # Save all unique combinations of coin ids
        pairs = combinations(self.id, 2)

        # Prepare data for DataFrame
        data = []
        for coin1, coin2 in pairs:
            corr = corr_matrix.at[coin1, coin2]
            data.append({'coin1': coin1, 'coin2': coin2, 'correlation': corr})

        # Save the correlation matrix to a variable
        correlation_ranking = pd.DataFrame(data, columns=['coin1', 'coin2', 'correlation']).sort_values(by=['correlation'], ascending=False)
        correlation_ranking = correlation_ranking.dropna()
        correlation_ranking = correlation_ranking.reset_index(drop=True)

        return correlation_ranking

    def plot(self):
        """
        Plots the price chart for the coin or coins using the reformatted market chart data.

        If self.id is a list, plots each coin's price chart in a separate subplot.
        If self.id is a string, plots a single price chart.

        Returns
        -------
        None
            Displays the plot(s) using matplotlib.
        """
        # If id is a list, plot each coin in a separate subplot
        if isinstance(self.id, list):
            num_coins = len(self.id)
            fig, axes = plt.subplots(num_coins, 1, figsize=(12, 5 * num_coins), sharex=False)
            if num_coins == 1:
                axes = [axes]  # Make axes iterable if only one coin

            for ax, coin in zip(axes, self.id):
                # Plot daily close prices
                df = self.saved_tables[coin]
                ax.plot(df.index, df['daily_close'], marker='o')
                ax.set_title(f"{coin.capitalize()} Price Chart")
                ax.set_xlabel("Date")
                ax.set_ylabel(f"Price ({self.vs_currency.upper()})")
                ax.grid()

            plt.tight_layout()
            plt.show()
        else:
            # Single coin plot
            df = self.reformat_data()
            plt.figure(figsize=(12, 6))
            plt.plot(df.index, df['daily_close'], marker='o')
            plt.title(f"{self.id.capitalize()} Price Chart")
            plt.xlabel("Date")
            plt.ylabel(f"Price ({self.vs_currency.upper()})")
            plt.grid()
            plt.show()
            plt.show()

if __name__ == "__main__":
    # Sample analysis
    coin_list = top_50_coins[:5]

    coin_analysis = analyze_coin_market_chart(id = coin_list)

    print(coin_analysis.correlation_analysis())

Loaded chart for bitcoin from file.
Saved chart for ethereum...
Saved chart for binancecoin...
Saved chart for solana...
Saved chart for ripple...

5 tables saved!

         coin1        coin2  correlation
0      bitcoin     ethereum     0.774233
1      bitcoin       solana     0.755403
2     ethereum       solana     0.743742
3     ethereum  binancecoin     0.682354
4      bitcoin  binancecoin     0.631612
5  binancecoin       solana     0.623139
6      bitcoin       ripple     0.585716
7       solana       ripple     0.584233
8     ethereum       ripple     0.575306
9  binancecoin       ripple     0.436286


In [30]:
def top_coins(limit = 5):
    top_coins_by_market_cap = CoinGeckoAPI().get_coins_markets(
        vs_currency='usd',
        order='market_cap_desc',
        per_page=limit,
        page=1,
        sparkline=False
    )

    top = [record['id'] for record in top_coins_by_market_cap]

    return top

top_coins()

['bitcoin', 'ethereum', 'ripple', 'tether', 'binancecoin']

In [34]:
# Sample analysis

coin_list = top_coins(12)

coin_analysis = analyze_coin_market_chart(id = coin_list)

coin_analysis.correlation_analysis()

Loaded chart for bitcoin from file.
Loaded chart for ethereum from file.
Loaded chart for ripple from file.
Loaded chart for tether from file.
Loaded chart for binancecoin from file.
Loaded chart for solana from file.
Loaded chart for usd-coin from file.
Loaded chart for staked-ether from file.
Loaded chart for dogecoin from file.
Loaded chart for tron from file.
Loaded chart for cardano from file.
Saved chart for wrapped-steth...
12 tables saved


Unnamed: 0,coin1,coin2,correlation
0,ethereum,staked-ether,0.999577
1,staked-ether,wrapped-steth,0.995156
2,ethereum,wrapped-steth,0.995014
3,bitcoin,ethereum,0.77488
4,bitcoin,staked-ether,0.774121
5,bitcoin,wrapped-steth,0.769367
6,bitcoin,solana,0.755372
7,ethereum,solana,0.742149
8,solana,staked-ether,0.739
9,solana,wrapped-steth,0.732911


In [12]:
# Configuration
coin_id = 'bitcoin'

# Save bitcoin price table and save as df
chart_retrieval = get_coin_market_chart_as_df(id = coin_id)
df = chart_retrieval.reformat_data()

# Calculate average percent change
mean_pct_change = df['percent_change'].mean()
std_pct_change = df['percent_change'].std()

# Display average percent change
print(f'Using data from the last {chart_retrieval.days} days', '\n')
print('Bitcoin price table:', '\n')
print(df.tail(), '\n')
print(f"Average percent change: {mean_pct_change:.2f}%")
print(f"Standard deviation of percent change: {std_pct_change:.2f}%")

NameError: name 'get_coin_market_chart_as_df' is not defined

In [139]:
items = ['bitcoin', 'ethereum', 'solana', 'dogecoin']          # input list (already unique)
pairs = combinations(items, 2) # every unordered pair

for pair in pairs:
    print(pair)

('bitcoin', 'ethereum')
('bitcoin', 'solana')
('bitcoin', 'dogecoin')
('ethereum', 'solana')
('ethereum', 'dogecoin')
('solana', 'dogecoin')


In [29]:
type(12)

int