In [None]:
import pandas as pd
import time
import json
import os
import requests

def get_top_coins(limit, category=None):
    """Return the ids of the top ``limit`` coins by market cap from CoinGecko.

    Pages of 250 coins are requested from the ``/coins/markets`` endpoint,
    ordered by descending market cap, until enough coins have been collected.

    Args:
        limit: Number of coin ids wanted. Zero or a negative value returns an
            empty list without contacting the API.
        category: Optional CoinGecko category id used to filter the listing.
            When falsy, no category filter is sent.

    Returns:
        A list of at most ``limit`` coin ids. A page whose request does not
        return HTTP 200 is reported on stdout and skipped, so the list can be
        shorter than ``limit`` (or skip some ranks) when requests fail.
    """
    if limit <= 0:
        return []

    url = "https://api.coingecko.com/api/v3/coins/markets"
    per_page = 250
    params = {
        "vs_currency": "usd",
        "order": "market_cap_desc",
        "per_page": per_page,
        "page": 1,
    }
    if category:  # If category provided, add it to the list of parameters.
        params['category'] = category

    # Ceiling division: request exactly as many pages as are needed to cover
    # `limit` coins (e.g. 500 -> 2 pages, 501 -> 3 pages).
    page_count = -(-limit // per_page)

    all_coins = []
    for page in range(1, page_count + 1):
        params["page"] = page
        response = requests.get(url, params=params)
        if response.status_code != 200:
            print(f"Error fetching page {page}: {response.status_code}")
            continue
        coins = response.json()
        all_coins.extend(coins)
        print(f'Page {page} successful')
        if len(coins) < per_page:
            # A short page is taken to mean the listing is exhausted, so
            # later pages would only cost extra requests.
            break

    return [coin['id'] for coin in all_coins[:limit]]  # Get just the top N = limit

def get_category_list():
    """Fetch CoinGecko's category list and return the decoded JSON response.

    The result is meant to help pick a value for the ``category`` argument of
    ``get_top_coins``.
    """
    endpoint = "https://api.coingecko.com/api/v3/coins/categories/list"
    response = requests.get(endpoint)
    return response.json()

def _value_column(rows, length):
    """Return the value half of ``[timestamp, value]`` rows as a list of exactly ``length`` items."""
    values = [entry[1] for entry in rows[:length]]
    # Pad with None so a short or missing series becomes empty cells instead
    # of making the DataFrame constructor fail on unequal column lengths.
    return values + [None] * (length - len(values))


def fetch_market_data(coin_ids, path, days=365, currency="usd"):
    """Download daily market history for each coin and save one CSV per coin.

    Each coin is requested from CoinGecko's ``market_chart`` endpoint. A
    successful response is turned into a DataFrame with the columns
    ``timestamp``, ``price``, ``market_cap`` and ``volume`` and written to
    ``<path>/coin_gecko_<coin id>.csv``.

    Args:
        coin_ids: Iterable of CoinGecko coin ids to download.
        path: Directory the CSV files are written to.
        days: Number of days of history to request. Per the original author's
            note, more than 365 days yields HTTP 401 (permission denied) on
            the free API endpoint.
        currency: Quote currency for the market data; defaults to ``"usd"``.

    Returns:
        A tuple ``(data, failed)``. ``data`` maps each successfully fetched
        coin id to its DataFrame. ``failed`` lists the coin ids that still
        need fetching in a future run: those whose request did not return
        HTTP 200 and, if the run was aborted after more than 10 consecutive
        failures, every coin that had not been attempted yet.
    """
    base_url = "https://api.coingecko.com/api/v3/coins/{}/market_chart"
    coin_ids = list(coin_ids)  # Needed to slice off the untried tail on abort.
    data = {}
    failed = []
    consecutive_failures = 0
    for position, coin in enumerate(coin_ids):
        params = {"vs_currency": currency, "days": days, "interval": "daily"}
        response = requests.get(base_url.format(coin), params=params)
        if response.status_code == 200:
            print(f"Successfully loaded {coin}")
            market_data = response.json()
            # `or []` covers both a missing key and an explicit null.
            prices = market_data.get("prices") or []
            row_count = len(prices)
            df = pd.DataFrame({
                "timestamp": [entry[0] for entry in prices],
                "price": [entry[1] for entry in prices],
                "market_cap": _value_column(market_data.get("market_caps") or [], row_count),
                "volume": _value_column(market_data.get("total_volumes") or [], row_count),
            })
            df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
            data[coin] = df
            # os.path.join keeps the save location correct on every platform.
            file_path = os.path.join(path, f"coin_gecko_{coin}.csv")
            df.to_csv(file_path, encoding='utf-8', index=False, header=True)
            consecutive_failures = 0
        else:
            print(f"Failed to fetch data for {coin}: {response.status_code}")
            failed.append(coin)
            consecutive_failures += 1
            if consecutive_failures > 10:
                print("Failed to retrieve tokens, operation aborted")
                # Keep the coins that were never attempted in the retry list
                # so an abort does not silently drop them.
                failed.extend(coin_ids[position + 1:])
                break
            # Back off for about a minute before the next request
            # (presumably to let the API rate limit reset).
            time.sleep(61)
        time.sleep(1)
    return data, failed

def get_failed_list(token_list, path):
    """Split tokens into those already stored in ``path`` and those missing.

    Only files that follow the ``coin_gecko_<token>.csv`` naming convention
    count as stored token data; any other file in the directory (for example
    a compiled dataset or a non-CSV file) is ignored.

    Args:
        token_list: Iterable of token ids to look for.
        path: Directory where the per-token CSV files are stored.

    Returns:
        A tuple ``(failed_list, succeeded_list)``: the tokens from
        ``token_list`` that have no file in ``path`` (in their original
        order), and the token ids of all token files found in ``path``.
    """
    prefix, suffix = 'coin_gecko_', '.csv'
    # Strip exactly the leading prefix and trailing extension rather than
    # replacing those substrings wherever they appear in the file name.
    succeeded_list = [
        filename[len(prefix):-len(suffix)]
        for filename in os.listdir(path)
        if filename.startswith(prefix) and filename.endswith(suffix)
    ]
    stored = set(succeeded_list)  # Built once for O(1) membership tests.
    failed_list = [token for token in token_list if token not in stored]
    return failed_list, succeeded_list

def compile_files(name, path):
    """Combine every stored per-token CSV in ``path`` into one dataset.

    Files named ``coin_gecko_<token>.csv`` are read, tagged with a ``token``
    column holding the token id taken from the file name, and concatenated.
    The result is written to ``<path>/<name>.csv`` and returned. Filtering is
    best done on the returned DataFrame afterwards.

    Files that do not follow the naming convention are skipped, as is the
    output file itself, so running the function again does not fold an
    earlier compiled dataset back into the result.

    Args:
        name: Base name (without extension) of the compiled CSV file.
        path: Directory holding the per-token CSV files; the compiled file is
            written to the same directory.

    Returns:
        DataFrame with the rows of all token files plus the ``token`` column.
        Each file's own row index is kept as read. When no token files are
        found, an empty DataFrame with the columns ``timestamp``, ``price``,
        ``market_cap``, ``volume`` and ``token`` is returned.
    """
    prefix, suffix = 'coin_gecko_', '.csv'
    columns = ['timestamp', 'price', 'market_cap', 'volume', 'token']
    output_name = f'{name}.csv'

    frames = []
    for filename in sorted(os.listdir(path)):  # Sorted for a stable row order.
        if filename == output_name:
            continue
        if not (filename.startswith(prefix) and filename.endswith(suffix)):
            continue
        frame = pd.read_csv(os.path.join(path, filename))
        frame['token'] = filename[len(prefix):-len(suffix)]
        frames.append(frame)

    # Concatenate once at the end: cheaper than growing a frame inside the
    # loop and avoids seeding the result with an empty, untyped frame.
    df = pd.concat(frames) if frames else pd.DataFrame(columns=columns)
    df.to_csv(os.path.join(path, output_name), encoding='utf-8', index=False, header=True)
    return df

def repeat_get_token_data(limit,path,cat):
    """
    Keep calling fetch_market_data until every wanted token is stored or a
    pass makes no progress.

    The wanted tokens are the top `limit` coins of category `cat`, minus the
    ones get_failed_list already finds in `path`. After each pass the tokens
    that failed become the input of the next pass; the loop stops when none
    are left or when a pass leaves at least as many tokens as it started with.
    Files are saved to `path`; nothing is returned.
    """
    # Work out which of the wanted tokens are not on disk yet.
    wanted = get_top_coins(limit, cat)
    pending, _already_stored = get_failed_list(wanted, path)

    print(f'Will begin retreiving {len(pending)} coins after 60 seconds')
    time.sleep(60)

    while pending:
        count_before = len(pending)
        # The second element of the result is the list of tokens that failed.
        _, pending = fetch_market_data(pending, path)
        print(f'{len(pending)} tokens failed')
        if len(pending) >= count_before:
            # Not a single token was retrieved in this pass: give up.
            print(f"Unable to grab last {len(pending)} tokens")
            break
        print("Repeating Operation")

In [None]:
# Arguments for the download run started on the last line of this cell.
# limit: how many of the top coins (by market cap) to request.
limit = 500
# path: directory where the per-coin CSV files are saved and looked up.
path = fr'C:\Users\Name\files_etc'
# cat: CoinGecko category used to filter the coin listing.
cat = 'gaming'
# Starts the download; the function waits 60 seconds before the first fetch.
repeat_get_token_data(limit,path,cat)