In [1]:
import pandas as pd
import numpy as np
from datetime import date
import requests
import pickle
import re
import time
import json
import json.decoder
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from typing import List

In [2]:
def makeCMCApiCall(url: str, params: dict, retries: int=3) -> Optional[Dict[str, Any]]:
    """ Makes a GET API call (to CoinGecko in this file) using the provided url and parameters.

    The request is attempted up to `retries` times. A request timeout, a non-OK
    HTTP status, or a body that cannot be decoded as JSON each count as a failed
    attempt; the function never raises for those cases and returns None instead.

    Args:
        url (str): The API endpoint URL to call.
        params (dict): A dictionary of parameters to include in the API call.
        retries (int): The number of times to attempt the API call. Default is 3.

    Returns:
        response.json() (dict): the data from the api response, or None if every attempt failed.
    """
    for _ in range(retries):
        try:
            response = requests.get(url, params=params, timeout=3)
        except requests.exceptions.Timeout:
            # Timeout error, retry after a short delay
            print('The API call timed out, retrying...')
            time.sleep(1)
            continue

        if response.ok:
            try:
                return response.json()
            except json.decoder.JSONDecodeError as e:
                # Body was not valid JSON; fall through to the next attempt
                print(f'Error decoding JSON response: {str(e)}')
        else:
            # There was an error, retry after a short delay
            print(f'The API call failed with status code {response.status_code}, retrying...')
            time.sleep(0.5)

    # Report the number of attempts actually made rather than a hard-coded 3
    print(f'The api call failed after {retries} attempts.')
    return None

In [None]:
# Covariate columns, in output order, grouped by the response section they come from.
_GECKO_COMMUNITY_FIELDS = ('twitter_followers',
                           'reddit_average_posts_48h',
                           'reddit_average_comments_48h',
                           'reddit_subscribers',
                           'reddit_accounts_active_48h')
_GECKO_DEVELOPER_FIELDS = ('forks',
                           'stars',
                           'subscribers',
                           'total_issues',
                           'closed_issues',
                           'pull_requests_merged',
                           'pull_request_contributors')
_GECKO_COVARIATE_COLUMNS = (_GECKO_COMMUNITY_FIELDS
                            + _GECKO_DEVELOPER_FIELDS
                            + ('code_additions_4_weeks',
                               'code_deletions_4_weeks',
                               'commit_count_4_weeks',
                               'alexa_rank'))


def _extractGeckoCovariates(response_json: Dict[str, Any]) -> Dict[str, Any]:
    """ Flatten one history response into covariate columns; anything missing or null becomes None. """
    # `or {}` covers both an absent section and a section that is present but null
    community = response_json.get('community_data') or {}
    developer = response_json.get('developer_data') or {}
    code_changes = developer.get('code_additions_deletions_4_weeks') or {}
    interest = response_json.get('public_interest_stats') or {}

    covars = {field: community.get(field) for field in _GECKO_COMMUNITY_FIELDS}
    covars.update({field: developer.get(field) for field in _GECKO_DEVELOPER_FIELDS})
    covars['code_additions_4_weeks'] = code_changes.get('additions')
    covars['code_deletions_4_weeks'] = code_changes.get('deletions')
    covars['commit_count_4_weeks'] = developer.get('commit_count_4_weeks')
    covars['alexa_rank'] = interest.get('alexa_rank')
    return covars


def pullAssetCovariates(base_url: str, base_params: dict, gecko_id_universe: list, panel_df: pd.DataFrame) -> pd.DataFrame:
    """ Pull various asset covariates for a given universe of CoinGecko IDs.

    For every id, the `/coins/{id}/history` endpoint is called once per date
    between the asset's first and last date in `panel_df`. Dates for which the
    API call fails are skipped (with a printed notice) instead of aborting the
    whole pull, and assets with no rows in `panel_df` are skipped entirely.

    Args:
        base_url (str): The base URL for the Coingecko API.
        base_params (dict): A dictionary containing the basic parameters for the Coingecko API call.
        gecko_id_universe (list): A list of unique gecko ids to pull.
        panel_df (pd.DataFrame): panel data with columns 'date', 'asset_gecko', 'usd_per_token_cg', 
                                 'usd_mcap_cg', and 'usd_volume_24h_cg'. Only 'date' and
                                 'asset_gecko' are read here.

    Returns:
        asset_covars_df (pd.DataFrame): panel data with asset covariates, one row per
            ('date', 'asset_gecko') pair. Covariates missing from a response are None/NaN.
    """

    # set up object to store all
    gecko_covars_dict = {col: [] for col in ('date', 'asset_gecko') + _GECKO_COVARIATE_COLUMNS}

    # loop over assets to pull
    n_assets = len(gecko_id_universe)
    for i, gecko_id in enumerate(gecko_id_universe):
        # monitor progress
        print(f"Processing id #{i+1} ({(i+1)/n_assets*100:.2f}%): {gecko_id}")

        # set up endpoint
        url = f"{base_url}/coins/{gecko_id}/history"

        # set up params
        params = base_params.copy()
        params['id'] = gecko_id

        # extract dates for this asset (filter the panel once, not once per bound)
        asset_dates = panel_df.loc[panel_df.asset_gecko == gecko_id, 'date']
        if asset_dates.empty:
            print(f"No panel rows found for {gecko_id}, skipping.")
            continue
        first_date = asset_dates.min().strftime('%d-%m-%Y')
        last_date  = asset_dates.max().strftime('%d-%m-%Y')
        # NOTE(review): getDateList is not defined in the cells visible here; it is
        # assumed to return the '%d-%m-%Y' date strings from first_date to last_date.
        all_dates  = getDateList(first_date, last_date)

        for current_date in all_dates:
            # update params
            params['date'] = current_date

            # make the call
            response_json = makeCMCApiCall(url, params)

            if response_json is None:
                # makeCMCApiCall returns None once its retries are exhausted
                print(f"No data returned for {gecko_id} on {current_date}, skipping.")
            else:
                # add data to results dict; the stored date is the requested date plus one day
                # NOTE(review): the one-day shift is kept from the original code; confirm it
                # matches the dating convention of panel_df.
                row = {'date': np.datetime64(datetime.strptime(current_date, '%d-%m-%Y')+timedelta(days=1), 'D'),
                       'asset_gecko': response_json.get('id', gecko_id)}
                row.update(_extractGeckoCovariates(response_json))
                for col, value in row.items():
                    gecko_covars_dict[col].append(value)

            # Space out the calls
            time.sleep(0.2)

    # convert to df to return
    asset_covars_df = pd.DataFrame(gecko_covars_dict)

    # clean up the data: keep the first row for each (date, asset) pair
    asset_covars_df = asset_covars_df[~asset_covars_df.duplicated(subset=['date', 'asset_gecko'])]

    return asset_covars_df

In [None]:
if __name__ == "__main__":
    # set args
    api_fp = '../../admin/coingecko.txt'
    base_url = "https://pro-api.coingecko.com/api/v3"

    # import api key (first line of the key file) and set base parameters
    with open(api_fp) as f:
        API_KEY = f.readlines()
        API_KEY = API_KEY[0].strip()
    base_params = {'x_cg_pro_api_key': API_KEY}

    # Test it is working; bound the wait so a dead connection cannot hang the cell,
    # and fail on an HTTP error status before trying to read the JSON body
    url = f"{base_url}/ping"
    r = requests.get(url, params=base_params, timeout=10)
    r.raise_for_status()
    print(r.json()['gecko_says'])

    # TODO pull in the cg universe from the inclusion crit stuff
    # NOTE(review): gecko_id_universe and panel_df are not assigned anywhere in the
    # cells visible here, so the call below raises NameError until this TODO is done.

    # pull additional asset covariates
    asset_covars_df = pullAssetCovariates(base_url, base_params, gecko_id_universe, panel_df)

    # TODO SAVE

In [None]:
# TODO
# Come back and pull the asset covariates for only our final panel of data,
# because the pull takes so many API calls.
# -- Move the code around so that this pull runs after the final panel is built.