# Automating Crypto Website API Pull

This project demonstrates how to automate the extraction of cryptocurrency data from a website API using Python. It covers setting up the API connection, sending requests, and processing the returned JSON data. 

The project includes code to handle potential errors, ensure data integrity, and efficiently store the retrieved information for further analysis or real-time updates. This automation is ideal for tasks like tracking price changes, portfolio management, or feeding data into a larger trading system. 

The Python libraries used include requests for API calls and pandas for data manipulation.

In [31]:
# Getting the data from https://coinmarketcap.com/api/ as per documentation

from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json

import os  # used only to read the optional API-key override below

# CoinMarketCap "listings/latest" endpoint and the query string sent with it
url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
parameters = {
  'start':'1',
  'limit':'30',
  'convert':'USD'
}
headers = {
  'Accepts': 'application/json',
  # NOTE(review): the fallback key is committed in plain text. Rotate it and
  # supply the real key through the CMC_PRO_API_KEY environment variable,
  # then drop the fallback.
  'X-CMC_PRO_API_KEY': os.environ.get('CMC_PRO_API_KEY', '5a18d06a-f78b-4b2b-8d87-50c2485a1c91'),
}

session = Session()
session.headers.update(headers)

try:
  # requests waits indefinitely unless a timeout is given, so without one the
  # Timeout handler below could never fire for a stalled response.
  response = session.get(url, params=parameters, timeout=30)
  data = json.loads(response.text)
  print(data)
except (ConnectionError, Timeout, TooManyRedirects, ValueError) as e:
  # ValueError covers json.JSONDecodeError, raised when the body is not JSON
  # (for example an HTML error page from a proxy).
  print(e)

#NOTE:
# When running this code I encountered an error caused by Jupyter's output data rate limit
# To fix it I had to run "jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10"
# in the Anaconda Prompt and use the provided localhost URL to keep working
# After that the code was able to run as expected

{'status': {'timestamp': '2024-08-15T07:42:31.498Z', 'error_code': 0, 'error_message': None, 'elapsed': 21, 'credit_count': 1, 'notice': None, 'total_count': 10023}, 'data': [{'id': 1, 'name': 'Bitcoin', 'symbol': 'BTC', 'slug': 'bitcoin', 'num_market_pairs': 11633, 'date_added': '2010-07-13T00:00:00.000Z', 'tags': ['mineable', 'pow', 'sha-256', 'store-of-value', 'state-channel', 'coinbase-ventures-portfolio', 'three-arrows-capital-portfolio', 'polychain-capital-portfolio', 'binance-labs-portfolio', 'blockchain-capital-portfolio', 'boostvc-portfolio', 'cms-holdings-portfolio', 'dcg-portfolio', 'dragonfly-capital-portfolio', 'electric-capital-portfolio', 'fabric-ventures-portfolio', 'framework-ventures-portfolio', 'galaxy-digital-portfolio', 'huobi-capital-portfolio', 'alameda-research-portfolio', 'a16z-portfolio', '1confirmation-portfolio', 'winklevoss-capital-portfolio', 'usv-portfolio', 'placeholder-ventures-portfolio', 'pantera-capital-portfolio', 'multicoin-capital-portfolio', 'par

In [33]:
# Inspect the top-level type of the parsed API response
type(data)

dict

In [35]:
import pandas as pd

# Remove the cap on displayed columns so wide DataFrames are shown in full

pd.options.display.max_columns = None

In [37]:
# Flatten the records under data['data'] into a table and stamp every row
# with the time of this pull, so separate collections can be told apart

df = pd.json_normalize(data['data']).assign(timestamp=pd.to_datetime('now'))
df

Unnamed: 0,id,name,symbol,slug,num_market_pairs,date_added,tags,max_supply,circulating_supply,total_supply,infinite_supply,platform,cmc_rank,self_reported_circulating_supply,self_reported_market_cap,tvl_ratio,last_updated,quote.USD.price,quote.USD.volume_24h,quote.USD.volume_change_24h,quote.USD.percent_change_1h,quote.USD.percent_change_24h,quote.USD.percent_change_7d,quote.USD.percent_change_30d,quote.USD.percent_change_60d,quote.USD.percent_change_90d,quote.USD.market_cap,quote.USD.market_cap_dominance,quote.USD.fully_diluted_market_cap,quote.USD.tvl,quote.USD.last_updated,platform.id,platform.name,platform.symbol,platform.slug,platform.token_address,timestamp
0,1,Bitcoin,BTC,bitcoin,11633,2010-07-13T00:00:00.000Z,"[mineable, pow, sha-256, store-of-value, state...",21000000.0,1.973998e+07,1.973998e+07,False,,1,,,,2024-08-15T07:40:00.000Z,57972.579648,3.253490e+10,12.5358,-0.368119,-4.727788,1.075420,-7.988078,-12.494066,-12.350869,1.144377e+12,55.6517,1.217424e+12,,2024-08-15T07:40:00.000Z,,,,,,2024-08-15 08:42:38.651678
1,1027,Ethereum,ETH,ethereum,9210,2015-08-07T00:00:00.000Z,"[pos, smart-contracts, ethereum-ecosystem, coi...",,1.202770e+08,1.202770e+08,True,,2,,,,2024-08-15T07:39:00.000Z,2611.776025,1.592308e+10,5.3451,-0.501807,-4.127623,8.014539,-23.058498,-26.712605,-13.673078,3.141367e+11,15.2899,3.141367e+11,,2024-08-15T07:39:00.000Z,,,,,,2024-08-15 08:42:38.651678
2,825,Tether USDt,USDT,tether,94168,2015-02-25T00:00:00.000Z,"[stablecoin, asset-backed-stablecoin, avalanch...",,1.165322e+11,1.180603e+11,True,,3,,,,2024-08-15T07:39:00.000Z,1.000176,5.437938e+10,10.2363,0.017222,-0.033487,-0.023522,-0.033945,0.081678,-0.041556,1.165527e+11,5.6680,1.180811e+11,,2024-08-15T07:39:00.000Z,1027.0,Ethereum,ETH,ethereum,0xdac17f958d2ee523a2206206994597c13d831ec7,2024-08-15 08:42:38.651678
3,1839,BNB,BNB,bnb,2208,2017-07-25T00:00:00.000Z,"[marketplace, centralized-exchange, payments, ...",,1.459363e+08,1.459363e+08,False,,4,,,,2024-08-15T07:39:00.000Z,515.916751,1.621371e+09,-5.0565,-0.371449,-2.047567,6.064996,-8.975936,-14.875279,-10.608008,7.529100e+10,3.6685,7.529100e+10,,2024-08-15T07:39:00.000Z,,,,,,2024-08-15 08:42:38.651678
4,5426,Solana,SOL,solana,716,2020-04-10T00:00:00.000Z,"[pos, platform, solana-ecosystem, cms-holdings...",,4.668354e+08,5.822595e+08,True,,5,,,,2024-08-15T07:39:00.000Z,141.765633,2.751697e+09,29.7328,0.024867,-2.391038,-8.510384,-8.649242,-1.984186,-16.033400,6.618121e+10,3.2212,8.254439e+10,,2024-08-15T07:39:00.000Z,,,,,,2024-08-15 08:42:38.651678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,6210,The Sandbox,SAND,the-sandbox,572,2020-08-05T00:00:00.000Z,"[collectibles-nfts, content-creation, gaming, ...",,2.284232e+09,3.000000e+09,False,,96,,,,2024-08-15T07:40:00.000Z,0.254786,4.582054e+07,17.8305,-0.236351,-2.531881,2.323087,-22.637337,-33.738734,-42.687852,5.819910e+08,0.0283,7.643589e+08,,2024-08-15T07:40:00.000Z,1027.0,Ethereum,ETH,ethereum,0x3845badAde8e6dFF049820680d1F14bD3903a5d0,2024-08-15 08:42:38.651678
96,7334,Conflux,CFX,conflux-network,172,2020-11-11T00:00:00.000Z,"[mineable, pow, platform, collectibles-nfts, d...",,4.332154e+09,5.032154e+09,True,,97,4.332152e+09,5.803262e+08,,2024-08-15T07:39:00.000Z,0.133958,4.084236e+07,49.2668,-0.117851,-3.479544,9.160980,-20.094176,-27.563065,-39.622190,5.803266e+08,0.0282,6.740972e+08,,2024-08-15T07:39:00.000Z,,,,,,2024-08-15 08:42:38.651678
97,30171,Ethena,ENA,ethena,178,2024-03-29T04:42:08.000Z,"[defi, binance-labs-portfolio, okx-ventures-po...",,1.806250e+09,1.500000e+10,False,,98,1.479139e+09,4.653969e+08,,2024-08-15T07:39:00.000Z,0.314640,4.569885e+07,-8.6862,-0.288980,-5.994366,12.992832,-31.199054,-57.399586,-55.650559,5.683192e+08,0.0276,4.719606e+09,,2024-08-15T07:39:00.000Z,1027.0,Ethereum,ETH,ethereum,0x57e114B691Db790C35207b2e685D4A43181e6061,2024-08-15 08:42:38.651678
98,7080,Gala,GALA,gala,452,2020-09-16T00:00:00.000Z,"[collectibles-nfts, gaming, binance-smart-chai...",,3.397325e+10,3.599741e+10,False,,99,3.717357e+10,6.140462e+08,,2024-08-15T07:39:00.000Z,0.016518,6.671969e+07,25.4565,-0.085470,-1.159135,-2.054862,-30.125423,-49.646663,-63.000096,5.611822e+08,0.0273,5.946180e+08,,2024-08-15T07:39:00.000Z,1027.0,Ethereum,ETH,ethereum,0xd1d2Eb1B1e90B638588728b4130137D262C87cae,2024-08-15 08:42:38.651678


In [8]:
# Automating the process through a function that will run over and over again
# and append the new data to the data it already has
# NOTE: This is the final and debugged code, after running into a few issues. See cell below for
# error log and debugging techniques.

import os
from time import sleep
import json
import pandas as pd
from requests import Session, ConnectionError, Timeout, TooManyRedirects

def api_runner():
    """Pull the latest listings once, accumulate them in memory and on disk.

    Sends one request to the CoinMarketCap ``listings/latest`` endpoint,
    normalizes the returned records into a DataFrame, stamps them with the
    current time, appends them to the module-level ``df`` and writes the
    newly fetched rows to the CSV file.

    Side effects:
        * Creates or extends the global ``df``.
        * Creates the CSV file on the first successful call, appends to it
          on later calls.
        * Prints progress and failure messages.

    Returns:
        None. The function returns early (after printing a message) when the
        request fails, the body is not valid JSON, or the payload carries no
        records.
    """
    global df

    # Initialize an empty DataFrame if df doesn't exist
    if 'df' not in globals():
        df = pd.DataFrame()

    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    parameters = {
        'start': '1',
        'limit': '100',
        'convert': 'USD'
    }
    headers = {
        'Accepts': 'application/json',
        # NOTE(review): the fallback key is committed in plain text. Rotate it
        # and supply the real key through the CMC_PRO_API_KEY environment
        # variable, then drop the fallback.
        'X-CMC_PRO_API_KEY': os.environ.get(
            'CMC_PRO_API_KEY', '5a18d06a-f78b-4b2b-8d87-50c2485a1c91'
        ),
    }

    # The context manager closes the session's connections on every exit
    # path; this function is called repeatedly, so they would otherwise pile up.
    with Session() as session:
        session.headers.update(headers)
        try:
            # requests waits indefinitely unless a timeout is given.
            response = session.get(url, params=parameters, timeout=30)
            data = json.loads(response.text)
        except (ConnectionError, Timeout, TooManyRedirects, ValueError) as e:
            # ValueError covers json.JSONDecodeError (body was not JSON).
            print(f"API request failed: {e}")
            return  # Exit the function if an error occurs
    print("API request successful.")

    # Check if data is received correctly: the payload must be a dict with a
    # non-empty 'data' list, otherwise there is nothing to store.
    if not isinstance(data, dict) or not data.get('data'):
        print("No data received from the API.")
        # Surface the API's own explanation when the payload carries one
        status = data.get('status') if isinstance(data, dict) else None
        if isinstance(status, dict) and status.get('error_message'):
            print(f"API error: {status['error_message']}")
        return

    df2 = pd.json_normalize(data['data'])
    df2['timestamp'] = pd.to_datetime('now')
    print(f"Data normalized. Number of records fetched: {len(df2)}")

    # Concatenate new data with existing DataFrame
    df = pd.concat([df, df2], ignore_index=True)
    print(f"Data concatenated. Total records in DataFrame: {len(df)}")

    # Correct CSV file path
    csv_file_path = r"C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI.csv"

    # Write only this pull's rows (df2). The accumulated df already contains
    # every earlier pull, so appending it would duplicate those rows in the
    # file on each call. The header is written once, when the file is created;
    # later appends assume the API keeps returning the same columns.
    if not os.path.isfile(csv_file_path):
        df2.to_csv(csv_file_path, index=False, mode='w', header=True)
        print(f"CSV file created at {csv_file_path}")
    else:
        df2.to_csv(csv_file_path, index=False, mode='a', header=False)
        print(f"Data appended to existing CSV file at {csv_file_path}")

# Script to run the API pull multiple times
TOTAL_RUNS = 333      # number of pulls to perform
PAUSE_SECONDS = 60    # wait 1 minute between consecutive pulls

for i in range(TOTAL_RUNS):
    print(f"Starting API Runner iteration {i+1}")
    api_runner()
    # NOTE(review): api_runner() returns None on every path, so this line is
    # printed even when the pull failed; api_runner prints its own failure
    # message in that case.
    print('API Runner completed successfully')
    # No pull follows the last iteration, so waiting after it is wasted time
    if i + 1 < TOTAL_RUNS:
        sleep(PAUSE_SECONDS)

Starting API Runner iteration 1
API request successful.
Data normalized. Number of records fetched: 100
Data concatenated. Total records in DataFrame: 103
CSV file created at C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI.csv
API Runner completed successfully
Starting API Runner iteration 2
API request successful.
Data normalized. Number of records fetched: 100
Data concatenated. Total records in DataFrame: 203
Data appended to existing CSV file at C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI.csv
API Runner completed successfully
Starting API Runner iteration 3
API request successful.
Data normalized. Number of records fetched: 100
Data concatenated. Total records in DataFrame: 303
Data appended to existing CSV file at C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI.csv
API Runner completed successfully


KeyboardInterrupt: 

In [6]:
# I encountered an issue creating a CSV file to append the data to when running the previous script.
# To troubleshoot, I ran a separate file-creation test to check whether the issue persisted, and it did not.
# This confirms that the CSV creation logic is correct and that the issue lies in the integration
# with the API data and the circumstances under which the CSV is written. To fix that, I have to debug
# the API script and update it. The debugging steps I'm going to take are to add debugging statements
# to track where the process might be failing or stopping, and to add data validation.
# For more detail on the troubleshooting, see the documentation file included in my Python repository on GitHub.

import os
import pandas as pd

# Small throwaway frame used only to exercise the CSV write path
data = dict(Column1=[1, 2, 3], Column2=['A', 'B', 'C'])
df = pd.DataFrame(data)

# Target file for the write test
csv_file_path = r"C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI_test.csv"

# An existing file gets the rows appended without a header; otherwise the
# file is created (to_csv defaults: write mode, header row included)
if os.path.isfile(csv_file_path):
    df.to_csv(csv_file_path, index=False, mode='a', header=False)
    print(f"Data appended to existing CSV file at {csv_file_path}")
else:
    df.to_csv(csv_file_path, index=False)
    print(f"CSV file created at {csv_file_path}")


CSV file created at C:\Users\gusta\Desktop\Dri - Projects\CryptoAPI_test.csv
