In [12]:
import pandas as pd
import cbpro
from datetime import datetime, timedelta

# Make using PublicClient Methods easier
public_client = cbpro.PublicClient()

In [13]:
# Get list of crypto currencies available on CoinbasePro
# Store ids in a list
currencies = public_client.get_currencies()

currency_ids = []

for currency in currencies:
    currency_ids.append(currency['id'])

print(f"Number of currencies: {len(currency_ids)}")
currency_ids[:10]

Number of currencies: 97


['ALGO', 'DASH', 'OXT', 'ATOM', 'KNC', 'MIR', 'REP', 'ICP', 'CGLD', 'COMP']

The CoinbasePro API only allows for 300 candles maximum per call to get_product_historic_rates().
Therefore, we will need to perform multiple calls to get data from all days where the crypto 
was available to trade. After some searching, GDAX (former name for Coinbase Pro) was launched in 2015
and Ethereum was launched in 2015 as well. So we will start at 1/1/2015 and work up to today's date 


In [14]:
# Create function that allows for parameterization during the API call
def get_end_date(start_date,days):
    """ Given a start date in datetime format, 
        calculate the end date in datetime format
        based on the number of days provided."""
    
    end_date = start_date + timedelta(days=days)
    return end_date

In [15]:
# The CoinbasePro API only allows for 300 candles maximum per call to get_product_historic_rates()
# Therefore, will need to perform multiple calls to get data from all days where the crypto 
# was available to trade. After some searching, GDAX (former name for Coinbase Pro) was launched in 2015
# and Ethereum was launched in 2015 as well. So we will start at 1/1/2015 and work up to today's date 

# Containers
historical_data = {}
symbol = []
time = []
low = []
high = []
opn = []
close = []
volume = []

# Parameters
start = '2015-01-01'
time_delta = 300
granularity = 86400 #Daily candles
today = datetime.now()

# Pull the data and save to DataFrame
for c in currency_ids:
    
    start_date = datetime.strptime(start,'%Y-%m-%d')
    
    while start_date <= today:
        
        end_date = get_end_date(start_date,time_delta)
        
        # Adjust for end_date being in the future
        if end_date > today:
            end_date = today

        historic_rates = public_client.get_product_historic_rates(
            f"{c}-USD", 
            start=datetime.strftime(start_date,'%Y-%m-%d'), 
            end=datetime.strftime(end_date,'%Y-%m-%d'), 
            granularity=granularity
        )

        # Handle case where there is no data available
        # Return from API will be in dict format if no data is availble
        if type(historic_rates) is not dict: 
            try:
                for day in historic_rates:
                    symbol.append(c)
                    time.append((datetime.fromtimestamp(day[0]) + timedelta(days=1)))
                    low.append(day[1])
                    high.append(day[2])
                    opn.append(day[3])
                    close.append(day[4])
                    volume.append(day[5])
            except Exception as e:
                print('c')
                print(day[0])
                print(e)

        # Calculate new start date to use
        start_date = end_date + timedelta(days=1)

# Create DataFrame
data = {
    'symbol':symbol,
    'date':time,
    'high':high,
    'low':low,
    'open':opn,
    'close':close,
    'volume':volume
}
df = pd.DataFrame(data)

In [None]:
df[df['Symbol'] == 'ETH'].sort_values('Date').head()

Unnamed: 0,Symbol,Date,High,Low,Open,Close,Volume
15316,ETH,2016-05-18 19:00:00,14.93,12.5,12.5,13.18,482.521827
15315,ETH,2016-05-19 19:00:00,14.9,13.0,13.18,14.9,950.441205
15314,ETH,2016-05-20 19:00:00,14.82,13.71,14.82,14.17,254.166617
15313,ETH,2016-05-23 19:00:00,13.91,13.61,13.86,13.61,180.786712
15312,ETH,2016-05-24 19:00:00,13.74,12.0,13.68,12.77,2753.239298


In [18]:
df.to_csv('/Volumes/develop/projects/crypto-analysis/output/cbpro_data.csv', index=False)