# BTC Data download
<a href=https://medium.com/swlh/retrieving-full-historical-data-for-every-cryptocurrency-on-binance-bitmex-using-the-python-apis-27b47fd8137f>Code Source

### To do:
[ ] Add older data?
    - Bitcoin hasn't been easily tradeable for a while so 4 years probably enough
[ ] VWAP - what is it for?
[ ] MACD - ML to choose these?
[ ] 

In [80]:
# Imports
import pandas as pd
import os
from dotenv import find_dotenv, load_dotenv
import math
import time
import datetime as dt
from bitmex import bitmex
from binance.client import Client
from datetime import timedelta, datetime
from dateutil import parser
from tqdm.notebook import tqdm#(Optional, used for progress-bars)

In [2]:
# Load API keys & secrets
load_dotenv(find_dotenv())
binance_api_key = os.environ.get('BINANCE_API_KEY')
binance_api_secret = os.environ.get('BINANCE_API_SECRET')
bitmex_api_key = os.environ.get('BITMEX_API_KEY')
bitmex_api_secret = os.environ.get('BITMEX_API_KEY')

In [3]:
# Constants
binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
batch_size = 750
binance_client = Client(api_key=binance_api_key, api_secret=binance_api_secret)
bitmex_client = bitmex(test=False, api_key=bitmex_api_key, api_secret=bitmex_api_secret)



In [4]:
### FUNCTIONS
def minutes_of_new_data(symbol, kline_size, data, source):
    if len(data) > 0:  old = parser.parse(data["timestamp"].iloc[-1])
    elif source == "binance": old = datetime.strptime('1 Jan 2017', '%d %b %Y')
    elif source == "bitmex": old = bitmex_client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=1, reverse=False).result()[0][0]['timestamp']
    if source == "binance": new = pd.to_datetime(binance_client.get_klines(symbol=symbol, interval=kline_size)[-1][0], unit='ms')
    if source == "bitmex": new = bitmex_client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=1, reverse=True).result()[0][0]['timestamp']
    return old, new

In [112]:
# Get full historical data - ONLY NEEDED ONCE OR
# Set start date in milliseconds
start_str = str(dt.datetime(2017,1,1).timestamp())

In [160]:
# Use a generator to get full price history
def get_full_historical_data(symbol, interval, start_str):
    kline_data = [x for x in binance_client.get_historical_klines_generator(symbol, interval=interval, start_str=start_str)]

    market_data = pd.DataFrame(kline_data)

    print(f'num rows: {market_data.shape}')

    market_data.columns = [['timestamp',
                           'open',
                           'high',
                           'low',
                           'close',
                           'volume',
                           'close_timestamp',
                           'quote_asset_volume',
                           'no_trades',
                           'taker_buy_base_asset_volume',
                           'taker_buy_quote_asset_volume',
                           'ignore']]

    try:
        market_data.to_csv(f'binance_{symbol}_{interval}_{dt.datetime.fromtimestamp(float(start_str)).strftime("%Y-%m-%d")}.csv', index=False)
    except:
        market_data.to_csv(f'{symbol}_1m.csv', index=False)

In [4]:

def get_all_binance(symbol, kline_size, save = False):
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename): 
        data_df = pd.read_csv(filename)
    else: 
        data_df = pd.DataFrame()

    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "binance")
    
    delta_min = (newest_point - oldest_point).total_seconds()/60
    
    available_data = math.ceil(delta_min/binsizes[kline_size])
    
    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'): 
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else: 
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))

    klines = binance_client.get_historical_klines(symbol, kline_size, oldest_point.strftime("%d %b %Y %H:%M:%S"), newest_point.strftime("%d %b %Y %H:%M:%S"))
    
    data = pd.DataFrame(klines, columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
    
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    
    if len(data_df) > 0:
        temp_df = pd.DataFrame(data)
        data_df = data_df.append(temp_df)
    else: 
        data_df = data

    data_df.set_index('timestamp', inplace=True)
    
    if save: 
        data_df.to_csv(filename)
    
    print('All caught up..!')
    
    return data_df

In [15]:
# TEST TEST TEST

dt.datetime.strptime('2014-01-01 00:00:00:000', )

TypeError: strptime() takes exactly 2 arguments (1 given)

In [6]:
binance_client.get_historical_klines(symbol='BTCUSDT', interval='1d', start_str=pd.todatetime())

TypeError: get_historical_klines() missing 1 required positional argument: 'start_str'

In [6]:
# TEST TEST TEST
# binance_client.get_klines()
binance_client.get_historical_klines_generator(symbol='BTCUSDT', interval='1d', start_str='')

TypeError: get_historical_klines() missing 1 required positional argument: 'start_str'

In [None]:
# binance data in months
def get_all_binance_monthly(symbol, kline_size, data, source):
    filename = f'{symbol}-{kline_size}-data.csv'
    if os.path.isfile(filename):
        data_df = pd.read_csv('filename')

In [4]:
def get_all_bitmex(symbol, kline_size, save = False):
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename): data_df = pd.read_csv(filename)
    else: data_df = pd.DataFrame()
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "bitmex")
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[kline_size])
    rounds = math.ceil(available_data / batch_size)
    if rounds > 0:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data in %d rounds.' % (delta_min, symbol, available_data, kline_size, rounds))
        for round_num in tqdm_notebook(range(rounds)):
            time.sleep(1)
            new_time = (oldest_point + timedelta(minutes = round_num * batch_size * binsizes[kline_size]))
            data = bitmex_client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=batch_size, startTime = new_time).result()[0]
            temp_df = pd.DataFrame(data)
            data_df = data_df.append(temp_df)
    data_df.set_index('timestamp', inplace=True)
    if save and rounds > 0: data_df.to_csv(filename)
    print('All caught up..!')
    return data_df

In [None]:
# Get all binance data
binance_symbols = ["BTCUSDT", "ETHBTC", "XRPBTC"]
for symbol in binance_symbols:
    get_all_binance(symbol, '1m', save = True)

In [None]:
# Get all BitMex data
bitmex_symbols = ["XBTUSD", "ETHM19", "XRPM19"]
for symbol in bitmex_symbols:
    get_all_bitmex(symbol, '1m', save = True)