# GET DATA

In [5]:
import pandas as pd
import math
import os.path
import time
from binance.client import Client
from datetime import timedelta, datetime
from dateutil import parser
from tqdm import tqdm_notebook #(Optional, used for progress-bars)
import json

In [27]:
# Read the Binance API credentials from a local JSON file so that the keys
# never appear in the notebook itself.
with open('secrets/binance.secrets', 'r') as f:
    secrets = json.load(f)

In [33]:
# Shared API client used by every download helper in this notebook; the
# credentials come from the `secrets` mapping loaded from disk.
binance_client = Client(
    api_key=secrets['api_key'],
    api_secret=secrets['api_secret'],
)

In [40]:
# Length of each supported kline interval, expressed in minutes.
# Used to turn "minutes of missing data" into a number of candles.
binsizes = {
    "1m": 1,
    "15m": 15,
    "5m": 5,
    "1h": 60,
    "1d": 24 * 60,
}

In [37]:
def minutes_of_new_data(symbol, time_interval, data):
    """Return the ``(old, new)`` time window that still has to be downloaded.

    Args:
        symbol: Trading pair forwarded to the Binance client, e.g. ``'BTCUSDT'``.
        time_interval: Kline interval string forwarded to the client, e.g. ``'1h'``.
        data: Previously stored klines with a ``timestamp`` column. May be an
            empty DataFrame when nothing has been downloaded yet.

    Returns:
        A tuple ``(old, new)``:
        ``old`` is the last stored timestamp as a ``datetime`` (or 1 Jan 2019
        when ``data`` is empty); ``new`` is a pandas ``Timestamp`` built from
        the first field of the most recent kline, interpreted as milliseconds
        since the epoch.
    """
    if len(data) > 0:
        last_seen = data["timestamp"].iloc[-1]
        if isinstance(last_seen, str):
            # Timestamps loaded back from the CSV cache arrive as text.
            old = parser.parse(last_seen)
        else:
            # Already a datetime-like value (e.g. a parsed column); dateutil's
            # parser would reject it, so convert it directly instead.
            old = pd.Timestamp(last_seen).to_pydatetime()
    else:
        # Nothing cached yet: start from the fixed beginning-of-history date.
        old = datetime.strptime('1 Jan 2019', '%d %b %Y')

    # Only the newest candle is needed, so ask for a single kline rather than
    # a full default-sized page.
    latest_kline = binance_client.get_klines(symbol=symbol, interval=time_interval, limit=1)[-1]
    new = pd.to_datetime(latest_kline[0], unit='ms')
    return old, new

def get_all_binance(symbol, time_interval, save = False):
    """Download klines for ``symbol`` and merge them with the local CSV cache.

    The cache lives at ``data/<symbol>-<time_interval>-data.csv``. When it
    exists, only the window between its last timestamp and the newest kline
    is requested; otherwise the download starts from the default start date
    chosen by ``minutes_of_new_data``.

    Args:
        symbol: Trading pair, e.g. ``'BTCUSDT'``.
        time_interval: Kline interval; must be a key of ``binsizes``.
        save: When true, write the merged data back to the CSV cache.

    Returns:
        A DataFrame indexed by ``timestamp`` holding cached plus newly
        downloaded klines, with one row per timestamp.
    """
    filename = 'data/%s-%s-data.csv' % (symbol, time_interval)
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    # Decide once whether usable cached rows exist instead of comparing the
    # start date against a duplicated magic constant later on.
    has_cache = len(data_df) > 0

    oldest_point, newest_point = minutes_of_new_data(symbol, time_interval, data_df)
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[time_interval])

    if not has_cache:
        print('Downloading all available %s data for %s. Be patient..!' % (time_interval, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, time_interval))
    binance_response = binance_client.get_historical_klines(symbol,
                                                  time_interval,
                                                  oldest_point.strftime("%d %b %Y %H:%M:%S"),
                                                  newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(binance_response,
                        columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    if has_cache:
        # Cached timestamps come back from the CSV as text; convert them so the
        # merged index has a single datetime type.
        data_df['timestamp'] = pd.to_datetime(data_df['timestamp'])
        # DataFrame.append no longer exists in pandas 2.x; concat is the
        # supported way to stack the two frames.
        data_df = pd.concat([data_df, data], ignore_index=True)
        # The request window starts at the last cached timestamp, so that
        # candle would be stored twice. Keep the freshly downloaded copy.
        data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
        # NOTE(review): values parsed from the CSV and values taken straight
        # from the API response may have different dtypes -- confirm whether
        # the numeric columns should be normalised here.
    else:
        data_df = data
    data_df = data_df.set_index('timestamp')
    if save:
        # Make sure the cache directory exists before the first save.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        data_df.to_csv(filename)
    print('All caught up..!')
    return data_df

In [38]:
# Fetch daily BTCUSDT klines and write them to the CSV cache; the returned
# frame is discarded so the cell does not render it.
_ = get_all_binance(symbol='BTCUSDT', time_interval='1d', save=True)

Downloading 2020320 minutes of new data available for BTCUSDT, i.e. 1403 instances of 1d data.
All caught up..!


In [39]:
# Fetch hourly BTCUSDT klines and write them to the CSV cache; the returned
# frame is discarded so the cell does not render it.
_ = get_all_binance(symbol='BTCUSDT', time_interval='1h', save=True)

Downloading 2021340 minutes of new data available for BTCUSDT, i.e. 33689 instances of 1h data.
All caught up..!


In [41]:
# Fetch 15-minute BTCUSDT klines and write them to the CSV cache; the returned
# frame is discarded so the cell does not render it.
_ = get_all_binance(symbol='BTCUSDT', time_interval='15m', save=True)

Downloading 2021385 minutes of new data available for BTCUSDT, i.e. 134759 instances of 15m data.
All caught up..!


In [42]:
# Fetch 5-minute BTCUSDT klines and write them to the CSV cache; the returned
# frame is discarded so the cell does not render it.
_ = get_all_binance(symbol='BTCUSDT', time_interval='5m', save=True)

Downloading 2021395 minutes of new data available for BTCUSDT, i.e. 404279 instances of 5m data.
All caught up..!
