In [2]:
import requests        # for making http requests to binance
import json            # for parsing what binance sends back to us
import pandas as pd    # for storing and manipulating the data we get back
import numpy as np     # numerical python, i usually need this somewhere 
                       # and so i import by habit nowadays

import matplotlib.pyplot as plt # for charts and such
    
import datetime as dt  # for dealing with times

In [3]:
# Base URL of the Binance REST endpoint that serves candlestick (kline) bars.
# Uses the v3 path, which is the documented klines endpoint; the earlier
# /api/v1/ path is a legacy route. The row layout (12 fields) is the same.
root_url = 'https://api.binance.com/api/v3/klines'

In [4]:
def get_bars(symbol, interval = '1h', limit=500, starttime='', endtime=''):
    """Download candlestick (kline) bars for one symbol from ``root_url``.

    Parameters
    ----------
    symbol : str
        Trading pair sent as the ``symbol`` query parameter, e.g. ``"BNBBTC"``.
    interval : str, default '1h'
        Bar width sent as the ``interval`` query parameter, e.g. ``'15m'``.
    limit : int, default 500
        Maximum number of bars to request.
    starttime, endtime : int or str, default ''
        Optional bounds sent as ``startTime`` / ``endTime``. An empty string
        or ``None`` omits the bound; any other value (including ``0``) is sent.

    Returns
    -------
    pandas.DataFrame
        One row per bar with columns ``open_time, o, h, l, c, v, close_time,
        qav, num_trades, taker_base_vol, taker_quote_vol, ignore``. The index
        is ``close_time`` (milliseconds) converted with
        ``datetime.fromtimestamp``, i.e. naive datetimes in the local timezone
        of the machine running the notebook. An empty frame with the same
        columns is returned when the server sends no bars.

    Raises
    ------
    ValueError
        If the decoded payload is not a list of 12-field rows (for example an
        error object); the payload is attached to the exception.
    """
    columns = ['open_time',
               'o', 'h', 'l', 'c', 'v',
               'close_time', 'qav', 'num_trades',
               'taker_base_vol', 'taker_quote_vol', 'ignore']

    # Let requests build and URL-encode the query string instead of gluing
    # it together by hand.
    params = {'symbol': symbol, 'interval': interval, 'limit': limit}
    if starttime is not None and starttime != '':
        params['startTime'] = starttime
    if endtime is not None and endtime != '':
        params['endTime'] = endtime

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(root_url, params=params, timeout=30)
    data = json.loads(response.text)

    # Anything that is not a list of rows (e.g. an error object) is surfaced
    # to the caller together with the payload that was received.
    if not isinstance(data, list):
        raise ValueError(data)
    try:
        # Passing the column names up front also gives an empty result the
        # expected schema instead of failing on a length mismatch.
        df = pd.DataFrame(data, columns=columns)
    except (ValueError, TypeError) as exc:
        raise ValueError(data) from exc

    df.index = [dt.datetime.fromtimestamp(x/1000.0) for x in df.close_time]
    return df

In [116]:
# Request up to 1000 fifteen-minute BNBBTC bars with
# startTime=1500004800000 (epoch milliseconds) and no end bound.
df = get_bars(
    symbol="BNBBTC",
    interval='15m',
    limit=1000,
    starttime=1500004800000,
    endtime='',
)

In [117]:
# Number of bars returned by the request above (limit was 1000).
len(df)

1000

In [118]:
# Preview the first rows of the fetched frame.
df.head()

Unnamed: 0,open_time,o,h,l,c,v,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
2017-07-14 13:29:59.999,1500005700000,4.086e-05,4.898e-05,4e-05,4.747e-05,1259082.0,1500006599999,56.06023368,305,943579.0,41.91450043,84713908.4404695
2017-07-14 13:44:59.999,1500006600000,4.747e-05,5.48e-05,4.747e-05,5.348e-05,1468616.0,1500007499999,76.15190292,295,1173502.0,60.49395309,85284586.8034695
2017-07-14 13:59:59.999,1500007500000,5.3e-05,5.348e-05,4.746e-05,4.955e-05,977907.0,1500008399999,49.83045683,218,372278.0,19.65486654,86413164.7844695
2017-07-14 14:14:59.999,1500008400000,4.946e-05,4.946e-05,4.48e-05,4.491e-05,625622.0,1500009299999,29.46253842,153,113307.0,5.45375234,86369375.5344695
2017-07-14 14:29:59.999,1500009300000,4.491e-05,4.52e-05,3.9e-05,4.395e-05,619113.0,1500010199999,25.27678474,123,177194.0,7.30468249,86339365.2544695


In [100]:
# Preview the last rows of the fetched frame.
df.tail()

Unnamed: 0,open_time,o,h,l,c,v,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
2017-07-24 21:59:59.999,1500900300000,4.201e-05,4.21e-05,4.2e-05,4.209e-05,7947.0,1500901199999,0.33422454,17,1915.0,0.08061958,100045602.2878995
2017-07-24 22:14:59.999,1500901200000,4.209e-05,4.224e-05,4.2e-05,4.218e-05,22332.0,1500902099999,0.94069304,21,20998.0,0.88465282,100040614.2878995
2017-07-24 22:29:59.999,1500902100000,4.216e-05,4.218e-05,4.208e-05,4.211e-05,7952.0,1500902999999,0.33503502,22,4852.0,0.20446648,100048799.2878995
2017-07-24 22:44:59.999,1500903000000,4.216e-05,4.225e-05,4.214e-05,4.222e-05,36773.0,1500903899999,1.55194888,30,31338.0,1.32278706,100048799.2878995
2017-07-24 22:59:59.999,1500903900000,4.228e-05,4.23e-05,4.221e-05,4.229e-05,17608.0,1500904799999,0.74418614,23,10034.0,0.42421623,100048799.2878995


In [129]:
# Sanity check against the 22:14:59.999 row shown by df.tail(): base volume (v)
# times the open price (o) lands close to, but not exactly on, that row's qav.
22332.00000000 * 0.00004209

0.93995388

In [74]:
# Convert an epoch-milliseconds value into a readable timestamp.
# fromtimestamp() yields local time, and %I is the 12-hour clock field
# (the format string carries no AM/PM marker).
dt.datetime.fromtimestamp(1500000000000/1000).strftime("%A, %B %d, %Y %I:%M:%S")

'Friday, July 14, 2017 11:40:00'

In [112]:
# Inspect the open_time of the last bar. The frame is indexed by datetimes,
# so the positional lookup is spelled out with .iloc rather than relying on
# the deprecated integer fallback of Series[...].
repr(df['open_time'].iloc[-1])

'1500903900000'

In [131]:
# Page through the 15-minute BNBBTC history, 1000 bars per request, and
# collect each page in `dfs`.
dfs = []
starttime = 1500004800000  # startTime of the first request, in epoch milliseconds

l = 1000
i = 0
# A page shorter than the requested 1000 bars means the history is exhausted.
while l == 1000:
    df = get_bars("BNBBTC", interval='15m', limit=1000, starttime=starttime, endtime='')
    l = len(df)
    if l == 0:
        # The previous page was full but nothing follows it; there is no last
        # row to read a cursor from, so stop without storing an empty page.
        break
    dfs.append(df)
    # Resume one millisecond after the last bar already received. The frame is
    # indexed by datetimes, so the last row is taken positionally with .iloc.
    starttime = int(df['open_time'].iloc[-1]) + 1
    i += 1
    print(i, end='\r')

70

In [124]:
# Row count of the first downloaded page.
len(dfs[0])

1000

In [125]:
# Row count of the second downloaded page.
len(dfs[1])

1000

In [127]:
# Total number of rows across all pages collected in dfs.
len(pd.concat(dfs))

5000

In [133]:
# Row count of the final page; the download loop stops once a page holds
# fewer than 1000 rows.
len(dfs[-1])

812

In [134]:
# Stack every downloaded page into a single frame.
huge_df = pd.concat(dfs)

In [135]:
# Total number of bars in the combined frame.
len(huge_df)

69812

In [138]:
# Persist the combined frame to an HDF5 file under the key 'STW'.
# The key is passed by name: pandas deprecates positional arguments to
# to_hdf other than the path.
huge_df.to_hdf('binance_data.h5', key='STW')

In [1]:
import pandas as pd

In [5]:
# Load the bars stored under the 'STW' key and preview the first rows.
df = pd.read_hdf(
    'TradingGym/dataset/binance_data.h5',
    key='STW',
)
df.head()

Unnamed: 0,datetime,open_time,o,h,l,c,v,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
0,2017-07-14 13:14:59.999,1500004800000,5e-05,5.3e-05,1e-05,4e-05,3148043.0,1500005699999,130.301971,451,2669352.0,111.184044,84396656.0
1,2017-07-14 13:29:59.999,1500005700000,4.1e-05,4.9e-05,4e-05,4.7e-05,1259082.0,1500006599999,56.060234,305,943579.0,41.914501,84713912.0
2,2017-07-14 13:44:59.999,1500006600000,4.7e-05,5.5e-05,4.7e-05,5.3e-05,1468616.0,1500007499999,76.151901,295,1173502.0,60.493954,85284584.0
3,2017-07-14 13:59:59.999,1500007500000,5.3e-05,5.3e-05,4.7e-05,5e-05,977907.0,1500008399999,49.830456,218,372278.0,19.654867,86413168.0
4,2017-07-14 14:14:59.999,1500008400000,4.9e-05,4.9e-05,4.5e-05,4.5e-05,625622.0,1500009299999,29.462538,153,113307.0,5.453753,86369376.0


In [13]:
# Hold out the last 5% of rows as the test set; rows are split by position
# and are not shuffled.
idx = int(len(df)*0.05)
# Slice from an explicit boundary. Negative-offset slices (`[:-idx]`, `[-idx:]`)
# flip their meaning when idx is 0, which would leave the training set empty
# and put every row in the test set for frames shorter than 20 rows.
split_at = len(df) - idx
df_train = df.iloc[:split_at]
df_test = df.iloc[split_at:]

In [14]:
# (rows, columns) of the training split.
df_train.shape

(66322, 13)

In [15]:
# (rows, columns) of the test split.
df_test.shape

(3490, 13)

In [16]:
# First rows of the training split.
df_train.head()

Unnamed: 0,datetime,open_time,o,h,l,c,v,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
0,2017-07-14 13:14:59.999,1500004800000,5e-05,5.3e-05,1e-05,4e-05,3148043.0,1500005699999,130.301971,451,2669352.0,111.184044,84396656.0
1,2017-07-14 13:29:59.999,1500005700000,4.1e-05,4.9e-05,4e-05,4.7e-05,1259082.0,1500006599999,56.060234,305,943579.0,41.914501,84713912.0
2,2017-07-14 13:44:59.999,1500006600000,4.7e-05,5.5e-05,4.7e-05,5.3e-05,1468616.0,1500007499999,76.151901,295,1173502.0,60.493954,85284584.0
3,2017-07-14 13:59:59.999,1500007500000,5.3e-05,5.3e-05,4.7e-05,5e-05,977907.0,1500008399999,49.830456,218,372278.0,19.654867,86413168.0
4,2017-07-14 14:14:59.999,1500008400000,4.9e-05,4.9e-05,4.5e-05,4.5e-05,625622.0,1500009299999,29.462538,153,113307.0,5.453753,86369376.0


In [17]:
# First rows of the test split; it starts where the training split ends.
df_test.head()

Unnamed: 0,datetime,open_time,o,h,l,c,v,close_time,qav,num_trades,taker_base_vol,taker_quote_vol,ignore
66322,2019-06-09 04:14:59.999,1560020400000,0.004029,0.004037,0.004028,0.004034,34306.710938,1560021299999,138.321274,733,19665.849609,79.293716,0.0
66323,2019-06-09 04:29:59.999,1560021300000,0.004034,0.004039,0.004029,0.004039,32998.460938,1560022199999,133.169037,802,21544.519531,86.948921,0.0
66324,2019-06-09 04:44:59.999,1560022200000,0.004039,0.004046,0.004035,0.004045,45193.289062,1560023099999,182.580551,1051,24448.050781,98.771172,0.0
66325,2019-06-09 04:59:59.999,1560023100000,0.004043,0.004047,0.004037,0.004043,38223.019531,1560023999999,154.570938,723,22162.119141,89.618576,0.0
66326,2019-06-09 05:14:59.999,1560024000000,0.004044,0.004047,0.00404,0.004043,41289.191406,1560024899999,166.992157,645,28049.970703,113.444862,0.0


In [18]:
# Write each split to its own HDF5 file under the key 'STW'.
# The key is passed by name: pandas deprecates positional arguments to
# to_hdf other than the path.
df_train.to_hdf('TradingGym/dataset/binance_data_train.h5', key='STW')
df_test.to_hdf('TradingGym/dataset/binance_data_test.h5', key='STW')

---

In [8]:
# Request up to 1000 fifteen-minute BNBBTC bars with
# startTime=1563160500001 (epoch milliseconds) and no end bound.
df = get_bars(
    symbol="BNBBTC",
    interval='15m',
    limit=1000,
    starttime=1563160500001,
    endtime='',
)

In [9]:
# Number of bars returned by the request above (limit was 1000).
len(df)

286

## Reference
https://steemit.com/python/@marketstack/how-to-download-historical-price-data-from-binance-with-python