In [81]:
from binance.client import Client
import os
import time
import pandas as pd
from datetime import timedelta, datetime
from dateutil import parser
import math
from tqdm import tqdm_notebook


from binance.websockets import BinanceSocketManager
from twisted.internet import reactor

# To run the script you need to get Binance API KEY.

API_KEY = 'YOU NEED TO WRITE API KEY HERE'
API_SECRET = 'YOU NEED TO WRITE API SECRET HERE'

binance_client = Client(API_KEY, API_SECRET)

In [82]:
binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
batch_size = 750


def dates_of_data(symbol, kline_size, data, source, date_from, date_to, date_to_isToday=True):
    if len(data) > 0:
        old = parser.parse(data["Timestamp"].iloc[-1])
    elif source == "binance": 
        old = datetime.strptime(date_from, '%d %b %Y')
    if source == "binance" and date_to_isToday == True: 
        new = pd.to_datetime(binance_client.get_klines(symbol=symbol, interval=kline_size)[-1][0], unit='ms')
    else:
        new = datetime.strptime(date_to, '%d %b %Y')
    print(old)
    print(new)

    return old, new


def get_binance_history(symbol, kline_size, date_from, date_to, date_to_isToday=True, save = False):
    name_string = 'data/%s-%s-%sto%sdata.csv'
    filename = (name_string % (symbol, kline_size, date_from, date_to)).replace(' ', '')
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    
    oldest_point, newest_point = dates_of_data(symbol, kline_size, data_df, "binance", date_from, date_to, date_to_isToday)
    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])
    
    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'):
        print('Waiting to download %s data for %s.' % (kline_size, symbol))
    else:
        print('Waiting to download %d minutes of data for %s, %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))
    
    klines = binance_client.get_historical_klines(symbol, kline_size, oldest_point.strftime("%d %b %Y %H:%M:%S"), newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(klines, columns = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close_time', 'Quote_av', 'Trades', 'Tb_base_av', 'Tb_quote_av', 'Ignore' ])
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='ms')
    
    if len(data_df) > 0:
        temp_df = pd.DataFrame(data)
        data_df = data_df.append(temp_df)
    else:
        data_df = data
    data_df.set_index('Timestamp', inplace=True)
    
    # if we want to save the data, save it.
    if save:
        data_df.to_csv(filename)
    print('Finished.')
    return data_df



In [84]:
data = get_binance_history("BTCUSDT", "1m", save=True, date_from='1 Jun 2020', date_to='1 Dec 2020', date_to_isToday=False )
df = pd.read_csv('data/BTCUSDT-1m-1Jun2020to1Dec2020data.csv')
df.head()

df.tail(10)

2020-12-01 00:00:00
2020-12-01 00:00:00
Waiting to download 0 minutes of data for BTCUSDT, 0 instances of 1m data.
Finished.


Unnamed: 0,Timestamp,Open,High,Low,Close,Volume,Close_time,Quote_av,Trades,Tb_base_av,Tb_quote_av,Ignore
263242,2020-11-30 23:52:00,19744.0,19744.99,19740.22,19744.48,60.573448,1606780379999,1195970.0,986,41.37069,816828.7,0
263243,2020-11-30 23:53:00,19744.49,19744.99,19699.52,19718.28,119.123054,1606780439999,2349155.0,1683,68.558198,1351917.0,0
263244,2020-11-30 23:54:00,19718.28,19736.03,19625.05,19641.37,97.284917,1606780499999,1915027.0,2217,37.528715,739252.8,0
263245,2020-11-30 23:55:00,19641.37,19697.58,19637.04,19663.31,112.950287,1606780559999,2221748.0,2162,65.823884,1294934.0,0
263246,2020-11-30 23:56:00,19663.25,19672.28,19635.0,19656.01,80.342827,1606780619999,1579052.0,1776,34.951323,686954.5,0
263247,2020-11-30 23:57:00,19656.01,19686.37,19650.0,19655.99,58.163503,1606780679999,1143874.0,1151,29.788851,585816.0,0
263248,2020-11-30 23:58:00,19656.1,19679.0,19647.83,19658.33,84.698943,1606780739999,1665254.0,1482,31.676067,622790.0,0
263249,2020-11-30 23:59:00,19658.33,19700.0,19656.82,19695.87,136.845571,1606780799999,2693831.0,1708,95.473753,1879199.0,0
263250,2020-12-01 00:00:00,19695.87,19720.0,19675.43,19715.93,136.104435,1606780859999,2681137.0,1828,72.113618,1420672.0,0
263251,2020-12-01 00:00:00,19695.87,19720.0,19675.43,19715.93,136.104435,1606780859999,2681137.0,1828,72.113618,1420672.0,0


In [85]:
# Check is there a missing value
print('Is there a missing value? - ',df.isnull().values.any())

Is there a missing value? -  False


In [86]:
# Drop columns that we won't use
df = df.drop('Close_time', 1)
df = df.drop('Quote_av', 1)
df = df.drop('Trades', 1)
df = df.drop('Tb_base_av', 1)
df = df.drop('Tb_quote_av', 1)
df = df.drop('Ignore', 1)

df.tail(10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume
263242,2020-11-30 23:52:00,19744.0,19744.99,19740.22,19744.48,60.573448
263243,2020-11-30 23:53:00,19744.49,19744.99,19699.52,19718.28,119.123054
263244,2020-11-30 23:54:00,19718.28,19736.03,19625.05,19641.37,97.284917
263245,2020-11-30 23:55:00,19641.37,19697.58,19637.04,19663.31,112.950287
263246,2020-11-30 23:56:00,19663.25,19672.28,19635.0,19656.01,80.342827
263247,2020-11-30 23:57:00,19656.01,19686.37,19650.0,19655.99,58.163503
263248,2020-11-30 23:58:00,19656.1,19679.0,19647.83,19658.33,84.698943
263249,2020-11-30 23:59:00,19658.33,19700.0,19656.82,19695.87,136.845571
263250,2020-12-01 00:00:00,19695.87,19720.0,19675.43,19715.93,136.104435
263251,2020-12-01 00:00:00,19695.87,19720.0,19675.43,19715.93,136.104435


In [87]:
from sklearn.preprocessing import MinMaxScaler


#Columns of price data to use
columns = ['Close', 'Open', 'High', 'Low', 'Volume']

scaler = MinMaxScaler()
# normalization
for c in columns:
    df[c] = scaler.fit_transform(df[c].values.reshape(-1,1))
    
df.tail(10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume
263242,2020-11-30 23:52:00,0.991386,0.989236,0.995493,0.991225,0.016859
263243,2020-11-30 23:53:00,0.991431,0.989236,0.991778,0.988839,0.033289
263244,2020-11-30 23:54:00,0.989043,0.98842,0.984982,0.981835,0.027161
263245,2020-11-30 23:55:00,0.982036,0.984918,0.986076,0.983833,0.031557
263246,2020-11-30 23:56:00,0.984029,0.982613,0.98589,0.983168,0.022407
263247,2020-11-30 23:57:00,0.983369,0.983896,0.987259,0.983166,0.016183
263248,2020-11-30 23:58:00,0.983378,0.983225,0.987061,0.983379,0.023629
263249,2020-11-30 23:59:00,0.983581,0.985138,0.987881,0.986798,0.038262
263250,2020-12-01 00:00:00,0.987001,0.98696,0.98958,0.988625,0.038054
263251,2020-12-01 00:00:00,0.987001,0.98696,0.98958,0.988625,0.038054
