This notebook retrieves Bitcoin prices at minutely, hourly and daily intervals from the Binance API and saves the results to CSV files.

In [1]:
import pandas as pd
import math
import os
from binance.client import Client
from datetime import datetime
from dateutil import parser
import config

In [2]:
### CONSTANTS
# Trading pair requested from Binance in the download cells below.
symbol = 'BTCUSDT'
# Kline interval label -> interval length in minutes; used to turn a span of
# minutes into a number of klines.
binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
# NOTE(review): not referenced by any cell visible here - confirm it is still needed.
batch_size = 950
# API client shared by every function in this notebook; the credentials are
# read from the local `config` module rather than being written in the notebook.
binance_client = Client(api_key=config.API_KEY, api_secret=config.API_SECRET)

In [3]:
### FUNCTIONS
def minutes_of_new_data(symbol, kline_size, data):
    """Work out the time window that still has to be downloaded.

    Parameters
    ----------
    symbol : str
        Trading pair forwarded to the Binance client (e.g. ``'BTCUSDT'``).
    kline_size : str
        Kline interval forwarded to the Binance client (e.g. ``'1m'``).
    data : pandas.DataFrame
        Previously stored klines; may be empty. When it is not empty it must
        have a ``timestamp`` column whose last entry is the most recent
        stored kline.

    Returns
    -------
    tuple
        ``(old, new)``. ``old`` is the last stored timestamp, or 1 Jan 2021
        when ``data`` is empty. ``new`` is the open time of the latest kline
        returned by the exchange.
    """
    if len(data) > 0:
        # pd.to_datetime accepts both the strings produced by read_csv and
        # already-parsed Timestamps; dateutil's parser only accepts strings.
        old = pd.to_datetime(data["timestamp"].iloc[-1])
    else:
        old = datetime.strptime('1 Jan 2021', '%d %b %Y')

    # Only the newest kline is needed, so ask for a single row instead of
    # downloading the API's default page and discarding all but the last one.
    latest_klines = binance_client.get_klines(symbol=symbol, interval=kline_size, limit=1)
    new = pd.to_datetime(latest_klines[-1][0], unit='ms')

    return old, new


def get_all_binance(symbol, kline_size, save=False,
                    data_dir='C:/Users/Javi/Desktop/cryptocurrency_predictor/data/crypto'):
    """Download klines from the Binance API and merge them with any stored CSV.

    The CSV ``<data_dir>/<symbol>_<kline_size>_data.csv`` is used as a cache:
    when it exists, only klines from its last timestamp onwards are requested
    and merged into the stored rows; otherwise everything since 1 Jan 2021 is
    downloaded.

    Parameters
    ----------
    symbol : str
        Trading pair in capital letters (e.g. ``'BTCUSDT'``).
    kline_size : str
        Kline interval; must be a key of ``binsizes`` (e.g. ``'1m'``, ``'1h'``).
    save : bool
        When True, write the merged data back to the CSV, indexed by
        ``timestamp`` and without the ``quote_av``, ``trades``,
        ``tb_base_av``, ``tb_quote_av`` and ``ignore`` columns.
    data_dir : str
        Directory holding the CSV files. Defaults to the location originally
        hard-coded in this notebook.

    Returns
    -------
    None
    """
    unused_columns = ['quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore']
    filename = os.path.join(data_dir, '%s_%s_data.csv' % (symbol, kline_size))

    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    has_stored_data = len(data_df) > 0

    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df)

    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])

    # Decide on the message from whether a cache exists, instead of comparing
    # against a second copy of the default start date.
    if not has_stored_data:
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol,
                                                                                                      available_data,
                                                                                                      kline_size))

    klines = binance_client.get_historical_klines(symbol,
                                                  kline_size,
                                                  oldest_point.strftime("%d %b %Y %H:%M:%S"),
                                                  newest_point.strftime("%d %b %Y %H:%M:%S"))
    new_df = pd.DataFrame(klines,
                          columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av',
                                   'trades', 'tb_base_av', 'tb_quote_av', 'ignore'])
    new_df['timestamp'] = pd.to_datetime(new_df['timestamp'], unit='ms')
    new_df = new_df.drop(columns=unused_columns)

    if has_stored_data:
        # read_csv yields timestamps as strings; parse them so stored and new
        # rows share one dtype and can be compared for duplicates.
        data_df['timestamp'] = pd.to_datetime(data_df['timestamp'])
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data_df = pd.concat([data_df, new_df], ignore_index=True)
        # The download window starts at the last stored timestamp, so that
        # kline is fetched again. Keep the fresh copy: the stored one may have
        # been saved while that interval was still open.
        data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
    else:
        data_df = new_df

    if save:
        data_df.set_index('timestamp').to_csv(filename)

    print('All caught up..!')

In [4]:
# Fetch the 1-minute klines for `symbol` and write them to their CSV file.
get_all_binance(symbol=symbol, kline_size='1m', save=True)

Downloading all available 1m data for BTCUSDT. Be patient..!
All caught up..!


In [5]:
# Fetch the 5-minute klines for `symbol` and write them to their CSV file.
get_all_binance(symbol=symbol, kline_size='5m', save=True)

Downloading all available 5m data for BTCUSDT. Be patient..!
All caught up..!


In [6]:
# Fetch the 1-hour klines for `symbol` and write them to their CSV file.
get_all_binance(symbol=symbol, kline_size='1h', save=True)

Downloading all available 1h data for BTCUSDT. Be patient..!
All caught up..!


### First look at the data

In [7]:
# Load the hourly CSV written by get_all_binance above. No index column is
# specified, so `timestamp` comes back as a regular column.
btc_h = pd.read_csv('C:/Users/Javi/Desktop/cryptocurrency_predictor/data/crypto/BTCUSDT_1h_data.csv')

In [8]:
# Display the loaded hourly frame as the cell's output.
btc_h

Unnamed: 0,timestamp,open,high,low,close,volume,close_time
0,2021-01-01 00:00:00,28923.63,29031.34,28690.17,28995.13,2311.811445,1609462799999
1,2021-01-01 01:00:00,28995.13,29470.00,28960.35,29409.99,5403.068471,1609466399999
2,2021-01-01 02:00:00,29410.00,29465.26,29120.03,29194.65,2384.231560,1609469999999
3,2021-01-01 03:00:00,29195.25,29367.00,29150.02,29278.40,1461.345077,1609473599999
4,2021-01-01 04:00:00,29278.41,29395.00,29029.40,29220.31,2038.046803,1609477199999
...,...,...,...,...,...,...,...
1447,2021-03-02 08:00:00,48331.60,49222.00,48270.69,48858.08,2996.196635,1614675599999
1448,2021-03-02 09:00:00,48861.23,49265.54,48861.22,49144.23,2269.415485,1614679199999
1449,2021-03-02 10:00:00,49144.22,49263.95,48550.00,48721.09,2242.273409,1614682799999
1450,2021-03-02 11:00:00,48721.09,49047.06,48550.00,48844.13,1837.035863,1614686399999
