# Downloading Data
In this notebook we will download historical data via the Bitfinex API. We will be following this article: https://medium.com/coinmonks/how-to-get-historical-crypto-currency-data-954062d40d2d

but will make some alterations for our needs. 

We will be mining 1 year of data, with each datapoint covering a 30-minute timeframe. The data will be in OHLCV format, which we will then use to create our candlestick chart dataset. 

This notebook is only for mining the data; the following notebook will be used for creating our images and for normalization. 

In [1]:
import bitfinex
import pandas as pd
import time
import datetime

In [3]:
# Creating a function to make several calls. 
# The API limits us to 60 calls a minute, but I have seen calls get rejected at lower rates than that.
# Thus we will make a single call to the API every 5 seconds
# Each call will retrieve one-weeks worth of data 

In [31]:
def fetch_data(start, stop, symbol, interval, tick_limit, step, api=None, pause=5):
    """Download candles for the range ``start`` .. ``stop`` in windows of ``step``.

    The range is walked from ``start`` in increments of ``step``. Each window
    triggers one ``candles`` request and the returned rows are appended to a
    single list. No window starts at or after ``stop``, and the last window is
    shortened so that it ends exactly at ``stop``.

    Parameters
    ----------
    start, stop : number
        Bounds of the range, forwarded to the API as ``start`` / ``end``.
        They must use the same unit as ``step`` (this notebook passes
        milliseconds).
    symbol : str
        Forwarded to ``candles`` as ``symbol``.
    interval : str
        Forwarded to ``candles`` as ``interval``.
    tick_limit : int
        Forwarded to ``candles`` as ``limit``.
    step : number
        Width of one request window. Must be positive.
    api : object, optional
        Object exposing ``candles(symbol=, interval=, limit=, start=, end=)``.
        When omitted, a ``bitfinex.bitfinex_v2.api_v2()`` client is created.
    pause : number, optional
        Seconds to sleep between two consecutive requests (default 5).

    Returns
    -------
    list
        Every item returned by the individual requests, in request order.

    Raises
    ------
    ValueError
        If ``step`` is not positive (the walk would never reach ``stop``).
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # instantiating the client only when the caller did not supply one
    if api is None:
        api = bitfinex.bitfinex_v2.api_v2()

    data = []  # will hold all data
    window_start = start
    while window_start < stop:
        # time window, never reaching past the requested stop
        window_end = min(window_start + step, stop)

        # getting data
        res = api.candles(symbol=symbol, interval=interval, limit=tick_limit,
                          start=window_start, end=window_end)
        data.extend(res)

        window_start = window_end
        # throttle only when another request is going to follow
        if window_start < stop:
            time.sleep(pause)

    # returning
    return data

In [32]:
# Durations in milliseconds, each one built from the previous
m30 = 30 * 60 * 1000  # 30 minutes
h1 = 2 * m30          # 1 hour
d1 = 24 * h1          # 1 day
w1 = 7 * d1           # 1 week: our timestep in milliseconds

In [39]:
# Setting our hyper parameters
time_step = w1

# Start and end of the download window, as whole milliseconds since the epoch.
# The datetimes are pinned to UTC so the window does not shift with the
# timezone of the machine running the notebook (time.mktime() would read a
# naive datetime as local time).
t_start = int(datetime.datetime(2019, 5, 24, 0, 0, tzinfo=datetime.timezone.utc).timestamp() * 1000)
t_stop = int(datetime.datetime(2019, 6, 23, 0, 0, tzinfo=datetime.timezone.utc).timestamp() * 1000)

pair = 'btcusd'
bin_size = '30m'

# Number of 30-minute bins in one week. Floor division keeps the count an
# int; true division would hand the float 336.0 to the API as `limit`.
limit = w1 // m30

In [40]:
# Run the windowed download with the parameters configured above
pair_data = fetch_data(
    start=t_start,
    stop=t_stop,
    symbol=pair,
    interval=bin_size,
    tick_limit=limit,
    step=time_step,
)

No keys, only access to public API functions


## Second Attempt

In [2]:
# Client object whose candles() method is used by every download cell below.
# It is created without arguments, i.e. no API keys are supplied.
api_v2 = bitfinex.bitfinex_v2.api_v2()

No keys, only access to public API functions


In [3]:
# Parameters shared by every candles() request in the cells below
pair = 'btcusd'   # passed as `symbol`
bin_size = '30m'  # passed as `interval`
limit = 5000      # passed as `limit`

In [4]:
# Segment 1: 2017-01-01 -> 2017-03-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 1, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2017, 3, 1).timetuple())

seg1 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [5]:
# Segment 2: 2017-03-01 -> 2017-05-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 3, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2017, 5, 1).timetuple())

seg2 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [6]:
# Segment 3: 2017-05-01 -> 2017-07-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 5, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2017, 7, 1).timetuple())

seg3 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [7]:
# Segment 4: 2017-07-01 -> 2017-09-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 7, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2017, 9, 1).timetuple())

seg4 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [8]:
# Segment 5: 2017-09-01 -> 2017-11-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 9, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2017, 11, 1).timetuple())

seg5 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [9]:
# Segment 6: 2017-11-01 -> 2018-01-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2017, 11, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 1, 1).timetuple())

seg6 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [10]:
# Segment 7: 2018-01-01 -> 2018-03-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 1, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 3, 1).timetuple())

seg7 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [11]:
# Segment 8: 2018-03-01 -> 2018-05-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 3, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 5, 1).timetuple())

seg8 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [12]:
# Segment 9: 2018-05-01 -> 2018-07-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 5, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 7, 1).timetuple())

seg9 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [13]:
# Segment 10: 2018-07-01 -> 2018-09-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 7, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 9, 1).timetuple())

seg10 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [14]:
# Segment 11: 2018-09-01 -> 2018-11-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 9, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2018, 11, 1).timetuple())

seg11 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [15]:
# Segment 12: 2018-11-01 -> 2019-01-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2018, 11, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2019, 1, 1).timetuple())

seg12 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [16]:
# Segment 13: 2019-01-01 -> 2019-03-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2019, 1, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2019, 3, 1).timetuple())

seg13 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [17]:
# Segment 14: 2019-03-01 -> 2019-05-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2019, 3, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2019, 5, 1).timetuple())

seg14 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [18]:
# Segment 15: 2019-05-01 -> 2019-07-01
# time.mktime() reads the naive datetime as local time; * 1000 gives milliseconds.
t_start = 1000 * time.mktime(datetime.datetime(2019, 5, 1).timetuple())
t_stop = 1000 * time.mktime(datetime.datetime(2019, 7, 1).timetuple())

seg15 = api_v2.candles(
    symbol=pair, interval=bin_size, limit=limit,
    start=t_start, end=t_stop,
)

In [19]:
# Column labels applied to every candle row
names = ['time', 'open', 'close', 'high', 'low', 'volume']

# Wrap each downloaded segment in its own DataFrame.
# NOTE(review): seg15 is downloaded earlier but is not converted here, so it
# never reaches the combined dataset - confirm whether that is intentional.
(df1, df2, df3, df4, df5, df6, df7,
 df8, df9, df10, df11, df12, df13, df14) = [
    pd.DataFrame(segment, columns=names)
    for segment in (seg1, seg2, seg3, seg4, seg5, seg6, seg7,
                    seg8, seg9, seg10, seg11, seg12, seg13, seg14)
]

In [20]:
# Stack the per-segment frames into one frame with a fresh 0..n-1 index
datastore = pd.concat(
    [
        df1, df2, df3, df4, df5, df6, df7,
        df8, df9, df10, df11, df12, df13, df14,
    ],
    ignore_index=True,
)

In [21]:
# Row count and dtypes of the combined frame, before duplicates are dropped
datastore.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40797 entries, 0 to 40796
Data columns (total 6 columns):
time      40797 non-null int64
open      40797 non-null float64
close     40797 non-null float64
high      40797 non-null float64
low       40797 non-null float64
volume    40797 non-null float64
dtypes: float64(5), int64(1)
memory usage: 1.9 MB


In [22]:
# Remove rows that are exact copies of an earlier row (the original index labels are kept)
datastore = datastore.drop_duplicates()

In [23]:
# Same summary again, to see how many rows remain after dropping duplicates
datastore.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40784 entries, 0 to 40795
Data columns (total 6 columns):
time      40784 non-null int64
open      40784 non-null float64
close     40784 non-null float64
high      40784 non-null float64
low       40784 non-null float64
volume    40784 non-null float64
dtypes: float64(5), int64(1)
memory usage: 2.2 MB


In [24]:
# Turn the millisecond timestamps into datetimes, order the rows by time
# (ascending), then put 'time' back as a regular column with a fresh index.
datastore = (
    datastore
    .assign(time=pd.to_datetime(datastore['time'], unit='ms'))
    .set_index('time')
    .sort_index()
    .reset_index()
)

In [25]:
# let's check if it works: the first rows should be the earliest timestamps
datastore.head()

Unnamed: 0,time,open,close,high,low,volume
0,2017-01-01 05:00:00,962.98,962.98,963.0,962.11,29.46695
1,2017-01-01 05:30:00,962.9,964.88,965.2,962.56,61.575102
2,2017-01-01 06:00:00,963.9,962.83,964.95,962.72,27.068546
3,2017-01-01 06:30:00,963.49,967.45,967.45,963.36,78.595934
4,2017-01-01 07:00:00,966.48,966.67,967.41,966.48,21.279508


In [26]:
# ...and the last rows should be the latest timestamps
datastore.tail()

Unnamed: 0,time,open,close,high,low,volume
40779,2019-05-01 02:00:00,5619.370256,5632.2,5643.0,5619.258079,289.576023
40780,2019-05-01 02:30:00,5632.3,5620.6,5632.3,5620.6,84.910372
40781,2019-05-01 03:00:00,5622.3,5619.3,5629.7,5615.1,50.869072
40782,2019-05-01 03:30:00,5619.3,5619.1,5623.0,5591.9,229.368454
40783,2019-05-01 04:00:00,5619.1,5625.08833,5625.1,5615.7,66.739223


In [27]:
# Everything looks good, let's save it! 
import os

In [28]:
# Destination of the combined candle data
file_name = 'btcusd_ohlc_2017_2019.csv'
path = './post_processing/'

# Create the output folder when it is missing, so that to_csv does not fail
# on a fresh checkout. os.path.join also avoids the doubled slash that
# f'{path}/{file_name}' produced because `path` already ends in '/'.
os.makedirs(path, exist_ok=True)
datastore.to_csv(os.path.join(path, file_name), index=False)