Import data from Deribit.com

(This code was run on Google Colab)

In [None]:
# Install the package for using Deribit's API
!pip install websockets

In [None]:
# Import packages
import asyncio
import websockets
import json
import pandas as pd
import datetime as dt
import time
import nest_asyncio
nest_asyncio.apply()

async def call_api(msg):
    """Send one request to the Deribit WebSocket API and return the first reply.

    A new connection to ``wss://www.deribit.com/ws/api/v2`` is opened for
    every call and is closed again when the ``async with`` block exits.

    Args:
        msg: The request to send; ``retrieve_historic_data`` passes a
            JSON-encoded string.

    Returns:
        The first message received from the server after sending ``msg``.
    """
    async with websockets.connect('wss://www.deribit.com/ws/api/v2') as websocket:
        await websocket.send(msg)
        # Only one reply is read, so no receive loop is needed. This also
        # avoids the connection's `open` attribute, which newer releases of
        # the `websockets` asyncio client no longer provide.
        return await websocket.recv()

def async_loop(api, message):
    """Run the coroutine function ``api`` on ``message`` and block until it finishes.

    Args:
        api: A coroutine function taking a single argument.
        message: The argument forwarded to ``api``.

    Returns:
        The value returned by ``api(message)``.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No event loop is set for this thread (always the case in worker
        # threads, and in the main thread on newer Python versions). Create
        # one and register it so later calls reuse it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(api(message))

def retrieve_historic_data(start, end, instrument, timeframe):
    """Request chart data for one instrument over a time window.

    Builds a JSON-RPC 2.0 request for the
    ``public/get_tradingview_chart_data`` method, sends it through
    ``call_api`` and returns the reply unparsed.

    Args:
        start: Value sent as ``start_timestamp``.
        end: Value sent as ``end_timestamp``.
        instrument: Value sent as ``instrument_name``.
        timeframe: Value sent as ``resolution``.

    Returns:
        The raw response produced by ``call_api``.
    """
    params = {
        "instrument_name": instrument,
        "start_timestamp": start,
        "end_timestamp": end,
        "resolution": timeframe,
    }
    request = {
        "jsonrpc": "2.0",
        "id": 833,
        "method": "public/get_tradingview_chart_data",
        "params": params,
    }
    return async_loop(call_api, json.dumps(request))

def json_to_dataframe(json_resp):
    """Convert a raw chart-data response into a DataFrame.

    The ``result`` member of the response becomes the DataFrame. Its
    ``ticks`` column is treated as milliseconds since the Unix epoch: it is
    rescaled to seconds, and a ``timestamp`` column holding the matching
    naive UTC datetimes is appended.

    Args:
        json_resp: JSON text of the API response.

    Returns:
        A DataFrame with the columns of ``result`` (``ticks`` in seconds)
        plus a trailing ``timestamp`` column.

    Raises:
        KeyError: If the response has no ``result`` member. The message
            includes the response's ``error`` member when there is one.
    """
    res = json.loads(json_resp)
    if 'result' not in res:
        # Surface the server's error instead of a bare KeyError('result').
        raise KeyError(
            f"'result' missing from API response; error: {res.get('error')}"
        )
    df = pd.DataFrame(res['result'])
    # Convert from the original millisecond values in one vectorised call;
    # this replaces the deprecated datetime.utcfromtimestamp per-row loop.
    df['timestamp'] = pd.to_datetime(df['ticks'], unit='ms')
    df['ticks'] = df['ticks'] / 1000
    return df

def get_data(date1, instrument, tf):
    """Download chart data one day at a time from ``date1`` until now.

    The number of whole days between ``date1`` and the current local time
    fixes how many one-day windows are requested. Each window is fetched
    with ``retrieve_historic_data``, parsed with ``json_to_dataframe``, and
    followed by a two-second pause.

    Args:
        date1: ``datetime`` of the first window's start. A naive value is
            interpreted in local time by ``datetime.timestamp()``.
        instrument: Instrument name forwarded to the API.
        tf: Resolution string forwarded to the API.

    Returns:
        A DataFrame with the rows of all windows in chronological order
        (each window keeps its own index), or an empty DataFrame when
        ``date1`` is less than one day in the past.
    """
    n_days = (dt.datetime.now() - date1).days
    # Collect per-day frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    frames = []
    d1 = date1
    for _ in range(n_days):
        d2 = d1 + dt.timedelta(days=1)
        # Millisecond timestamps, sent as integers rather than floats.
        # NOTE(review): the API reference appears to specify integers - confirm.
        t1 = int(d1.timestamp() * 1000)
        t2 = int(d2.timestamp() * 1000)
        # NOTE(review): consecutive windows share a boundary timestamp; if
        # the API treats both bounds as inclusive, that row is duplicated.
        json_resp = retrieve_historic_data(t1, t2, instrument, tf)
        frames.append(json_to_dataframe(json_resp))
        print(f'collected data for dates: {d1.isoformat()} to {d2.isoformat()}')
        print('sleeping for 2 seconds')
        time.sleep(2)
        d1 = d2
    if not frames:
        # pd.concat raises on an empty list; keep the original empty result.
        return pd.DataFrame()
    return pd.concat(frames)

if __name__ == '__main__':
    # Download settings.
    instrument = "BTC-PERPETUAL"  # instrument to download
    tf = "1"  # resolution of the retrieved data; "1" means 1 minute
    start = dt.datetime(year=2019, month=1, day=1)  # first day of data

    # Fetch everything from `start` until now, then save it as CSV.
    df_master = get_data(date1=start, instrument=instrument, tf=tf)
    df_master.to_csv(
        '/content/drive/MyDrive/Colab_Notebooks/Data/name_dataset.csv'
    )

Save the last 20% of the dataset as a test set for backtesting purposes

In [None]:
# Hold out the last 20% of the rows as the test set. The size is derived from
# the actual number of downloaded rows instead of hard-coded counts, so the
# split stays correct when the dataset grows (802081 rows -> 160416 test rows).
n_test = len(df_master) // 5
df = df_master.iloc[len(df_master) - n_test:]
df.to_csv('/content/drive/MyDrive/Colab_Notebooks/Data/name_testset.csv')