# Example Notebook: Using YIEDL's Crypto Datasets for Numerai Crypto Tournament

This simple notebook covers the basic steps to download crypto datasets from Numerai and YIEDL.

## Install Dependencies

In [1]:
!pip install numerapi requests pyarrow fastparquet

[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python3 -m pip install --upgrade pip


## Download the Numerai Crypto Datasets

In [2]:
# Initiate Numerai API connection.
# No API keys are passed explicitly here; `napi` is the client object that the
# dataset-download cells below call `download_dataset` on.
from numerapi import NumerAPI
napi = NumerAPI()

In [3]:
# Download the Numerai training targets into the current working directory.
import os

# os.path.join builds the destination with the platform's path separator instead
# of hand-concatenating "/" onto the working directory.
napi.download_dataset(filename="crypto/v1.0/train_targets.parquet",
                      dest_path=os.path.join(os.getcwd(), "numerai_train_targets.parquet"))

2024-10-24 09:04:16,856 INFO numerapi.utils: target file already exists
2024-10-24 09:04:16,857 INFO numerapi.utils: download complete


'/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_train_targets.parquet'

In [4]:
# Download the Numerai live crypto universe into the current working directory.
import os

# os.path.join builds the destination with the platform's path separator instead
# of hand-concatenating "/" onto the working directory.
napi.download_dataset(filename="crypto/v1.0/live_universe.parquet",
                      dest_path=os.path.join(os.getcwd(), "numerai_live_universe.parquet"))

2024-10-24 09:04:17,968 INFO numerapi.utils: target file already exists
2024-10-24 09:04:17,969 INFO numerapi.utils: download complete


'/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_live_universe.parquet'

## Import and Display the Numerai Crypto Datasets

In [5]:
import pandas as pd

In [6]:
# Load the Numerai training targets from the parquet file in the working directory
# (it is displayed in the next cell).
df_numerai_targets = pd.read_parquet("numerai_train_targets.parquet")

In [7]:
# Render the training targets (columns: symbol, date, target) with the notebook's rich display.
display(df_numerai_targets)

Unnamed: 0,symbol,date,target
0,0xBTC,2020-06-01,0.00
1,ABBC,2020-06-01,0.50
2,ABT,2020-06-01,0.50
3,ADA,2020-06-01,0.75
4,AE,2020-06-01,0.50
...,...,...,...
496,ZEN,2024-09-17,0.50
497,ZENT,2024-09-17,0.75
498,ZETA,2024-09-17,0.25
499,ZIG,2024-09-17,0.50


In [8]:
# Load and display the Numerai live universe.
# The frame holds a single `symbol` column listing the symbols in the live universe.
df_numerai_universe = pd.read_parquet("numerai_live_universe.parquet")
display(df_numerai_universe)

Unnamed: 0,symbol
0,BTC
12,ETH
49,BNB
198,SOL
2,XRP
...,...
846,CRTS
1027,HGPT
802,BZZ
819,LIKE


## Download YIEDL Crypto Datasets

In [9]:
# Helper Function
import requests

def download_file(url, output_filename, chunk_size=1 << 20, timeout=120):
    """Download ``url`` and save the response body as ``output_filename``.

    The body is streamed to disk in chunks, so a very large download is never
    held in memory as a whole. Data is first written to ``<output_filename>.part``
    and only renamed to ``output_filename`` once the transfer has finished, so an
    interrupted download does not leave a truncated file under the final name.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    output_filename : str
        Path of the file to create (overwritten if it already exists).
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming (default 1 MiB).
    timeout : float or tuple or None, optional
        Timeout in seconds forwarded to ``requests.get``; ``None`` waits forever.

    Returns
    -------
    None
        The outcome is reported with ``print``. A non-200 status prints a failure
        message (including the status code) and writes nothing.

    Raises
    ------
    requests.RequestException
        Propagated from ``requests`` on connection errors or timeouts.
    OSError
        If the output file cannot be written or renamed.
    """
    import os  # local import keeps this helper independent of other cells

    partial_filename = f"{output_filename}.part"

    # stream=True defers fetching the body until iter_content is consumed; the
    # context manager releases the connection even on an early return or error.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download file (HTTP {response.status_code})")
            return

        try:
            with open(partial_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)
        except BaseException:
            # Remove the incomplete file before re-raising.
            if os.path.exists(partial_filename):
                os.remove(partial_filename)
            raise

    # Publish the finished download under its final name in one step.
    os.replace(partial_filename, output_filename)
    print(f"File downloaded successfully as {output_filename}")

In [9]:
# Download the latest YIEDL crypto dataset into the current working directory.
# `url` and `output_filename` are notebook-level names; the historical-download
# cell reassigns both before calling `download_file` again.
url = 'https://api.yiedl.ai/yiedl/v1/downloadDataset?type=latest'
output_filename = 'yiedl_latest.parquet'
download_file(url, output_filename)

File downloaded successfully as yiedl_latest.parquet


In [10]:
# Download the historical YIEDL crypto dataset into the current working directory.
# NOTE: this is a huge file delivered as a zip archive, so it has to be unzipped
# afterwards (done in the following cell).
url = 'https://api.yiedl.ai/yiedl/v1/downloadDataset?type=historical'
output_filename = 'yiedl_historical.zip'
download_file(url, output_filename)

File downloaded successfully as yiedl_historical.zip


In [11]:
# Unzip and rename the file.
# Pure-Python equivalent of `unzip -p yiedl_historical.zip > yiedl_historical.parquet`,
# so the cell does not depend on an external `unzip` binary being installed.
import shutil
import zipfile

# The archive is opened first: if it is missing or corrupt the cell fails before
# the output file is created, instead of leaving an empty parquet file behind.
with zipfile.ZipFile("yiedl_historical.zip") as archive, \
        open("yiedl_historical.parquet", "wb") as parquet_out:
    # Like `unzip -p`, write the bytes of every file entry, in archive order,
    # to the single output file.
    # NOTE(review): presumably the archive holds exactly one parquet file — confirm.
    for member in archive.infolist():
        if member.is_dir():
            continue
        with archive.open(member) as member_stream:
            # copyfileobj streams in chunks, so the member is never fully in memory.
            shutil.copyfileobj(member_stream, parquet_out)

## Import and Display the YIEDL Crypto Datasets

In [10]:
# Load the YIEDL historical crypto dataset (it is inspected in the following cells).
# The pyarrow engine reads the file; the "numpy_nullable" backend asks pandas for
# nullable extension dtypes.
df_yield_historical = pd.read_parquet(
    "yiedl_historical.parquet",
    engine="pyarrow",
    dtype_backend="numpy_nullable",
)

In [13]:
# Check the per-column dtypes of the historical frame; the bare last expression
# is shown as the cell output.
df_yield_historical.dtypes

date                    object
symbol          string[python]
pvm_0001                  Int8
pvm_0002                  Int8
pvm_0003                  Int8
                     ...      
onchain_1649              Int8
onchain_1650              Int8
onchain_1651              Int8
onchain_1652              Int8
onchain_1653              Int8
Length: 3671, dtype: object

In [14]:
# Display the historical frame with the notebook's rich display.
display(df_yield_historical)

Unnamed: 0,date,symbol,pvm_0001,pvm_0002,pvm_0003,pvm_0004,pvm_0005,pvm_0006,pvm_0007,pvm_0008,...,onchain_1644,onchain_1645,onchain_1646,onchain_1647,onchain_1648,onchain_1649,onchain_1650,onchain_1651,onchain_1652,onchain_1653
0,2013-04-28,BTC,,,,,,,,,...,,,,,,,,,,
1,2013-04-28,LTC,,,,,,,,,...,,,,,,,,,,
2,2013-04-28,NMC,,,,,,,,,...,,,,,,,,,,
3,2013-04-28,PPC,,,,,,,,,...,,,,,,,,,,
4,2013-04-29,BTC,0,67,67,33,,33,67,67,...,100,100,100,100,100,100,100,100,100,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3783101,2024-09-30,vSXP,79,87,16,31,,32,51,49,...,,,,,,,,,,
3783102,2024-09-30,vUSDC,56,72,77,83,,67,78,77,...,,,,,,,,,,
3783103,2024-09-30,vUSDT,56,77,76,82,,82,79,78,...,,,,,,,,,,
3783104,2024-09-30,vXVS,71,88,25,40,,42,56,60,...,,,,,,,,,,


In [15]:
# Load the YIEDL latest crypto dataset (it is inspected in the following cells).
# Same reader settings as the historical load: pyarrow engine and the
# "numpy_nullable" dtype backend.
df_yield_latest = pd.read_parquet(
    "yiedl_latest.parquet",
    engine="pyarrow",
    dtype_backend="numpy_nullable",
)

In [16]:
# Check the per-column dtypes of the latest frame.
# NOTE(review): in the recorded outputs `symbol` is `object` here but
# `string[python]` in the historical frame — confirm and align the dtypes before
# combining the two frames.
df_yield_latest.dtypes

date            object
symbol          object
pvm_0001          Int8
pvm_0002          Int8
pvm_0003          Int8
                 ...  
onchain_1649      Int8
onchain_1650      Int8
onchain_1651      Int8
onchain_1652      Int8
onchain_1653      Int8
Length: 3671, dtype: object

In [17]:
# Display the latest frame with the notebook's rich display.
display(df_yield_latest)

Unnamed: 0,date,symbol,pvm_0001,pvm_0002,pvm_0003,pvm_0004,pvm_0005,pvm_0006,pvm_0007,pvm_0008,...,onchain_1644,onchain_1645,onchain_1646,onchain_1647,onchain_1648,onchain_1649,onchain_1650,onchain_1651,onchain_1652,onchain_1653
0,2024-10-01,$MICHI,10,87,38,37,84,39,38,68,...,,,,,,,,,,
1,2024-10-01,$MONG,9,59,39,37,62,38,29,49,...,81,87,74,81,86,82,83,83,85,87
2,2024-10-01,$WAFFLES,28,63,24,59,46,60,40,39,...,,,,,,,,,,
3,2024-10-01,$YAWN,85,69,84,82,33,83,84,80,...,23,13,6,5,8,4,4,4,4,5
4,2024-10-01,0x0,7,10,32,31,51,33,13,16,...,23,50,74,68,66,71,72,68,70,70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43757,2024-10-20,vSXP,42,27,29,21,,21,26,28,...,,,,,,,,,,
43758,2024-10-20,vUSDC,53,47,50,36,,40,44,46,...,,,,,,,,,,
43759,2024-10-20,vUSDT,53,42,50,34,,33,43,45,...,,,,,,,,,,
43760,2024-10-20,vXVS,21,18,66,43,,41,23,27,...,,,,,,,,,,
