# Example Notebook: Using YIEDL's Crypto Datasets for Numerai Crypto Tournament

This simple notebook covers the basic steps to download, load, and display crypto datasets from Numerai and YIEDL.

## Install Dependencies

In [1]:
!pip install numerapi requests pyarrow fastparquet


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python3 -m pip install --upgrade pip


## Download the Numerai Crypto Datasets

In [2]:
# Initiate Numerai API connection.
# NumerAPI() is constructed without any arguments (no API keys are passed);
# the resulting `napi` client is the object the dataset-download cells below call.
from numerapi import NumerAPI
napi = NumerAPI()

In [3]:
# Download the Numerai training targets into the current working directory.
# os.path.join assembles the destination with the platform's path separator
# instead of concatenating "/" onto os.getcwd() by hand.
# The call is the cell's last expression, so its return value is displayed.
import os
napi.download_dataset(
    filename="crypto/v1.0/train_targets.parquet",
    dest_path=os.path.join(os.getcwd(), "numerai_train_targets.parquet"),
)

2024-10-23 19:47:52,352 INFO numerapi.utils: starting download
/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_train_targets.parquet: 679kB [00:00, 1.02MB/s]


'/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_train_targets.parquet'

In [4]:
# Download the Numerai live crypto universe into the current working directory.
# os.path.join assembles the destination with the platform's path separator
# instead of concatenating "/" onto os.getcwd() by hand.
# The call is the cell's last expression, so its return value is displayed.
import os
napi.download_dataset(
    filename="crypto/v1.0/live_universe.parquet",
    dest_path=os.path.join(os.getcwd(), "numerai_live_universe.parquet"),
)

2024-10-23 19:47:53,848 INFO numerapi.utils: starting download
/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_live_universe.parquet: 8.19kB [00:00, 4.60MB/s]


'/mnt/Storage/repo_numerai/numerai_experiments/experiments/2024_10_23_yiedl_blog_post/numerai_live_universe.parquet'

## Import and Display the Numerai Crypto Datasets

In [5]:
import pandas as pd

In [6]:
# Load and display the Numerai training targets.
# Reads the parquet file saved by the training-targets download cell above
# (relative path, i.e. resolved against the current working directory).
df_numerai_targets = pd.read_parquet("numerai_train_targets.parquet")
display(df_numerai_targets)

Unnamed: 0,symbol,date,target
0,0xBTC,2020-06-01,0.00
1,ABBC,2020-06-01,0.50
2,ABT,2020-06-01,0.50
3,ADA,2020-06-01,0.75
4,AE,2020-06-01,0.50
...,...,...,...
496,ZEN,2024-09-17,0.50
497,ZENT,2024-09-17,0.75
498,ZETA,2024-09-17,0.25
499,ZIG,2024-09-17,0.50


In [7]:
# Load and display the Numerai live universe.
# Reads the parquet file saved by the live-universe download cell above
# (relative path, i.e. resolved against the current working directory).
df_numerai_universe = pd.read_parquet("numerai_live_universe.parquet")
display(df_numerai_universe)

Unnamed: 0,symbol
0,BTC
12,ETH
49,BNB
198,SOL
2,XRP
...,...
846,CRTS
1027,HGPT
802,BZZ
819,LIKE


## Download YIEDL Crypto Datasets

In [8]:
# Helper Function
import requests

def download_file(url, output_filename, chunk_size=1024 * 1024, timeout=60):
    """Download ``url`` and save the response body as ``output_filename``.

    The body is streamed to disk in chunks instead of being held in memory
    as a whole, so very large files (such as the zipped historical dataset)
    can be fetched without exhausting RAM. Data is first written to
    ``<output_filename>.part`` and only renamed to ``output_filename`` once
    the transfer has completed, so an interrupted download never leaves a
    truncated file under the final name.

    Args:
        url: Address to fetch with an HTTP GET request.
        output_filename: Path of the file to create or overwrite.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Seconds to wait for the connection and for each read before
            ``requests`` raises a timeout error; ``None`` waits indefinitely.

    Returns:
        None. A success or failure message is printed. A response whose
        status code is not 200 is reported and no file is written.

    Raises:
        requests.exceptions.RequestException: On connection or timeout errors.
        OSError: If the output file cannot be written.
    """
    import os  # local import so the helper does not depend on another cell

    partial_filename = f"{output_filename}.part"
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            # Include the status so the failure can be diagnosed from the output.
            print(f"Failed to download file (HTTP status {response.status_code})")
            return
        try:
            with open(partial_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty keep-alive chunks
                        file.write(chunk)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(partial_filename):
                os.remove(partial_filename)
            raise
    # Publish the finished download under its final name in one step.
    os.replace(partial_filename, output_filename)
    print(f"File downloaded successfully as {output_filename}")

In [9]:
# Fetch the latest YIEDL crypto dataset and store it in the current directory
url, output_filename = (
    'https://api.yiedl.ai/yiedl/v1/downloadDataset?type=latest',
    'yiedl_latest.parquet',
)
download_file(url=url, output_filename=output_filename)

File downloaded successfully as yiedl_latest.parquet


In [10]:
# Fetch the historical YIEDL crypto dataset and store it in the current directory.
# NOTE: this is a huge file delivered as a zip archive; it has to be
# unzipped in a separate step afterwards.
url, output_filename = (
    'https://api.yiedl.ai/yiedl/v1/downloadDataset?type=historical',
    'yiedl_historical.zip',
)
download_file(url=url, output_filename=output_filename)

File downloaded successfully as yiedl_historical.zip


In [11]:
# Unzip and rename the file.
# Done with the standard library instead of the external `unzip` binary so the
# cell also runs where `unzip` is not installed. A missing or corrupt archive
# now raises (FileNotFoundError / zipfile.BadZipFile) before the output file is
# opened, instead of the shell redirect leaving an empty .parquet file behind.
import shutil
import zipfile

with zipfile.ZipFile("yiedl_historical.zip") as archive:
    with open("yiedl_historical.parquet", "wb") as parquet_out:
        # Same semantics as `unzip -p`: the bytes of every file member are
        # written, in archive order, to the single output file.
        # NOTE(review): presumably the archive holds exactly one parquet file - confirm.
        for member in archive.infolist():
            if member.is_dir():
                continue
            with archive.open(member) as member_stream:
                shutil.copyfileobj(member_stream, parquet_out)

## Import and Display the YIEDL Crypto Datasets

In [12]:
# Load and display the YIEDL historical crypto dataset.
# Reads the parquet file extracted from yiedl_historical.zip in the previous cell.
# NOTE(review): the variable is spelled "yield" while the dataset is "yiedl";
# left unchanged because other cells may reference this name.
df_yield_historical = pd.read_parquet("yiedl_historical.parquet")
display(df_yield_historical)

Unnamed: 0,date,symbol,pvm_0001,pvm_0002,pvm_0003,pvm_0004,pvm_0005,pvm_0006,pvm_0007,pvm_0008,...,onchain_1644,onchain_1645,onchain_1646,onchain_1647,onchain_1648,onchain_1649,onchain_1650,onchain_1651,onchain_1652,onchain_1653
0,2013-04-28,BTC,,,,,,,,,...,,,,,,,,,,
1,2013-04-28,LTC,,,,,,,,,...,,,,,,,,,,
2,2013-04-28,NMC,,,,,,,,,...,,,,,,,,,,
3,2013-04-28,PPC,,,,,,,,,...,,,,,,,,,,
4,2013-04-29,BTC,0.0,67.0,67.0,33.0,,33.0,67.0,67.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3783101,2024-09-30,vSXP,79.0,87.0,16.0,31.0,,32.0,51.0,49.0,...,,,,,,,,,,
3783102,2024-09-30,vUSDC,56.0,72.0,77.0,83.0,,67.0,78.0,77.0,...,,,,,,,,,,
3783103,2024-09-30,vUSDT,56.0,77.0,76.0,82.0,,82.0,79.0,78.0,...,,,,,,,,,,
3783104,2024-09-30,vXVS,71.0,88.0,25.0,40.0,,42.0,56.0,60.0,...,,,,,,,,,,


In [13]:
# Load and display the YIEDL latest crypto dataset.
# Reads the parquet file saved by the "latest" download cell above.
# NOTE(review): the variable is spelled "yield" while the dataset is "yiedl";
# left unchanged because other cells may reference this name.
df_yield_latest = pd.read_parquet("yiedl_latest.parquet")
display(df_yield_latest)

Unnamed: 0,date,symbol,pvm_0001,pvm_0002,pvm_0003,pvm_0004,pvm_0005,pvm_0006,pvm_0007,pvm_0008,...,onchain_1644,onchain_1645,onchain_1646,onchain_1647,onchain_1648,onchain_1649,onchain_1650,onchain_1651,onchain_1652,onchain_1653
0,2024-10-01,$MICHI,10.0,87.0,38.0,37.0,84.0,39.0,38.0,68.0,...,,,,,,,,,,
1,2024-10-01,$MONG,9.0,59.0,39.0,37.0,62.0,38.0,29.0,49.0,...,81.0,87.0,74.0,81.0,86.0,82.0,83.0,83.0,85.0,87.0
2,2024-10-01,$WAFFLES,28.0,63.0,24.0,59.0,46.0,60.0,40.0,39.0,...,,,,,,,,,,
3,2024-10-01,$YAWN,85.0,69.0,84.0,82.0,33.0,83.0,84.0,80.0,...,23.0,13.0,6.0,5.0,8.0,4.0,4.0,4.0,4.0,5.0
4,2024-10-01,0x0,7.0,10.0,32.0,31.0,51.0,33.0,13.0,16.0,...,23.0,50.0,74.0,68.0,66.0,71.0,72.0,68.0,70.0,70.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43757,2024-10-20,vSXP,42.0,27.0,29.0,21.0,,21.0,26.0,28.0,...,,,,,,,,,,
43758,2024-10-20,vUSDC,53.0,47.0,50.0,36.0,,40.0,44.0,46.0,...,,,,,,,,,,
43759,2024-10-20,vUSDT,53.0,42.0,50.0,34.0,,33.0,43.0,45.0,...,,,,,,,,,,
43760,2024-10-20,vXVS,21.0,18.0,66.0,43.0,,41.0,23.0,27.0,...,,,,,,,,,,
