In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
#default_exp download

# Download

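> Downloaders for fetching training and inference data (e.g. Numerai Classic via NumerAPI) into a local directory.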

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
import os
import shutil
from numerapi import NumerAPI, SignalsAPI
from pathlib import Path, PosixPath
from abc import ABC
from rich import print as rich_print

## 1. Base

`BaseDownloader` is a simple object which implements logic common to all downloaders.

To implement a new downloader, inherit from `BaseDownloader` and implement at least `download_training_data` and `download_inference_data`.

In [None]:
#export
class BaseDownloader(ABC):
    """
    Abstract base class for downloaders.

    Makes sure the base directory exists and provides directory helpers shared by all
    downloaders. Subclasses should override `download_training_data` and
    `download_inference_data`; the implementations here raise `NotImplementedError`.

    :param directory_path: Base directory where data will be saved.
    """
    def __init__(self, directory_path: str):
        self.dir = Path(directory_path)
        if not self.dir.is_dir():
            rich_print(f"No existing directory found at '[blue]{self.dir}[/blue]'. Creating directory...")
            self.dir.mkdir(parents=True, exist_ok=True)

    def download_training_data(self, *args, **kwargs):
        """ Download training data. Must be overridden by subclasses. """
        raise NotImplementedError(f"No method for downloading training data is implemented in '{self.__class__.__name__}'")

    def download_inference_data(self, *args, **kwargs):
        """ Download inference data. Must be overridden by subclasses. """
        raise NotImplementedError(f"No method for downloading inference data is implemented in '{self.__class__.__name__}'.")

    def remove_base_directory(self):
        """ Remove download directory with all contents. """
        abs_path = self.dir.resolve()
        rich_print(f":warning: [red]Deleting directory for '{self.__class__.__name__}[/red]' :warning:\nPath: '{abs_path}'")
        shutil.rmtree(abs_path)

    def _append_folder(self, folder: str) -> Path:
        """
        Create `folder` (including missing parents) inside the base directory.

        :param folder: Path relative to the base directory. An empty string refers to the base directory itself.
        :return: Path of the (now existing) folder.
        """
        target_dir = self.dir / folder
        target_dir.mkdir(parents=True, exist_ok=True)
        return target_dir

    @property
    def get_all_files(self) -> list:
        """ Return all contents (files and folders) directly inside the base directory. """
        # Must read from `self`: referring to a notebook-level variable here breaks every
        # instance that is not that exact variable.
        return list(self.dir.iterdir())

    @property
    def is_empty(self) -> bool:
        """ Check if directory is empty."""
        return not self.get_all_files

    def __call__(self, *args, **kwargs):
        """
        The most common use case will be to get weekly inference data.
        So calling the object itself downloads inference data.
        All arguments are forwarded to `download_inference_data`.
        """
        self.download_inference_data(*args, **kwargs)

In [None]:
test_dir = "test_base_1234321234321/"

# Test building class
base_down = BaseDownloader(directory_path=test_dir)
# Check against `Path` instead of `PosixPath` so the test also holds on Windows,
# where the concrete class is `WindowsPath`.
assert isinstance(base_down.dir, Path)
assert base_down.dir.is_dir()

# Test properties
test_file = base_down.dir / "test.txt"
test_file.write_text("test")
rich_print(f"Directory contents:\n{base_down.get_all_files}")
assert test_file in base_down.get_all_files
assert not base_down.is_empty

# Remove contents
base_down.remove_base_directory()
assert not os.path.exists(test_dir)

## 2. Numerai Classic

In [None]:
# NOTE(review): unlike the `BaseDownloader` cell, this cell carries no nbdev export flag,
# so this class is presumably not written to the library module. Confirm whether that is intended.
class NumeraiClassicDownloader(BaseDownloader):
    """
    Downloading from NumerAPI for Numerai Classic data

    :param directory_path: Main folder to download data in.
    All *args, **kwargs will be passed to NumerAPI initialization.
    """
    # Example prediction files available per dataset version.
    _EXAMPLE_FILES = {
        1: ('example_predictions.csv', 'example_validation_predictions.csv'),
        2: ('example_predictions.parquet', 'example_validation_predictions.parquet'),
    }

    def __init__(self, directory_path: str, *args, **kwargs):
        super().__init__(directory_path=directory_path)
        self.napi = NumerAPI(*args, **kwargs)
        # Queried once at construction time through NumerAPI.
        self.current_round = self.napi.get_current_round()

    def download_training_data(self, version: int = 2, *args, **kwargs):
        """ Not implemented yet: currently a no-op that returns None. """
        # TODO Implement train set downloading
        ...

    def download_inference_data(self, version: int = 2, round_num: int = None, *args, **kwargs):
        """ Not implemented yet: currently a no-op that returns None. """
        # TODO implement inference downloading
        ...

    def download_single_dataset(self, filename: str, dest_path: str, round_num: int = None, *args, **kwargs):
        """
        Download arbitrary dataset through NumerAPI

        :param filename: Name of the dataset file to download.
        :param dest_path: Path the file is saved to.
        :param round_num: Numerai tournament round number, passed on to NumerAPI unchanged.
        Additional *args, **kwargs are forwarded to `NumerAPI.download_dataset`.
        """
        # Positional extras are always bound before the keywords below, so they are
        # spelled first here to make the actual call order explicit.
        self.napi.download_dataset(*args,
                                   filename=filename,
                                   dest_path=dest_path,
                                   round_num=round_num,
                                   **kwargs)

    def get_example_data(self, folder: str = "", version: int = 2, round_num: int = None):
        """
        Download all example prediction data in specified folder for given version.

        :param folder: Specify folder to create folder within directory root. Saves in directory root by default.
        :param version: Numerai version (1=classic, 2=super massive dataset (parquet)).
        :param round_num: Numerai tournament round number. Downloads latest round by default.
        :raises KeyError: If `version` is not a supported version.
        """
        # Validate before touching the file system so a bad version leaves no empty folder behind.
        # The exception type stays `KeyError`, as with a plain dictionary lookup.
        if version not in self._EXAMPLE_FILES:
            supported = sorted(self._EXAMPLE_FILES)
            raise KeyError(f"Unsupported version '{version}'. Supported versions: {supported}.")

        target_dir = self._append_folder(folder)
        for filename in self._EXAMPLE_FILES[version]:
            rich_print(f":file_folder: [green]Downloading[/green] '{filename}' :file_folder:")
            self.download_single_dataset(filename=filename,
                                         dest_path=str(target_dir / filename),
                                         round_num=round_num)




In [None]:
#slow
test_dir_classic = "test_numerai_classic_1234321"
numer_classic_downloader = NumeraiClassicDownloader(test_dir_classic)
# TODO test for ClassicDownloader (including full #slow tests)

try:
    # Downloading example data
    numer_classic_downloader.get_example_data("test1/", version=1)
    numer_classic_downloader.get_example_data("test2/", version=2, round_num=290)

    # Verify that every requested file actually landed in its folder
    expected_files = [
        ("test1", "example_predictions.csv"),
        ("test1", "example_validation_predictions.csv"),
        ("test2", "example_predictions.parquet"),
        ("test2", "example_validation_predictions.parquet"),
    ]
    for folder, filename in expected_files:
        assert (numer_classic_downloader.dir / folder / filename).is_file(), f"Missing '{folder}/{filename}'"
finally:
    # Remove contents, also when a download or check above fails,
    # so no large files are left behind.
    numer_classic_downloader.remove_base_directory()
assert not os.path.exists(test_dir_classic)

2022-01-04 18:35:31,058 INFO numerapi.utils: starting download
test_numerai_classic_1234321/test1/example_predictions.csv: 51.2MB [00:27, 1.87MB/s]                            


2022-01-04 18:35:59,599 INFO numerapi.utils: starting download
test_numerai_classic_1234321/test1/example_validation_predictions.csv: 19.6MB [00:03, 4.96MB/s]                            


2022-01-04 18:36:04,717 INFO numerapi.utils: starting download
test_numerai_classic_1234321/test2/example_predictions.parquet: 33.5MB [00:06, 5.10MB/s]                            


2022-01-04 18:36:12,461 INFO numerapi.utils: starting download
test_numerai_classic_1234321/test2/example_validation_predictions.parquet: 13.0MB [00:06, 2.03MB/s]                            


## 3. Yahoo Finance

## 4. FinnHub

## 5. Bloomberg?

In [None]:
#hide
# Executing this cell writes all exported notebook cells to the library modules
from nbdev.export import notebook2script

notebook2script()