diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..35c2be2 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,22 @@ +version: 2 +jobs: + build: + working_directory: ~/repo + docker: + - image: circleci/python:3.5.3 + - image: circleci/mysql:5.6 + - image: circleci/mongo:3.0.14 + steps: + - checkout + - run: + name: Install Python Dependencies + command: | + python3 -m venv venv + . venv/bin/activate + cd ~/repo && pip install -e . | tee + - run: + name: Lint Python Packages + command: | + . venv/bin/activate + flake8 --config=./.flake8 ./ + find . -iname "*.py" ! -name "setup.py" ! -name "__init__.py" ! -path "./venv/*" | xargs pylint --rcfile=./.pylintrc diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..13bfe20 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +ignore = E501, E701 +exclude = setup.py, __init__.py, venv diff --git a/.gitignore b/.gitignore index ae0a4ee..6d9d397 100644 --- a/.gitignore +++ b/.gitignore @@ -96,4 +96,8 @@ ENV/ # mkdocs documentation /site +# vim +.session.vim +.swp + numerai_datasets/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..efc59ae --- /dev/null +++ b/.pylintrc @@ -0,0 +1,12 @@ +[MESSAGES CONTROL] +max-line-length=120 +max-args=7 +max-attributes=8 +max-locals=17 +disable=W0141,superfluous-parens,multiple-statements,C0111,C0103,E1101,logging-format-interpolation + +[TYPECHECK] +ignored-modules = numpy, numpy.random, tensorflow + +[MISCELLANEOUS] +notes=XXX diff --git a/README.md b/README.md index 5778879..b888a99 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,207 @@ # Numerai Python API -Automatically download and upload data for the Numerai machine learning competition +Automatically download and upload data for the Numerai machine learning +competition. -This library is a client to the Numerai API. The interface is programmed in Python and allows downloading the training data, uploading predictions, and accessing some user information. 
Some parts of the code were taken from [numerflow](https://github.com/ChristianSch/numerflow) by ChristianSch. Visit his [wiki](https://github.com/ChristianSch/numerflow/wiki/API-Reverse-Engineering), if you need further information on the reverse engineering process. +This library is a Python client to the Numerai API. The interface is programmed +in Python and allows downloading the training data, uploading predictions, and +accessing user and submission information. Some parts of the code were taken +from [numerflow](https://github.com/ChristianSch/numerflow) by ChristianSch. +Visit his +[wiki](https://github.com/ChristianSch/numerflow/wiki/API-Reverse-Engineering), +if you need further information on the reverse engineering process. If you encounter a problem or have suggestions, feel free to open an issue. # Installation -This library supports both Python 2 and 3. Clone this repository, then `cd` into this repository's directory. Then `pip install -e .` +1. Obtain a copy of this API + * If you do not plan on contributing to this repository, download a release. + 1. Navigate to [releases](https://github.com/numerai/NumerAPI/releases). + 2. Download the latest version. + 3. Extract with `unzip` or `tar` as necessary. + + * If you do plan on contributing, clone this repository instead. + +2. `cd` into the API directory (defaults to `numerapi`, but make sure not to go +into the sub-directory also named `numerapi`). +3. `pip install -e .` # Usage -See the example.py. You can run it as `./example.py` +See `example.py`. You can run it as `./example.py` # Documentation -### `download_current_dataset` -#### Parameters -* `dest_path`: Optional parameter. Destination folder for the dataset. Default: currrent working directory. -* `unzip`: Optional parameter. Decide if you wish to unzip the downloaded training data. Default: True - -#### Return values -* `status_code`: Status code of the requests operation. 
- -### `upload_prediction` -#### Parameters -* `file_path`: Path to the prediction. It should already contain the file name ('path/to/file/prediction.csv') - -#### Return values -* `status_code`: Status code of the requests operation. - -#### Notes -* Uploading a prediction shortly before a new dataset is released may result in a <400 Bad Request>. If this happens, just wait for the new dataset and upload new predictions then. -* Uploading too many predictions in a certain amount of time will result in a <429 Too Many Requests>. -* Uploading predictions to an account that has 2FA (Two Factor Authentication) enabled is not currently supported - -### `get_user` +## Layout +Parameters and return values are given with Python types. Dictionary keys are +given in quotes; other names to the left of colons are for reference +convenience only. In particular, `list`s of `dict`s have names for the `dict`s; +these names will not show up in the actual data, only the actual `dict` data +itself. + +## `login` ### Parameters -* `username`: Name of the user you want to request. - -### Return values -* `array-like`: Tuple of size nine containing the `username`, `submission_id`, `validation_logloss`, `validation_consistency`, `originality`, `concordance`, `career_usd`, `career_nmr` and the status code of the requests operation. If it fails all values except the status code will be `None`. 
- -### `get_scores` +* `email` (`str`, optional): email of user account + * will prompt for this value if not supplied +* `password` (`str`, optional): password of user account + * will prompt for this value if not supplied + * prompting is recommended for security reasons +* `mfa_enabled` (`bool`, optional): indication of whether to prompt for MFA + code + * only necessary if MFA is enabled for user account +### Return Values +* `user_credentials` (`dict`): credentials for logged-in user + * `"username"` (`str`) + * `"accessToken"` (`str`) + * `"refreshToken"` (`str`) + +## `download_current_dataset` ### Parameters -* `username`: Name of the user you want to request. - -### Return values -* `array-like`: Tuple of size 2 containing a `numpy.ndarray` containing the scores of all uploaded predictions with the newest first and the status code of the requests operation. If it fails all values except the status code will be `None`. - -### `get_earnings_per_round` +* `dest_path` (`str`, optional, default: `.`): destination folder for the + dataset +* `unzip` (`bool`, optional, default: `True`): indication of whether the + training data should be unzipped +### Return Values +* `success` (`bool`): indication of whether the current dataset was + successfully downloaded + +## `get_all_competitions` +### Return Values +* `all_competitions` (`list`): information about all competitions + * `competition` (`dict`) + * `"_id"` (`int`) + * `"dataset_id"` (`str`) + * `"start_date"` (`str (datetime)`) + * `"end_date"` (`str (datetime)`) + * `"paid"` (`bool`) + * `"leaderboard"` (`list`) + * `submission` (`dict`) + * `"concordant"` (`dict`) + * `"pending"` (`bool`) + * `"value"` (`bool`) + * `"earnings"` (`dict`) + * `"career"` (`dict`) + * `"nmr"` (`str`) + * `"usd"` (`str`) + * `"competition"` (`dict`) + * `"nmr"` (`str`) + * `"usd"` (`str`) + * `"logloss"` (`dict`) + * `"consistency"` (`int`) + * `"validation"` (`float`) + * `"original"` (`dict`) + * `"pending"` (`bool`) + *
`"value"` (`bool`) + * `"submission_id"` (`str`) + * `"username"` (`str`) + +## `get_competition` +### Return Values +* `competition` (`dict`): information about requested competition + * `"_id"` (`int`) + * `"dataset_id"` (`str`) + * `"start_date"` (`str (datetime)`) + * `"end_date"` (`str (datetime)`) + * `"paid"` (`bool`) + * `"leaderboard"` (`list`) + * `submission` (`dict`) + * `"concordant"` (`dict`) + * `"pending"` (`bool`) + * `"value"` (`bool`) + * `"earnings"` (`dict`) + * `"career"` (`dict`) + * `"nmr"` (`str`) + * `"usd"` (`str`) + * `"competition"` (`dict`) + * `"nmr"` (`str`) + * `"usd"` (`str`) + * `"logloss"` (`dict`) + * `"consistency"` (`int`) + * `"validation"` (`float`) + * `"original"` (`dict`) + * `"pending"` (`bool`) + * `"value"` (`bool`) + * `"submission_id"` (`str`) + * `"username"` (`str`) + +## `get_earnings_per_round` ### Parameters -* `username`: Name of the user you want to request. +* `username` (`str`, optional): user for which earnings are requested +### Return Values +* `round_ids` (`np.ndarray(int)`): IDs of each round for which there are + earnings +* `earnings` (`np.ndarray(float)`): earnings for each round -### Return values -* `array-like`: Tuple of size 2 containing a `numpy.ndarray` containing the earnings of each round with the oldest first and the status code of the requests operation. If it fails all values except the status code will be `None`. - -### `login` -#### Return values -* `array-like`: Tuple of size four containing the `accessToken`, `refreshToken`, `id`, and the status code of the requests operation. If it fails all values except the status code will be `None`. - -### `authorize` -#### Parameters -* `file_path`: Path to the prediction. It should already contain the file name ('path/to/file/prediction.csv') - -#### Return values -* `array-like`: Tuple of size four containing the `filename`, `signedRequest`, `headers`, and the status code of the requests operation. If it fails all values except the status code will be `None`.
- -### `get_current_competition` -#### Return values -* `array-like`: Tuple of size three containing the `dataset_id`, `_id` and the status code of the requests operation. If it fails all values except the status code will be `None`. - -### `get_new_leaderboard` -#### Return Values -* `list`: A list of every user that has submitted in this round of the competition, including statistics like how much USD and NMR were earned by that user in that round. +## `get_scores_for_user` +### Parameters +* `username` (`str`, optional): user for which scores are being requested +### Return Values +* `validation_scores` (`np.ndarray(float)`): logloss validation scores +* `consistency_scores` (`np.ndarray(float)`): logloss consistency scores +* `round_ids` (`np.ndarray(int)`): IDs of the rounds for which there are scores -#### Notes -* Each round of the competition is numbered. The first competition is 1. Specify a round of the competition to get leaderboard information for that round, or leave off the round of the competition to get the current round of the competition.
+## `get_user` +### Parameters +* `username`: `str` - name of requested user +### Return Values +* `user` (`dict`): information about the requested user + * `"_id"` (`str`) + * `"username"` (`str`) + * `"assignedEthAddress"` (`str`) + * `"created"` (`str (datetime)`) + * `"earnings"` (`float`) + * `"followers"` (`int`) + * `"rewards"` (`list`) + * `reward` (`dict`) + * `"_id"` (`int`) + * `"amount"` (`float`) + * `"earned"` (`float`) + * `"nmr_earned"` (`str`) + * `"start_date"` (`str (datetime)`) + * `"end_date"` (`str (datetime)`) + * `"submissions"` (`dict`) + * `"results"` (`list`) + * `result` (`dict`) + * `"_id"` (`str`) + * `"competition"` (`dict`) + * `"_id"` (`str`) + * `"start_date"` (`str (datetime)`) + * `"end_date"` (`str (datetime)`) + * `"competition_id"` (`int`) + * `"created"` (`str (datetime)`) + * `"id"` (`str`) + * `"username"` (`str`) + +## `get_submission_for_round` +### Parameters +* `username` (`str`): user for which submission is requested +* `round_id` (`int`, optional): round for which submission is requested + * if no `round_id` is supplied, the submission for the current round will be + retrieved +### Return Values +* `username` (`str`): user for which submission is requested +* `submission_id` (`str`): ID of submission for which data was found +* `logloss_val` (`float`): amount of logloss for given submission +* `logloss_consistency` (`float`): consistency of given submission +* `career_usd` (`float`): amount of USD earned by given user +* `career_nmr` (`float`): amount of NMR earned by given user +* `concordant` (`bool` OR `dict` (see note)): whether given submission is + concordant + * for rounds before 64, this was only a boolean, but from 64 on, it is a dict + which indicates whether concordance is still being computed +* `original` (`bool` OR `dict` (see note)): whether given submission is + original + * for rounds before 64, this was only a boolean, but from 64 on, it is a dict + which indicates whether originality is still being 
def main():
    """Demonstrate the NumerAPI client end to end.

    Logs in (prompting for credentials), downloads and unzips the current
    dataset, prints earnings, scores, and profile data for both the
    logged-in user and an example user, prints one submission, and finally
    uploads the example predictions shipped with the dataset.
    """
    def show(label, value):
        """Print a heading line followed by the value it describes."""
        print(label)
        print(value)

    def as_json(value):
        """Render a value as indented JSON text."""
        return json.dumps(value, indent=2)

    # user and round used for the "someone else" lookups below
    demo_user = "xanderai"
    demo_round = 51

    # the download lands in a dated folder; the predictions uploaded at the
    # end are read back from that same folder
    today = datetime.now().strftime("%Y%m%d")
    download_dir = "./dataset"
    predictions_rel_path = "numerai_dataset_{0}/example_predictions.csv".format(today)
    predictions_path = "{0}/{1}".format(download_dir, predictions_rel_path)

    # most API calls do not require logging in
    api = NumerAPI(verbosity="info")

    # log in
    print(as_json(api.login()))

    # download current dataset
    downloaded = api.download_current_dataset(dest_path=download_dir,
                                              unzip=True)
    print("download succeeded: " + str(downloaded))

    # competitions can be fetched with get_all_competitions(),
    # get_competition() or get_competition(round_id=...); the returned data
    # is too long to print practically, so those calls are not made here

    # earnings per round: logged-in user first, then the example user
    show("user earnings:", api.get_earnings_per_round())
    show("example earnings:", api.get_earnings_per_round(username=demo_user))

    # scores: logged-in user first, then the example user
    show("personal scores:", api.get_scores_for_user())
    show("other scores:", api.get_scores_for_user(username=demo_user))

    # user information: logged-in user first, then the example user
    show("current user:", as_json(api.get_user()))
    show("example user:", as_json(api.get_user(username=demo_user)))

    # submission of the example user for the example round
    show("submission:",
         as_json(api.get_submission_for_round(username=demo_user,
                                              round_id=demo_round)))

    # upload predictions
    uploaded = api.upload_predictions(predictions_path)
    print("upload succeeded: " + str(uploaded))


if __name__ == "__main__":
    main()
class NumerAPI(object):

    """Wrapper around the Numerai API.

    Methods taking a ``username`` fall back to the logged-in user when it is
    omitted and raise ``ValueError`` if nobody is logged in. Every HTTP call
    goes through ``raise_for_status``, so failed requests surface as
    ``requests.HTTPError`` instead of status codes.
    """

    # mapping of URL path names to URL paths (relative to api_base_url)
    _URL_PATHS = {
        "login": "/sessions",
        "auth": "/submission_authorizations",
        "dataset": "/competitions/current/dataset",
        "submissions": "/submissions",
        "users": "/users",
        "competitions": "/competitions",
        "competitions_by_id": "/competitions/id",
        "current_leaderboard_url": "/currentLeaderboard"
    }

    # message of the error raised when a username is needed but unavailable
    _NOT_LOGGED_IN_MSG = "username not specified and not logged in"

    def __init__(self, verbosity="INFO"):
        """
        initialize Numerai API wrapper for Python

        verbosity: indicates what level of messages should be displayed
            valid values: "debug", "info", "warning", "error", "critical"

        raises ValueError if verbosity is not a known logging level name
        """
        self.logger = logging.getLogger(__name__)

        # set up logging; the getattr default keeps an unknown level name
        # from escaping as AttributeError before the check below can turn it
        # into the intended ValueError
        numeric_log_level = getattr(logging, verbosity.upper(), None)
        if not isinstance(numeric_log_level, int):
            raise ValueError('invalid verbosity: %s' % verbosity)
        log_format = "%(asctime)s %(levelname)s %(name)s: %(message)s"
        self._date_format = "%Y-%m-%dT%H:%M:%S"
        logging.basicConfig(format=log_format, level=numeric_log_level,
                            datefmt=self._date_format)

        # Numerai API base URL
        self.api_base_url = "https://api.numer.ai"

        # first round to check for scores
        self._FIRST_ROUND = 51

        # error indicating user is not logged in (kept as an attribute for
        # backward compatibility; a fresh instance is raised when needed)
        self._not_logged_in_error = ValueError(self._NOT_LOGGED_IN_MSG)
        self._username = None
        self._access_token = None

        # per-instance copy so callers may adjust paths without touching
        # the class-level defaults
        self.url_paths = dict(self._URL_PATHS)

    @staticmethod
    def __make_dirs(path):
        """create directory (and parents), ignoring "already exists\""""
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

    def __get_url(self, url_path_name, query_params=None):
        """get url with query params for Numerai API

        url_path_name: key into ``self.url_paths`` (KeyError if unknown)
        query_params: None, a dict (appended as JSON text) or a str
            (appended verbatim); any other type is ignored with a warning
        """
        # set query params based on type
        if query_params is None:
            query_params_str = ""
        elif isinstance(query_params, dict):
            # NOTE(review): the dict is sent as raw JSON after "?", not as
            # key=value pairs -- presumably what this API expects; confirm
            query_params_str = "?" + json.dumps(query_params)
        elif isinstance(query_params, str):
            query_params_str = "?" + query_params
        else:
            self.logger.warning("invalid query params")
            # fall back to no query string (previously this branch assigned
            # the wrong variable and the return below crashed)
            query_params_str = ""

        return (self.api_base_url +
                self.url_paths[url_path_name] +
                query_params_str)

    def __get_username(self, username):
        """return given username, or the logged-in one if none is given

        raises ValueError when no username is given and nobody is logged in
        """
        if username is not None:
            return username

        # _username always exists after __init__ (as None), so test its
        # value rather than its presence
        stored_username = getattr(self, "_username", None)
        if stored_username is None:
            raise ValueError(self._NOT_LOGGED_IN_MSG)

        return stored_username

    def __unzip_file(self, src_path, dest_path, filename):
        """unzips file located at src_path into dest_path/filename"""
        self.logger.info("unzipping file...")

        # construct full path (including file name) for unzipping
        unzip_path = "{0}/{1}".format(dest_path, filename)

        # create parent directory for unzipped data
        self.__make_dirs(unzip_path)

        # extract data
        with zipfile.ZipFile(src_path, "r") as z:
            z.extractall(unzip_path)

        return True

    def __authorize_file_upload(self, file_path):
        """authorize file upload

        returns (filename, signed_request_url, auth_headers); prompts for
        login first when no access token is stored
        """
        self.logger.info("authorizing file upload...")

        # user must be logged in in order to upload files; the token
        # attribute always exists after __init__, so check its value
        if getattr(self, "_access_token", None) is None:
            self.logger.error("you must log in first")
            self.login()

        # set up request parameters
        auth_headers = {
            "Authorization": "Bearer {0}".format(self._access_token)
        }
        auth_url = self.__get_url("auth")
        auth_data = {
            "filename": os.path.basename(file_path),
            "mimetype": "text/csv"
        }

        # send auth request
        auth_res = requests.post(auth_url, data=auth_data,
                                 headers=auth_headers)
        auth_res.raise_for_status()

        # parse auth response
        auth_res_dict = auth_res.json()
        filename = auth_res_dict["filename"]
        signed_req = auth_res_dict["signedRequest"]

        return (filename, signed_req, auth_headers)

    def login(self, email=None, password=None, mfa_enabled=False):
        """log user in and store credentials

        email: account email; prompted for when omitted
        password: account password; prompted for (hidden) when omitted
        mfa_enabled: when True, additionally prompts for an MFA code

        returns dict with the "username", "accessToken" and "refreshToken"
        entries of the login response
        """
        self.logger.info("logging in...")

        # get login parameters if necessary
        if email is None:
            email = input("email: ")
        if password is None:
            password = getpass.getpass("password: ")
        mfa_code = None
        if mfa_enabled:
            mfa_code = getpass.getpass("MFA code: ")

        # send login request
        post_data = {"email": email, "password": password, "code": mfa_code}
        login_url = self.__get_url("login")
        login_res = requests.post(login_url, data=post_data)
        login_res.raise_for_status()

        # parse login response and remember who is logged in
        user = login_res.json()
        self._access_token = user["accessToken"]
        self._username = user["username"]

        # only hand back the credential fields, not the whole response
        whitelisted_keys = ["username", "accessToken", "refreshToken"]
        user_credentials = {key: user[key] for key in whitelisted_keys}

        return user_credentials

    def download_current_dataset(self, dest_path=".", unzip=True):
        """download dataset for current round

        dest_path: desired location of dataset file
        unzip: indicates whether to unzip dataset

        returns True once the dataset has been written (and unzipped)
        """
        self.logger.info("downloading current dataset...")

        # set up download path
        now = datetime.now().strftime("%Y%m%d")
        dataset_name = "numerai_dataset_{0}".format(now)
        file_name = "{0}.zip".format(dataset_name)
        dataset_path = "{0}/{1}".format(dest_path, file_name)

        # get data for current dataset
        dataset_res = requests.get(self.__get_url("dataset"), stream=True)
        dataset_res.raise_for_status()

        # create parent folder if necessary
        self.__make_dirs(dest_path)

        # write dataset to file
        with open(dataset_path, "wb") as f:
            for chunk in dataset_res.iter_content(1024):
                f.write(chunk)

        # unzip dataset
        if unzip:
            self.__unzip_file(dataset_path, dest_path, dataset_name)

        return True

    def get_all_competitions(self):
        """get all competitions from first round stored in instance variable

        returns list of competition dicts, oldest first, ending with the
        current round
        """
        self.logger.info("getting all competitions...")

        # get latest round to determine end of round ID range
        current_round = self.get_competition()
        last_round_id = current_round["_id"]

        # fetch every earlier round, then reuse the already-fetched current
        # round instead of requesting it a second time
        all_competitions = [self.get_competition(round_id=i)
                            for i in range(self._FIRST_ROUND, last_round_id)]
        all_competitions.append(current_round)

        return all_competitions

    def get_competition(self, round_id=None):
        """get a specific competition, defaults to most recent

        round_id: ID of the requested round; None requests the round whose
            end date lies after the cutoff computed below
        """
        self.logger.info("getting competition...")

        # set up request URL
        # defaults to getting most recent round
        if round_id is None:
            # indicates that the API returns an array and should be parsed
            # accordingly
            returns_array = True

            # set up JSON query
            # NOTE(review): 55296e5 microseconds is about 92 minutes; the
            # reason for this particular offset is not stated here -- confirm
            now = datetime.now()
            tdelta = timedelta(microseconds=55296e5)
            current_date = now - tdelta
            current_date_str = current_date.strftime(self._date_format)
            jsonq = {
                "end_date": {
                    "$gt": current_date_str
                }
            }

            comp_req_url = self.__get_url("competitions", query_params=jsonq)

        # otherwise set up the request with the specified round ID
        else:
            returns_array = False
            jsonq = {"id": str(round_id)}
            comp_req_url = self.__get_url("competitions_by_id",
                                          query_params=jsonq)

        # send competition request
        comp_res = requests.get(comp_req_url)
        comp_res.raise_for_status()

        # parse competition response
        competition = comp_res.json()
        if returns_array:
            competition = competition[0]

        return competition

    def get_earnings_per_round(self, username=None):
        """get earnings for every round

        returns (round_ids, earnings) as parallel numpy arrays
        """
        self.logger.info("getting earnings...")

        # construct user request URL
        username = self.__get_username(username)
        user_req_url = "{0}/{1}".format(self.__get_url("users"), username)

        # send user request
        user_res = requests.get(user_req_url)
        user_res.raise_for_status()

        # parse response
        rewards = user_res.json()["rewards"]
        round_ids = np.array([reward["_id"] for reward in rewards],
                             dtype="int")
        earnings = np.array([reward["amount"] for reward in rewards],
                            dtype=float)

        return (round_ids, earnings)

    def get_scores_for_user(self, username=None):
        """get scores for specified user

        returns (validation_scores, consistency_scores, round_ids) as
        parallel numpy arrays covering the rounds the user appears in
        """
        self.logger.info("getting scores for user...")

        # resolve the username first so a missing login fails before any
        # competition is requested
        username = self.__get_username(username)

        # get all competitions
        competitions = self.get_all_competitions()

        validation_scores = []
        consistency_scores = []
        round_ids = []

        # loop over competitions to append scores
        for competition in competitions:
            # first leaderboard entry of this user for the round, if any
            submission = next((entry for entry in competition["leaderboard"]
                               if entry["username"] == username), None)
            if submission is None:
                continue

            logloss = submission["logloss"]
            validation_scores.append(logloss["validation"])
            consistency_scores.append(logloss["consistency"])
            round_ids.append(competition["_id"])

        # convert score lists to numpy arrays
        validation_scores = np.array(validation_scores)
        consistency_scores = np.array(consistency_scores)
        round_ids = np.array(round_ids, dtype="int")

        return (validation_scores, consistency_scores, round_ids)

    def get_user(self, username=None):
        """get user information as returned by the users endpoint"""
        self.logger.info("getting user...")

        # construct user request URL
        username = self.__get_username(username)
        user_req_url = self.__get_url("users") + "/" + username

        # send user request
        user_res = requests.get(user_req_url)
        user_res.raise_for_status()

        # parse user response
        user = user_res.json()

        return user

    def get_submission_for_round(self, username=None, round_id=None):
        """gets submission for single round

        returns (username, submission_id, logloss_val, logloss_consistency,
        career_usd, career_nmr, concordant, original), or an empty tuple
        if the user is not on that round's leaderboard
        """
        self.logger.info("getting user submission for round...")

        # get username for filtering competition leaderboard
        username = self.__get_username(username)

        # get competition for specified round
        competition = self.get_competition(round_id=round_id)

        # parse user submission data
        for user in competition["leaderboard"]:
            if user["username"] == username:
                submission_id = user["submission_id"]
                logloss = user["logloss"]
                career = user["earnings"]["career"]

                # builtin float replaces the np.float alias, which newer
                # NumPy releases no longer provide
                logloss_val = float(logloss["validation"])
                logloss_consistency = float(logloss["consistency"])

                # career amounts arrive as strings with thousands separators
                career_usd = float(career["usd"].replace(",", ""))
                career_nmr = float(career["nmr"].replace(",", ""))
                concordant = user["concordant"]
                original = user["original"]

                return (username, submission_id, logloss_val,
                        logloss_consistency, career_usd, career_nmr,
                        concordant, original)

        # return an empty tuple if user is not on the leaderboard
        self.logger.warning("user \"{0}\" is not on leaderboard".format(username))
        return ()

    def upload_predictions(self, file_path):
        """uploads predictions from file

        file_path: path to the CSV of predictions, including the file name

        returns True once the file is uploaded and the submission registered
        """
        self.logger.info("uploading prediction...")

        # parse information for file upload
        filename, signed_url, headers = self.__authorize_file_upload(file_path)

        # get information for current competition
        competition = self.get_competition()
        dataset_id = competition["dataset_id"]
        competition_id = competition["_id"]

        # read the file, then PUT it to the signed URL; the request is
        # prepared on its own (not through the session) exactly as before
        with open(file_path, "rb") as fp:
            file_req = requests.Request("PUT", signed_url, data=fp.read())
        prepared_file_req = file_req.prepare()

        # the session is closed as soon as the upload response is in
        with requests.Session() as req_session:
            file_res = req_session.send(prepared_file_req)
        file_res.raise_for_status()

        # get submission URL
        sub_url = self.__get_url("submissions")
        # construct submission data
        sub_data = {
            "competition_id": competition_id,
            "dataset_id": dataset_id,
            "filename": filename
        }

        # register the uploaded file as a submission
        sub_res = requests.post(sub_url, data=sub_data, headers=headers)
        sub_res.raise_for_status()

        return True