In [1]:
import os
import requests
import pandas as pd
from io import BytesIO
from zipfile import ZipFile

## Download zip file and convert to dataframe

### Future:
* check if current round data exists by checking for current dataset's round number in filename
* only download if current data doesn't exist
* save result with round number in file name


In [50]:
def numerai_api_query(query):
    
    numerai_api_url = 'https://api-tournament.numer.ai/'
    headers = {'Content-Type':'application/json',
               'Accept':'application/json'
              }
    with requests.Session() as r:
        return r.post(url=numerai_api_url,
                      json=query,
                      headers=headers).json()

In [51]:
def get_current_round():
    rounds_query = {'query': '{rounds {number}}'}
    data = numerai_api_query(rounds_query)['data']['rounds']
    round_numbers = [number for rounds in data 
                     for number in rounds.values()]
    round_numbers.sort(reverse=True)
    return round_numbers[0]

In [52]:
get_current_round()

91

In [53]:
def get_dataset_url():
    
    dataset_query = {'query':'{dataset}'}
    return numerai_api_query(dataset_query)['data']['dataset']


def download_dataset_as_df(dataset_url):
    with requests.Session() as r:
        dataset_download = r.get(dataset_url, stream=True).content
    
        with ZipFile(BytesIO(dataset_download)) as dataset_zip:
            with dataset_zip.open('numerai_training_data.csv') as train_data:
                df_train = pd.read_csv(train_data, index_col='id')
            with dataset_zip.open('numerai_tournament_data.csv') as live_data:
                df_live = pd.read_csv(live_data, index_col='id')
            
    return pd.concat([df_train, df_live])

In [54]:
dataset_url = get_dataset_url()
round_number = dataset_url.split('/')[3]
round_number

'91'

In [55]:
df = download_dataset_as_df(dataset_url)

In [56]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 637024 entries, n2b2e3dd163cb422 to n5b44b664391d4cf
Data columns (total 53 columns):
era          637024 non-null object
data_type    637024 non-null object
feature1     637024 non-null float64
feature2     637024 non-null float64
feature3     637024 non-null float64
feature4     637024 non-null float64
feature5     637024 non-null float64
feature6     637024 non-null float64
feature7     637024 non-null float64
feature8     637024 non-null float64
feature9     637024 non-null float64
feature10    637024 non-null float64
feature11    637024 non-null float64
feature12    637024 non-null float64
feature13    637024 non-null float64
feature14    637024 non-null float64
feature15    637024 non-null float64
feature16    637024 non-null float64
feature17    637024 non-null float64
feature18    637024 non-null float64
feature19    637024 non-null float64
feature20    637024 non-null float64
feature21    637024 non-null float64
feature22    637024 

In [57]:
df.head()

Unnamed: 0_level_0,era,data_type,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,...,feature42,feature43,feature44,feature45,feature46,feature47,feature48,feature49,feature50,target
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n2b2e3dd163cb422,era1,train,0.43487,0.44645,0.25802,0.37149,0.62235,0.67451,0.68103,0.45166,...,0.52962,0.42439,0.5168,0.46297,0.57426,0.57946,0.49646,0.48968,0.54194,1.0
n177021a571c94c8,era1,train,0.50038,0.39216,0.38394,0.51213,0.3666,0.46911,0.68204,0.6132,...,0.51669,0.48445,0.57587,0.5986,0.67558,0.45577,0.80908,0.50287,0.61629,0.0
n7830fa4c0cd8466,era1,train,0.47416,0.34143,0.39528,0.46337,0.72953,0.45962,0.47869,0.68118,...,0.41458,0.34804,0.29058,0.51382,0.36389,0.80602,0.39253,0.41821,0.58679,0.0
nc594a184cee941b,era1,train,0.48759,0.55903,0.43987,0.38834,0.4465,0.46389,0.70749,0.63182,...,0.28776,0.42881,0.55402,0.53695,0.48793,0.62432,0.52898,0.49009,0.49557,0.0
nc5ab8667901946a,era1,train,0.23433,0.55499,0.47849,0.5699,0.64945,0.47152,0.62085,0.57626,...,0.64405,0.32416,0.33193,0.58065,0.44587,0.4777,0.4402,0.47895,0.57978,0.0


In [None]:
save_dataset(df, round_number)


In [None]:
!ls ~/Projects/numerai/numerai/data/raw/

In [5]:
project_dir = os.path.join(os.getcwd(), os.pardir)
raw_data_path = os.path.join(project_dir, 'data', 'raw')
files = [csv for csv in os.listdir(raw_data_path)]
    

In [9]:
result.text

'No query document supplied'

In [5]:
import requests
numerai_api_url = 'https://api-tournament.numer.ai/'
headers = {'Content-Type':'application/json',
           'Accept':'application/json',
           'Authorization':'Token {}${}'.format(public_id, secret_key,
          }

query = {'mutation': '{createSubmission (filename: "/home/sean/Projects/numerai/numerai/notebooks/predictions.csv") {validationLogloss} }'}
with requests.Session() as r:
    result = r.post(url=numerai_api_url,
                  json=query,
                  headers=headers)

In [2]:
from numerapi import numerapi
import os
from dotenv import load_dotenv, find_dotenv

In [3]:
# find .env automatically by walking up directories until it's found
dotenv_path = find_dotenv()
# load up the entries as environment variables
load_dotenv(dotenv_path)

True

In [4]:
public_id = os.environ.get("NUMERAI_SUBMIT_ID")
secret_key = os.environ.get("NUMERAI_SUBMIT_KEY")

In [11]:
predict_file = raw_data_path = os.path.join(os.getcwd(), 'predictions_groupkfold.csv')
predict_file

'/home/sean/Projects/numerai/numerai/notebooks/predictions_groupkfold.csv'

In [12]:
napi = numerapi.NumerAPI(public_id, secret_key, verbosity='info')
submission_id = napi.upload_predictions(predict_file)
napi.submission_status()


2018-01-27 22:08:25,427 INFO numerapi.numerapi: uploading prediction...


{'concordance': None,
 'consistency': None,
 'originality': None,
 'validation_logloss': None}

In [14]:
napi.submission_status()

{'concordance': {'pending': False, 'value': True},
 'consistency': 83.33333333333334,
 'originality': {'pending': False, 'value': False},
 'validation_logloss': 0.6925454807645405}