In [3]:
"""
som_regression model
"""

!pip install numerapi

import os
import gc

import pandas as pd
from numerapi import NumerAPI
import zipfile
import os

import susi
import matplotlib.pyplot as plt


from sklearn.linear_model import Ridge
from sklearn.ensemble import BaggingClassifier

TOURNAMENT_NAME = "kazutsugi"
TARGET_NAME = f"target"
PREDICTION_NAME = f"prediction_{TOURNAMENT_NAME}"

data_directory = "../kazutsugi/datasets/"

BENCHMARK = 0.002
BAND = 0.04


# Submissions are scored by Spearman correlation
def score(df):
    return df[[TARGET_NAME, PREDICTION_NAME]].corr(method="spearman")[TARGET_NAME][PREDICTION_NAME]


# The payout function
def payout(scores):
    return ((scores - BENCHMARK)/BAND).clip(lower=-1, upper=1)


def download_data():

    data_archive = NumerAPI().download_current_dataset(dest_path='../tmp', unzip=False)

    with zipfile.ZipFile(data_archive, "r") as zip_ref:
        zip_ref.extractall("../kazutsugi/datasets")



You should consider upgrading via the '/Users/andrewjennings/PycharmProjects/numerai_tree_regression/venv/tree_regression/bin/python -m pip install --upgrade pip' command.[0m


In [None]:
def get_data():

    download_data()

    print("# Loading data...")
    # The training data is used to train your model how to predict the targets.
    training_data = pd.read_csv(data_directory + "numerai_training_data.csv").set_index("id")
    # The tournament data is the data that Numerai uses to evaluate your model.
    tournament_data = pd.read_csv(data_directory + "numerai_tournament_data.csv").set_index("id")

    feature_names = [ f for f in training_data.columns if f.startswith("feature")]

    print(f"Loaded {len(feature_names)} features")


    print("Training model")



    return training_data,feature_names


In [4]:
def make_model(training_data,feature_names):

    X = training_data[feature_names]
    Y = training_data[TARGET_NAME]

    print(X.head())

    som = susi.SOMClustering()
    som.fit(X)
    bmu_list = som.get_bmus(X)
    plt.hist2d([x[0] for x in bmu_list], [x[1] for x in bmu_list])


    model = Ridge(alpha=0.9)
    model.fit(X, Y)

    print("Generating predictions")

    training_data[PREDICTION_NAME] = model.predict(training_data[feature_names])

    tournament_data[PREDICTION_NAME] = model.predict(tournament_data[feature_names])

    # Check the per-era correlations on the training set

    train_correlations = training_data.groupby("era").apply(score)
    print(
        f"On training the correlation has mean {train_correlations.mean()} and std {train_correlations.std()}")
    print(
        f"On training the average per-era payout is {payout(train_correlations).mean()}")

    # Check the per-era correlations on the validation set

    validation_data = tournament_data[tournament_data.data_type == "validation"]
    validation_correlations = validation_data.groupby("era").apply(score)
    print(
        f"On validation the correlation has mean {validation_correlations.mean()} and std {validation_correlations.std()}")
    print(
        f"On validation the average per-era payout is {payout(validation_correlations).mean()}")

    # create destination directory if it does not exist
    #

    destination_dir = "../kazutsugi/submissions/"

    if not os.path.exists(destination_dir):
        os.makedirs(destination_dir)

    submission_file = destination_dir + TOURNAMENT_NAME + "_submission.csv"

    tournament_data[PREDICTION_NAME].to_csv(submission_file,header=True)


In [5]:

make_model()







2021-09-13 11:37:33,383 INFO numerapi.utils: target file already exists
2021-09-13 11:37:33,384 INFO numerapi.utils: download complete


# Loading data...
Loaded 310 features
Training model
                  feature_intelligence1  feature_intelligence2  \
id                                                               
n000315175b67977                   0.00                   0.50   
n0014af834a96cdd                   0.00                   0.00   
n001c93979ac41d4                   0.25                   0.50   
n0034e4143f22a13                   1.00                   0.00   
n00679d1a636062f                   0.25                   0.25   

                  feature_intelligence3  feature_intelligence4  \
id                                                               
n000315175b67977                   0.25                   0.00   
n0014af834a96cdd                   0.00                   0.25   
n001c93979ac41d4                   0.25                   0.25   
n0034e4143f22a13                   0.00                   0.50   
n00679d1a636062f                   0.25                   0.25   

                  fea

AttributeError: 'str' object has no attribute 'astype'

In [None]:
from numerapi import NumerAPI


n_id = "OML65REYFDPC5O7N22XCRP44BG2M74XH"
key = "YSTL455VERL7WZ4D7OQ6XEYEQN2MRCCICBMILNFP3DUZC4MSAS2WSH2MV7ED6WB3"

api = NumerAPI(public_id=n_id,secret_key=key)

base_path = "../kazutsugi/submissions/"

path = base_path + 'kazutsugi' + "_submission.csv"
#print('uploading')
#api.upload_predictions(path)