[View in Colaboratory](https://colab.research.google.com/github/hcsumanth/lazyNMR/blob/master/numeraieasy.ipynb)

In [1]:
!pip install numerox
import numerox as nx
import pandas as pd
data = nx.download('numerai_dataset.zip')
!unzip numerai_dataset.zip
#Training Data 
train = pd.read_csv('numerai_training_data.csv', header=0)
# To evaluate on the Model
tournament = pd.read_csv('numerai_tournament_data.csv', header=0)

Collecting numerox
[?25l  Downloading https://files.pythonhosted.org/packages/69/d4/9e902b7b7ca51e79b18dcfdc499a480d885a1417a64219aedbb2f8be7791/numerox-3.3.0.tar.gz (1.4MB)
[K    100% |████████████████████████████████| 1.4MB 8.4MB/s 
Collecting tables (from numerox)
[?25l  Downloading https://files.pythonhosted.org/packages/d7/1b/21f4c7f296b718575c17ef25e61c05742a283c45077b4c8d5a190b3e0b59/tables-3.4.4-cp36-cp36m-manylinux1_x86_64.whl (3.8MB)
[K    100% |████████████████████████████████| 3.8MB 8.3MB/s 
Collecting numerapi (from numerox)
  Downloading https://files.pythonhosted.org/packages/98/85/8861a07f0cb4ce3a4a8474b64370cde3767658b7cc92c7f52c0e5e896201/numerapi-1.3.0.tar.gz
Collecting nose (from numerox)
[?25l  Downloading https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl (154kB)
[K    100% |████████████████████████████████| 163kB 22.2MB/s 
Collecting numexpr>=2.5.2 (from tables->numerox)
[?

numerai_dataset.zip: 116MB [00:04, 24.6MB/s]                           


Archive:  numerai_dataset.zip
  inflating: example_predictions_target_elizabeth.csv  
  inflating: example_predictions_target_jordan.csv  
  inflating: example_predictions_target_charles.csv  
  inflating: numerai_training_data.csv  
  inflating: example_predictions_target_bernie.csv  
  inflating: example_model.r         
  inflating: example_predictions_target_ken.csv  
  inflating: numerai_tournament_data.csv  
  inflating: example_model.py        
  inflating: numerox_example.py      


In [0]:

# Select the tournament rows whose data_type is 'validation'.
validation_data = tournament.loc[tournament['data_type'] == 'validation']
# Stack those rows underneath the training rows to form one combined frame.
complete_training_data = pd.concat(
    [train, validation_data],
)


In [0]:
# Feature columns are the ones whose name contains the substring 'feature'.
features = [column for column in train.columns if 'feature' in column]
# Model inputs and the bernie target, both taken from the combined frame.
X = complete_training_data[features]
Y = complete_training_data['target_bernie']

In [0]:

import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GroupKFold
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization, Dropout, Activation
from keras.wrappers.scikit_learn import KerasClassifier

def create_model(neurons=2000, dropout=0.2, input_dim=50):
    """Build and compile a single-hidden-layer binary classifier.

    Architecture: Dense(neurons, no bias) -> BatchNormalization -> Dropout
    -> ReLU -> Dense(1, sigmoid).

    Args:
        neurons: Width of the hidden Dense layer.
        dropout: Rate passed to the Dropout layer.
        input_dim: Number of input features per sample. Defaults to 50,
            the width that was previously hard-coded, so existing callers
            (including KerasClassifier/GridSearchCV) are unaffected.

    Returns:
        A compiled keras `Sequential` model using binary cross-entropy loss
        and the Adam optimizer.
    """
    model = Sequential()
    # The bias is disabled because the following BatchNormalization layer
    # applies its own learned offset.
    model.add(Dense(neurons, input_shape=(input_dim,), kernel_initializer='glorot_uniform', use_bias=False))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))
    model.add(Activation('relu'))
    model.add(Dense(1, activation='sigmoid', kernel_initializer='glorot_normal'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_crossentropy', 'accuracy'])
    return model

# Wrap the Keras builder so scikit-learn can drive it as an estimator.
model = KerasClassifier(
    build_fn=create_model,
    epochs=8,
    batch_size=128,
    verbose=0,
)

# Candidate hyper-parameter values; the search covers every combination.
neurons = [10, 14]
dropout = [0.01, 0.26]
param_grid = {'neurons': neurons, 'dropout': dropout}

# Fold by era so that rows from one era never appear on both sides of a split.
gkf = GroupKFold(n_splits=5)
kfold_split = gkf.split(X, Y, groups=complete_training_data.era)

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=kfold_split,
    scoring='neg_log_loss',
    n_jobs=1,
    verbose=1,
)
grid_result = grid.fit(X.values, Y.values)

# Report the winning configuration, then the score of every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Using TensorFlow backend.


Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [0]:
# List the contents of the current working directory.
!ls

In [0]:
# Download 'example.txt' through the google.colab `files` helper.
# NOTE(review): no visible cell creates 'example.txt' — confirm the intended filename.
from google.colab import files
files.download('example.txt') 

In [44]:
# Display the (rows, columns) shape of the combined train + validation frame.
complete_training_data.shape

(439975, 58)

In [0]:

# Tournament rows flagged as 'validation', kept aside for local validation.
validation = tournament.loc[tournament.data_type == 'validation']
# Eras are the dataset's grouping label (a kind of encrypted timestamp).
# Collect the distinct eras found among the rows with data_type == 'train'.
training_eras = train.loc[train['data_type'] == 'train'].copy()
eras = list(training_eras['era'].unique())


In [0]:
# Work on a shallow copy so that `eras` itself is left untouched.
iter_eras = list(eras)

In [32]:
# Display the copied list of era labels.
iter_eras

['era1',
 'era2',
 'era3',
 'era4',
 'era5',
 'era6',
 'era7',
 'era8',
 'era9',
 'era10',
 'era11',
 'era12',
 'era13',
 'era14',
 'era15',
 'era16',
 'era17',
 'era18',
 'era19',
 'era20',
 'era21',
 'era22',
 'era23',
 'era24',
 'era25',
 'era26',
 'era27',
 'era28',
 'era29',
 'era30',
 'era31',
 'era32',
 'era33',
 'era34',
 'era35',
 'era36',
 'era37',
 'era38',
 'era39',
 'era40',
 'era41',
 'era42',
 'era43',
 'era44',
 'era45',
 'era46',
 'era47',
 'era48',
 'era49',
 'era50',
 'era51',
 'era52',
 'era53',
 'era54',
 'era55',
 'era56',
 'era57',
 'era58',
 'era59',
 'era60',
 'era61',
 'era62',
 'era63',
 'era64',
 'era65',
 'era66',
 'era67',
 'era68',
 'era69',
 'era70',
 'era71',
 'era72',
 'era73',
 'era74',
 'era75',
 'era76',
 'era77',
 'era78',
 'era79',
 'era80',
 'era81',
 'era82',
 'era83',
 'era84',
 'era85',
 'era86',
 'era87',
 'era88',
 'era89',
 'era90',
 'era91',
 'era92',
 'era93',
 'era94',
 'era95',
 'era96',
 'era97',
 'era98',
 'era99',
 'era100',
 'era101

In [0]:
def bernie_training():
    """Assemble the inputs for working with the `target_bernie` column.

    Reads the notebook-level `train`, `validation` and `tournament` frames.
    The visible body only assigns local variables and contains no `return`.
    NOTE(review): the cell looks unfinished or truncated — confirm.
    """
  
    # Drop the identifier columns and the four other targets; every
    # remaining column (including 'target_bernie') is kept.
    train_bernie = train.drop([
        'id', 'era', 'data_type',
        'target_charles', 'target_elizabeth',
        'target_jordan', 'target_ken'], axis=1)
    
    # Column names containing "feature" form the model inputs. X and Y
    # remain pandas objects here; nothing is converted to numpy arrays.
    features = [f for f in list(train_bernie) if "feature" in f]
    X = train_bernie[features]
    Y = train_bernie['target_bernie']
    # Same feature columns taken from the validation rows (presumably the
    # prediction input — verify against the rest of the cell).
    x_prediction = validation[features]
    # Ids of every tournament row, not only the validation subset.
    ids = tournament['id']
    
    

numerai_dataset.zip: 116MB [00:02, 48.5MB/s]                           


In [17]:
# Inspect the type of the object returned by nx.download.
type(data)

numerox.data.Data

Archive:  numerai_dataset.zip
  inflating: example_predictions_target_elizabeth.csv  
  inflating: example_predictions_target_jordan.csv  
  inflating: example_predictions_target_charles.csv  
  inflating: numerai_training_data.csv  
  inflating: example_predictions_target_bernie.csv  
  inflating: example_model.r         
  inflating: example_predictions_target_ken.csv  
  inflating: numerai_tournament_data.csv  
  inflating: example_model.py        
  inflating: numerox_example.py      


In [18]:
# List the contents of the current working directory.
!ls

logistic.csv  numerai_dataset.zip  sample_data
