In [1]:
from os.path import join
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Starting-kit locations, all given as relative paths.
inges_dir = 'AutoDL_ingestion_program/'         # ingestion program directory
score_dir = 'AutoDL_scoring_program/'           # scoring program directory
model_dir = 'AutoDL_sample_code_submission/'    # where the model code is; the file is called model.py
baseline_dir = 'Models/'                        # some baseline methods are implemented here
model_simple = join(baseline_dir, 'modelcnn.py')  # the simple baseline model chosen for this run

# Output directory and the HTML report path displayed in the last cell.
output_dir = 'AutoDL_scoring_output'
detailed_results_page = join(output_dir, 'detailed_results.html')

# Append the kit directories to the module search path
# (order kept: submission, ingestion, scoring, baselines).
from sys import path
path.extend([model_dir, inges_dir, score_dir, baseline_dir])

# Part 1: Dataset Overview

In [3]:
data_dir = 'practice_dataset/offline_data'            # Change this directory and the dataset as needed
data_name = 'O3'
!ls $data_dir

[34mO1[m[m [34mO2[m[m [34mO3[m[m [34mO4[m[m [34mO5[m[m


In [4]:
# Read the train / test datasets of the selected dataset.
# The module 'dataset' is defined at AutoDL_ingestion_program/dataset.py.
from dataset import AutoNLPDataset

# Build <data_dir>/<data_name>/<data_name>.data by passing every component to
# join() instead of embedding a hard-coded '/' separator in the string.
D = AutoNLPDataset(join(data_dir, data_name, data_name + '.data'))
D.read_dataset()
D_train = D.get_train()
D_test = D.get_test()

# Part 2: Model, Prediction and Metrics

In [5]:
# Copy the simple baseline model into the submission directory.
# Submitted models must be called model.py, so the destination is
# <model_dir>/model.py; an existing file at that path is overwritten.
import shutil

model_submit = join(model_dir, 'model.py')
# shutil.copyfile raises if the copy fails, whereas `!cp` only prints the shell
# error and lets the notebook continue with a stale or missing model.py.
# The trailing ';' keeps the returned destination path out of the cell output.
shutil.copyfile(model_simple, model_submit);

In [6]:
# Time budget passed to the model's train and test calls in the next cells.
# NOTE(review): the unit is presumably seconds — confirm against the ingestion program.
time_budget = 2400

# Instantiate the model with the dataset's metadata.
from model import Model
M = Model(D.metadata_)

Using TensorFlow backend.


In [7]:
# Train the model on the training split, handing it the full time budget
# (time_budget) as its remaining budget.
# NOTE(review): how Model.train spends or enforces the budget is defined in
# model.py, which is not visible from this notebook.
M.train(D_train, remaining_time_budget=time_budget)

Found 2000000 fastText word vectors.
fastText oov words: 19999
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train on 9051 samples, validate on 2263 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/1000
 - 90s - loss: 2.9933 - acc: 0.0550 - val_loss: 2.9925 - val_acc: 0.0490
Epoch 2/1000
 - 96s - loss: 2.9919 - acc: 0.0529 - val_loss: 2.9930 - val_acc: 0.0513
Epoch 3/1000
 - 99s - loss: 2.9909 - acc: 0.0508 - val_loss: 2.9931 - val_acc: 0.0446
Epoch 4/1000
 - 99s - loss: 2.9913 - acc: 0.0525 - val_loss: 2.9929 - val_acc: 0.0446
Epoch 5/1000
 - 98s - loss: 2.9913 - acc: 0.0551 - val_loss: 2.9928 - val_acc: 0.0495
Epoch 6/1000
 - 102s - loss: 2.9912 - acc: 0.0526 - val_loss: 2.9928 - val_acc: 0.0446
Epoch 7/1000
 - 100s - loss: 2.9908 - acc: 0.0549 - val_loss: 2.9929 - val_acc: 0.0446
Epoch 8/1000
 - 102s - loss: 2.9908 - acc: 0.0494 

In [8]:
# Get predictions for the test split by calling the model's test method.
# NOTE(review): the full time_budget is passed again here even though the
# training cell has already run — confirm whether the remaining budget should
# be reduced by the time spent training.
prediction = M.test(D_test, remaining_time_budget=time_budget)

In [9]:
# Calculate scores for the predictions against the dataset's solution file.
from score import autodl_auc, accuracy
from libscores import read_array

# Build <data_dir>/<data_name>/<data_name>.solution by passing every component
# to join() instead of embedding a hard-coded '/' separator in the string.
solution_file = join(data_dir, data_name, data_name + '.solution')
solution = read_array(solution_file)

acc = accuracy(solution, prediction)  # note that accuracy is not the evaluation metric in the challenge
# Despite its name, current_bac holds the autodl_auc result that is reported
# below as the Normalized Area Under ROC Curve (NAUC).
current_bac = autodl_auc(solution, prediction)

print("Classification Accuracy: ", acc)
print("Normalized Area Under ROC Curve (NAUC) = {:.4f}".format(current_bac))
print("ALC can be read from the result page as shown in the next part.")

Classification Accuracy:  0.05270844397238443
Normalized Area Under ROC Curve (NAUC) = 0.0000
ALC can be read from the result page as shown in the next part.


# Part 3: Test and Submission

In [10]:
# run local test
!python run_local_test.py -code_dir=./AutoDL_sample_code_submission -dataset_dir=practice_dataset/offline_data/O2

2020-03-19 19:42:50 INFO run_local_test.py: ##################################################
2020-03-19 19:42:50 INFO run_local_test.py: Begin running local test using
2020-03-19 19:42:50 INFO run_local_test.py: code_dir = AutoDL_sample_code_submission
2020-03-19 19:42:50 INFO run_local_test.py: dataset_dir = O2
2020-03-19 19:42:50 INFO run_local_test.py: ##################################################
/Users/vaish/anaconda3/envs/ml/bin/python: can't find '__main__' module in '/Users/vaish/Desktop/AutoNLP/AutoNLP'
/Users/vaish/anaconda3/envs/ml/bin/python: can't find '__main__' module in '/Users/vaish/Desktop/AutoNLP/AutoNLP'


In [11]:
# Result report: render the HTML file at detailed_results_page inline.
# display and HTML are imported from the public IPython.display module;
# importing display from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Pass the path as filename= so the file's contents are loaded explicitly and a
# missing report raises an error. A bare positional string is treated as markup
# unless IPython happens to detect that it names an existing file.
display(HTML(filename=detailed_results_page))