In [1]:
%cd /home/slav/ai/claim-rank
!export PYTHONPATH=.
%load_ext autoreload
%autoreload 2

/home/slav/ai/claim-rank


In [2]:
from tempfile import NamedTemporaryFile
import numpy as np
np.random.seed(42) # ! before importing keras!

In [3]:
from keras.layers import Input, Embedding, Conv1D, Dense
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout
from keras import optimizers
from sklearn.metrics import average_precision_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
def load_crossvalidation_data(name='crossval'):
    """Load and return the array stored in ``folds/<name>.npy``.

    The path is relative to the current working directory.
    """
    filename = name + '.npy'
    return np.load('folds/' + filename)

In [None]:
# Load the cross-validation folds stored in folds/cb.npy.
folds = load_crossvalidation_data(name='cb')

In [None]:
def create_baseline_model(in_count, out_count):
    """Build and compile a one-hidden-layer network with `out_count` sigmoid heads.

    Args:
        in_count: width of the input feature vector.
        out_count: number of single-unit sigmoid output layers to attach.

    Returns:
        A compiled Keras ``Model`` (adam optimizer, binary cross-entropy loss,
        accuracy metric) whose outputs are the `out_count` heads, in order.
    """
    features = Input(shape=(in_count,))
    hidden = Dense(100, kernel_initializer='normal', activation='relu')(features)

    # Every head is its own Dense(1) layer fed by the shared hidden layer.
    heads = [
        Dense(1, kernel_initializer='normal', activation='sigmoid')(hidden)
        for _ in range(out_count)
    ]

    network = Model(inputs=[features], outputs=heads)
    network.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])
    return network

In [None]:
def run_single_model(X_train, X_val, X_test, train_target, val_targets, test_targets, create_model = create_baseline_model, iters = 20, epochs=5):
    """Train a single-output model and score its best checkpoint per target.

    The model is fitted for `iters` rounds of `epochs` epochs each. After every
    round the validation predictions are scored (average precision) against
    each row of `val_targets`; whenever a target's validation score improves,
    the model is checkpointed to that target's temporary file. Finally each
    target's best checkpoint is reloaded and scored on the test set.

    Args:
        X_train, X_val, X_test: feature matrices; ``X_train.shape[1]`` is used
            as the model's input width.
        train_target: labels passed to ``model.fit`` as ``y``.
        val_targets, test_targets: indexable collections holding one label
            vector per evaluated target (same order in both).
        create_model: factory called as ``create_model(n_features, 1)``.
        iters: number of fit/evaluate rounds.
        epochs: epochs per round.

    Returns:
        A list with one dict per evaluated target containing 'av_p_val'
        (best validation average precision), 'iter' (round it was reached,
        -1 if never), 'av_p_test' (test average precision of that checkpoint,
        0. if no checkpoint was saved) and 'file' (the already-closed
        temporary file object).
    """
    model = create_model(X_train.shape[1], 1)
    best_models = [{'av_p_val': 0., 'iter': -1, 'av_p_test': 0., 'file': NamedTemporaryFile()} for _ in test_targets]

    try:
        for ite in range(iters):
            model.fit(X_train, y=train_target, epochs=epochs, verbose=0, batch_size=550)

            # Progress indicator: total number of epochs trained so far.
            print(str(epochs * (ite + 1)) + ' ', end='')

            predicted_val = model.predict(X_val)[:, 0]

            for j in range(len(test_targets)):
                av_p_val = average_precision_score(val_targets[j], predicted_val)
                if av_p_val > best_models[j]['av_p_val']:
                    file = best_models[j]['file']
                    model.save(file.name)
                    best_models[j] = {'av_p_val': av_p_val, 'iter': ite, 'file': file}

        print()

        for j in range(len(test_targets)):
            if best_models[j]['iter'] < 0:
                # No checkpoint was ever written (no rounds ran, or the
                # validation score never rose above 0); the temp file is empty
                # and cannot be loaded, so keep the default test score.
                continue
            best = load_model(best_models[j]['file'].name)
            best_models[j]['av_p_test'] = average_precision_score(test_targets[j], best.predict(X_test)[:, 0])
    finally:
        # Closing a NamedTemporaryFile also deletes it; do this even when
        # training or evaluation raises so checkpoints do not pile up on disk.
        for entry in best_models:
            entry['file'].close()

    return best_models

In [None]:
def run_multiple_model(X_train, X_val, X_test, train_targets, val_targets, test_targets, log_indexes, create_model = create_baseline_model, iters = 20, epochs=5):
    """Train a multi-output model and score the best checkpoint per tracked head.

    The model gets one output per entry of `train_targets` and is fitted for
    `iters` rounds of `epochs` epochs each. After every round, the output at
    position ``log_indexes[j]`` is scored (average precision) against
    ``val_targets[j]``; when that score improves, the whole model is
    checkpointed to target j's temporary file. Finally each target's best
    checkpoint is reloaded and the same output is scored on the test set.

    Args:
        X_train, X_val, X_test: feature matrices; ``X_train.shape[1]`` is used
            as the model's input width.
        train_targets: sequence of label vectors, one per model output; passed
            to ``model.fit`` as ``y``.
        val_targets, test_targets: indexable collections holding one label
            vector per evaluated target (same order in both).
        log_indexes: for each evaluated target, the index of the model output
            that predicts it.
        create_model: factory called as
            ``create_model(n_features, len(train_targets))``.
        iters: number of fit/evaluate rounds.
        epochs: epochs per round.

    Returns:
        A list with one dict per entry of `test_targets` containing 'av_p_val',
        'iter' (-1 if no checkpoint was saved), 'av_p_test' (0. if no
        checkpoint was saved) and 'file' (the already-closed temporary file).
    """
    def as_output_list(predictions):
        # Keras returns a bare array instead of a list when the model has a
        # single output; normalise so indexing by head works in both cases.
        return predictions if isinstance(predictions, list) else [predictions]

    model = create_model(X_train.shape[1], len(train_targets))
    best_models = [{'av_p_val': 0., 'iter': -1, 'av_p_test': 0., 'file': NamedTemporaryFile()} for _ in test_targets]

    try:
        for ite in range(iters):
            model.fit(X_train, y=train_targets, epochs=epochs, verbose=0, batch_size=550)

            # Progress indicator: total number of epochs trained so far.
            print(str(epochs * (ite + 1)) + ' ', end='')

            predicted_val = as_output_list(model.predict(X_val))

            for j, i in enumerate(log_indexes):
                av_p_val = average_precision_score(val_targets[j], predicted_val[i][:, 0])
                if av_p_val > best_models[j]['av_p_val']:
                    file = best_models[j]['file']
                    model.save(file.name)
                    best_models[j] = {'av_p_val': av_p_val, 'iter': ite, 'file': file}

        print()

        for j, i in enumerate(log_indexes):
            if best_models[j]['iter'] < 0:
                # No checkpoint was ever written for this target; the temp
                # file is empty and cannot be loaded, so keep the default.
                continue
            best = load_model(best_models[j]['file'].name)
            predicted_test = as_output_list(best.predict(X_test))
            best_models[j]['av_p_test'] = average_precision_score(test_targets[j], predicted_test[i][:, 0])
    finally:
        # Close (and thereby delete) every temp file, including entries that
        # have no matching log index and when an exception is propagating.
        for entry in best_models:
            entry['file'].close()

    return best_models

In [None]:
def crossValidateSingle(folds, train_targets, test_targets):
    """Cross-validate the single-output model and print mean test scores.

    Each fold unpacks to ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    Label columns `train_targets` are used for fitting and columns
    `test_targets` for validation/test scoring. Per fold, the training shape
    and the per-target results are printed; at the end the test average
    precision averaged over all folds is printed, one value per test target.
    """
    totals = [0] * len(test_targets)

    for X_train, X_val, X_test, y_train, y_val, y_test in folds:
        # Select the label columns and transpose so each row is one target.
        train_labels = list(y_train[:, train_targets].T)
        val_labels = y_val[:, test_targets].T
        test_labels = y_test[:, test_targets].T

        print(X_train.shape)
        fold_results = run_single_model(X_train, X_val, X_test, train_labels, val_labels, test_labels)
        print(fold_results)

        for position, result in enumerate(fold_results):
            totals[position] += result['av_p_test']

    print(np.array(totals) / len(folds))
    

In [None]:
def crossValidateMultiple(folds, train_targets, test_targets, log_indexes):
    """Cross-validate the multi-output model and print mean test scores.

    Each fold unpacks to ``(X_train, X_val, X_test, y_train, y_val, y_test)``.
    Label columns `train_targets` become the model's outputs, columns
    `test_targets` are used for validation/test scoring, and `log_indexes`
    is forwarded to ``run_multiple_model`` to pick the output scored for each
    test target. Per fold, the training shape and the per-target results are
    printed; at the end the test average precision averaged over all folds is
    printed, one value per test target.
    """
    totals = [0] * len(test_targets)

    for X_train, X_val, X_test, y_train, y_val, y_test in folds:
        # Select the label columns and transpose so each row is one target.
        train_labels = list(y_train[:, train_targets].T)
        val_labels = y_val[:, test_targets].T
        test_labels = y_test[:, test_targets].T

        print(X_train.shape)
        fold_results = run_multiple_model(X_train, X_val, X_test, train_labels, val_labels, test_labels, log_indexes)
        print(fold_results)

        for position, result in enumerate(fold_results):
            totals[position] += result['av_p_test']

    print(np.array(totals) / len(folds))

In [None]:
# Reload the 'cb' folds, then cross-validate a single-output model trained on
# label column 6 and scored against label columns 0 and 6.
folds = load_crossvalidation_data(name='cb')
crossValidateSingle(folds, [6], [0, 6])

In [None]:
# Two-output model trained jointly on label columns 0 and 6; outputs 0 and 1
# are scored against columns 0 and 6 respectively.
crossValidateMultiple(folds, [0, 6], [0, 6], [0, 1])

In [None]:
# Ten-output model trained on label columns 0-9; only outputs 0 and 6 are
# scored, against columns 0 and 6.
crossValidateMultiple(folds, range(10), [0, 6], [0, 6])

In [None]:
# Nine-output model trained on label columns 1-9 (column 0 left out); output
# index 5 is the head trained on column 6 and is scored against column 6.
crossValidateMultiple(folds, range(1, 10), [6], [5])

In [None]:
# Repeat the single- and multi-output experiments on the 'experimental' folds,
# printing a blank line before each run.
folds = load_crossvalidation_data(name='experimental')

# Single-output models: train on one column, score against columns 0 and 6.
for train_columns in ([0], [6]):
    print()
    crossValidateSingle(folds, train_columns, [0, 6])

# Multi-output models: (training columns, scored columns, output index per scored column).
multi_output_runs = (
    ([0, 6], [0, 6], [0, 1]),
    (range(10), [0, 6], [0, 6]),
    (range(1,10), [6], [5]),
)
for train_columns, scored_columns, output_indexes in multi_output_runs:
    print()
    crossValidateMultiple(folds, train_columns, scored_columns, output_indexes)

In [None]:
# Ten-output model trained on label columns 0-9; outputs 1-9 are each scored
# against the label column with the same index.
crossValidateMultiple(folds, range(10), range(1, 10), range(1, 10))

In [None]:
# Single-output baseline for each of label columns 1-9: train on column i
# alone and score against that same column, printing i before each run.
for i in range(1,10):
    print(i)
    crossValidateSingle(folds, [i], [i])