In [1]:
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam, Adadelta
from keras.models import Model
from keras.models import model_from_json
from sklearn.model_selection import train_test_split
from data_helpers import load_data
from data_helpers import load_data_embeddings_vocab
from data_helpers import get_embeddings
from data_helpers import load_test_data_separate_files
from data_helpers import string_to_input_data
import h5py
from imblearn.over_sampling import SMOTE
import numpy as np
from collections import Counter
import simplejson
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def countF1(predicted, actual):
    """Print the binary confusion counts and return the F1 score.

    Args:
        predicted: sequence of predicted class ids (0 or 1).
        actual: sequence of ground-truth class ids (0 or 1), aligned
            position-by-position with ``predicted``.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0 when that
        value is undefined (no true positives, or empty input).

    Pairs whose values are not 0/1 are not counted in any bucket. Pairs are
    consumed with ``zip``, so trailing elements of the longer sequence are
    ignored.
    """
    true_pos = 0
    true_neg = 0
    false_pos = 0
    false_neg = 0
    for pred, act in zip(predicted, actual):
        if pred == 1 and act == 1:
            true_pos += 1
        elif pred == 1 and act == 0:
            false_pos += 1
        elif pred == 0 and act == 1:
            false_neg += 1
        elif pred == 0 and act == 0:
            true_neg += 1

    # Guard the denominators: with no predicted positives (precision) or no
    # actual positives (recall) the ratio is undefined, so treat it as 0
    # instead of raising ZeroDivisionError.
    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg
    precision = true_pos / predicted_pos if predicted_pos else 0
    recall = true_pos / actual_pos if actual_pos else 0

    print("true positive: " + str(true_pos))
    print("true negative: " + str(true_neg))
    print("false positive: " + str(false_pos))
    print("false negative: " + str(false_neg))

    if precision + recall > 0:
        return (2 * precision * recall) / (precision + recall)
    return 0

In [9]:
def parseTestLabel(labels):
    """Convert a ';'-separated string of numeric labels into a 10-class indicator dict.

    Each label is bucketed by ``int(label) // 10 + 1``; the resulting bucket
    is flagged with 1 in the returned dict.

    Args:
        labels: string such as ``"5;15;85"``. Empty and whitespace-only
            tokens (e.g. from a trailing ';') are skipped.

    Returns:
        dict mapping every class id 1..10 to 0 or 1.

    Raises:
        ValueError: if a non-empty token is not an integer literal.
    """
    label_dict = {class_id: 0 for class_id in range(1, 11)}
    for label in labels.split(';'):
        label = label.strip()
        # Compare by value; the previous identity test (`is not ''`) relied on
        # string interning and let whitespace-only tokens crash int().
        if label != '':
            key_label = (int(label) // 10) + 1
            # NOTE(review): buckets 9 and 10 are swapped on purpose by the
            # original code -- presumably to line up with the class ids the
            # models were trained with; confirm against the training data.
            if key_label == 9:
                label_dict[10] = 1
            elif key_label == 10:
                label_dict[9] = 1
            else:
                label_dict[key_label] = 1

    return label_dict

def loadTestMultilabel(testFile, testLabel):
    """Read the raw test sentences and their multilabel annotations.

    Args:
        testFile: path to a latin-1 text file with one test sample per line.
        testLabel: path to a latin-1 text file with one ';'-separated label
            string per line, parsed with ``parseTestLabel``.

    Returns:
        ``(texts, labels)`` where ``texts`` is the list of lines with the
        trailing newline removed and ``labels`` is the list of per-line
        label dicts produced by ``parseTestLabel``.
    """
    # Context managers make sure both handles are closed; the previous
    # version left them open for the lifetime of the kernel.
    with open(testFile, "r", encoding='latin-1') as text_file:
        data_temp = [line.rstrip('\n') for line in text_file]
    with open(testLabel, "r", encoding='latin-1') as label_file:
        label_temp = [parseTestLabel(line.strip()) for line in label_file]
    return data_temp, label_temp

In [10]:
def loadData(testFile, testLabel):
    """Load pickled test inputs and test labels from disk.

    Args:
        testFile: path to the pickle file holding the test inputs.
        testLabel: path to the pickle file holding the test labels.

    Returns:
        ``(X_test, Y_test)`` -- the two unpickled objects, in that order.

    NOTE: ``pickle.load`` can execute arbitrary code; only use this on
    files you created yourself.
    """
    with open(testFile, "rb") as pickle_in:
        X_test = pickle.load(pickle_in)

    with open(testLabel, "rb") as pickle_in:
        Y_test = pickle.load(pickle_in)

    # The original returned the undefined names `Xtest, Ytest` (NameError).
    return X_test, Y_test

In [11]:
def loadModel(model_path, class_id):
    """Rebuild one per-class Keras model from its JSON architecture and weights.

    The files are read from ``model_path + str(class_id) + '/model.json'``
    and ``.../model_weights.h5``. The parts are joined by plain string
    concatenation, so ``model_path`` must already end with a path separator
    (or be a prefix of the directory name).

    Args:
        model_path: directory prefix under which the per-class folders live.
        class_id: class identifier appended to ``model_path``.

    Returns:
        The model created by ``model_from_json`` with its weights loaded.
    """
    class_dir = model_path + str(class_id)

    # load json and create model; `with` closes the file even if read() fails
    with open(class_dir + '/model.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    # load weights into new model
    loaded_model.load_weights(class_dir + '/model_weights.h5')
    print("Loaded model from disk")
    # Second input dimension -- presumably the padded sequence length the
    # model expects; it is what callers pass on to string_to_input_data.
    print(loaded_model.input_shape[1])
    return loaded_model

In [12]:
def load_vocabulary(model_path):
    """Load the pickled vocabulary stored next to a model.

    Args:
        model_path: directory containing ``vocabulary.pickle``.

    Returns:
        The object unpickled from ``model_path + "/vocabulary.pickle"``.

    NOTE: ``pickle.load`` can execute arbitrary code; only point this at
    model directories you trust.
    """
    # `with` closes the handle even when unpickling raises.
    with open(model_path + "/vocabulary.pickle", "rb") as pickle_in:
        return pickle.load(pickle_in)

In [16]:
def process_test_data(x_raw, y_raw, model_path):
    """Run every per-class model on every raw test sample and print the results.

    For each class id 1..10 the model and its vocabulary are loaded from
    ``model_path``; each sample is then encoded per class with
    ``string_to_input_data`` and passed to ``doPredict``.

    Args:
        x_raw: list of raw test strings (as returned by loadTestMultilabel).
        y_raw: list of dicts mapping class id (1..10) to 0/1, aligned with
            ``x_raw``.
        model_path: directory prefix holding the per-class model folders;
            it is concatenated directly with the class id.

    Returns:
        None. Results are printed by ``doPredict``.
    """
    # load model
    model = {}
    vocabulary = {}
    print(" --- Loading model --- ")
    for i in range(1, 11):
        model[i] = loadModel(model_path, i)
        vocabulary[i] = load_vocabulary(model_path + str(i))
    print(" --- MODEL LOADED ---")

    # The caller supplies the test data. The previous version discarded
    # x_raw / y_raw and reloaded them from a hard-coded absolute path.

    # predict test data
    for i in range(len(x_raw)):
        for j in range(1, 11):
            classifier = model[j]
            # Encode with the vocabulary that belongs to class j; the original
            # passed the whole {class_id: vocabulary} dict.
            # NOTE(review): assumes string_to_input_data expects a single
            # vocabulary object -- confirm against data_helpers.
            x_test, y_test = string_to_input_data(
                [x_raw[i]],
                y_raw[i].get(j),
                vocabulary[j],
                classifier.input_shape[1],
            )
            doPredict(classifier, x_test, y_test)

In [17]:
def doPredict(model, x_test, y_test):
    """Print a model's predicted classes next to the expected classes.

    Args:
        model: object exposing ``predict(x_test)`` that returns an iterable
            of two-element score rows.
        x_test: encoded input handed to ``model.predict``.
        y_test: iterable of two-element rows holding the expected scores
            (presumably one-hot encoded -- confirm against data_helpers).

    Returns:
        None. The raw scores, predicted classes, each expected row and the
        expected classes are written to stdout. No F1 score is computed here.
    """
    def pick_class(pair):
        # Class 0 only when the first score is strictly larger; ties go to 1.
        return 0 if pair[0] > pair[1] else 1

    scores = model.predict(x_test)
    print(scores)
    predicted = [pick_class(row) for row in scores]
    print('predict: ' + str(predicted))

    expected = []
    for row in y_test:
        print(row)
        expected.append(pick_class(row))
    print('actual: ' + str(expected))

In [None]:
# Locations of the evaluation inputs and the trained per-class models.
# NOTE: the data paths are absolute and machine-specific; adjust as needed.
TEST_TEXT_PATH = "D:/ITB/S2/TESIS/cnn/dataTest110218/test_text.txt"
TEST_LABEL_PATH = "D:/ITB/S2/TESIS/cnn/dataTest110218/test_label.txt"
MODEL_DIR = 'model110218_newsagg/'

# Load the raw test set, then run all per-class models over it.
x_raw, y_raw = loadTestMultilabel(TEST_TEXT_PATH, TEST_LABEL_PATH)
process_test_data(x_raw, y_raw, MODEL_DIR)

# print(X_test)
# print(y_test)

 --- Loading model --- 
Loaded model from disk
1082
Loaded model from disk
1392
