In [1]:
# Name of the most recent training-run directory. train_cnn() overwrites it
# (via `global filename`) for every fold, and predict_unseen_data() reads it to
# locate that run's checkpoints.
filename = ''

In [2]:
import numpy as np
import tensorflow as tf

class TextCNN(object):
    """Convolutional text classifier built as a TensorFlow 1.x graph.

    The graph is: embedding lookup -> one convolution + max-pool branch per
    filter size -> concatenation -> dropout -> linear output layer.

    Args:
        sequence_length: Number of token ids per (padded) input row.
        num_classes: Width of the one-hot label vector / number of output scores.
        vocab_size: Number of rows in the embedding matrix.
        embedding_size: Number of columns in the embedding matrix.
        filter_sizes: Iterable of convolution window heights (in tokens).
        num_filters: Number of filters per window height.
        l2_reg_lambda: Weight applied to the L2 penalty on the output layer.

    Attributes (all graph tensors):
        input_x, input_y, dropout_keep_prob: Placeholders to feed.
        scores: Unnormalised class scores, named ``output/scores``.
        predictions: Arg-max class index, named ``output/predictions``.
        loss: Mean softmax cross-entropy plus the weighted L2 penalty.
        correct_predictions: Element-wise prediction == label comparison.
        accuracy: Mean of ``correct_predictions``.
        num_correct: Sum of ``correct_predictions``.
    """

    def __init__(self, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

        # Accumulates the L2 penalty of the output layer parameters
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/device:GPU:0'), tf.name_scope('embedding'):
            W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name='W')
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            # conv2d needs a trailing channel dimension
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.device('/device:GPU:0'), tf.name_scope('conv-maxpool-%s' % filter_size):
                # Convolution layer: the window spans the full embedding width
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='W')
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name='b')
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='conv')

                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')

                # Max-pool over every window position, leaving one value per filter
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='pool')
                pooled_outputs.append(pooled)

        # Combine all the pooled features into one row per example
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope('dropout'):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.device('/device:GPU:0'), tf.name_scope('output'):
            W = tf.get_variable(
                'W',
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name='scores')
            self.predictions = tf.argmax(self.scores, 1, name='predictions')

        # Calculate mean cross-entropy loss
        with tf.name_scope('loss'):
            # This op only accepts named arguments
            losses = tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y, logits=self.scores)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope('accuracy'):
            self.correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32), name='accuracy')

        # Count of correct predictions; shares the comparison op built for accuracy
        with tf.name_scope('num_correct'):
            self.num_correct = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32), name='num_correct')


In [3]:
import os
from os.path import exists
import re
import logging
import numpy as np
import pandas as pd
from collections import Counter

def clean_str(s):
    """Normalise a sentence into lower-case, space-separated tokens.

    Steps, in order:
      1. Replace every character outside ``A-Za-z0-9(),!?'`` and the backtick
         with a space.
      2. Split the clitics ``'s 've n't 're 'd 'll`` off the preceding word.
      3. Surround ``, ! ( ) ?`` with spaces.
      4. Collapse runs of whitespace into a single space.
      5. Replace any token containing ``xx`` or ``XX`` with ``xxx``.
      6. Strip leading/trailing whitespace and lower-case the result.

    Args:
        s: The raw sentence (``str``).

    Returns:
        The cleaned sentence (``str``).
    """
    s = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", s)

    # Detach clitics so they become separate tokens
    s = re.sub(r"\'s", " 's", s)
    s = re.sub(r"\'ve", " 've", s)
    s = re.sub(r"n\'t", " n't", s)
    s = re.sub(r"\'re", " 're", s)
    s = re.sub(r"\'d", " 'd", s)
    s = re.sub(r"\'ll", " 'll", s)

    # Isolate punctuation. The replacement strings are plain text: a backslash
    # before "(", ")" or "?" in a replacement template is copied into the
    # output by re.sub and is an invalid escape in a normal string literal.
    s = re.sub(r",", " , ", s)
    s = re.sub(r"!", " ! ", s)
    s = re.sub(r"\(", " ( ", s)
    s = re.sub(r"\)", " ) ", s)
    s = re.sub(r"\?", " ? ", s)

    s = re.sub(r"\s{2,}", " ", s)
    # Any token containing two or more consecutive x/X becomes the literal "xxx"
    s = re.sub(r'\S*(x{2,}|X{2,})\S*', "xxx", s)
    # Non-ASCII characters were already turned into spaces by the first
    # substitution; this is kept as a final safety net.
    s = re.sub(r'[^\x00-\x7F]+', "", s)
    return s.strip().lower()

def load_data_and_labels(filename):
    """Read the zipped CSV and return cleaned sentences with encoded labels.

    Only the ``class_name`` and ``sentence`` columns are kept; rows with a
    null in either column are dropped and the remaining rows are shuffled.

    Args:
        filename: Path of the zip-compressed CSV file.

    Returns:
        A 5-tuple ``(x_raw, y_raw, y_raw_num, df, labels)``:
        ``x_raw`` is the list of sentences passed through ``clean_str``,
        ``y_raw`` the list of one-hot rows, ``y_raw_num`` the list of integer
        class indices, ``df`` the filtered and shuffled frame, and ``labels``
        the sorted list of distinct class names.
    """
    label_col, text_col = 'class_name', 'sentence'
    selected = [label_col, text_col]

    frame = pd.read_csv(filename, compression='zip', dtype={'consumer_complaint_narrative': object})

    # Keep only the two columns of interest, discard incomplete rows, shuffle
    unwanted = list(set(frame.columns) - set(selected))
    frame = frame.drop(unwanted, axis=1)
    frame = frame.dropna(axis=0, how='any', subset=selected)
    frame = frame.reindex(np.random.permutation(frame.index))

    # The position of a class name in the sorted list is its numeric id, and
    # the matching identity-matrix row is its one-hot encoding
    labels = sorted(set(frame[label_col].tolist()))
    identity = np.eye(len(labels), dtype=int)
    one_hot_of = {name: identity[idx] for idx, name in enumerate(labels)}
    index_of = {name: idx for idx, name in enumerate(labels)}

    class_names = frame[label_col].tolist()
    x_raw = [clean_str(sentence) for sentence in frame[text_col].tolist()]
    y_raw = [one_hot_of[name] for name in class_names]
    y_raw_num = [index_of[name] for name in class_names]

    return x_raw, y_raw, y_raw_num, frame, labels

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield ``data`` in consecutive batches, repeated for ``num_epochs`` epochs.

    Args:
        data: Sequence of examples; it is converted with ``np.array``.
        batch_size: Maximum number of examples per batch.
        num_epochs: Number of passes over ``data``.
        shuffle: When true, the examples are re-permuted at the start of each
            epoch using ``np.random.permutation``.

    Yields:
        Array slices of at most ``batch_size`` examples. Only the last batch of
        an epoch may be shorter, and no empty batch is ever produced (so empty
        ``data`` yields nothing).
    """
    data = np.array(data)
    data_size = len(data)
    # Ceiling division: an exact multiple of batch_size must not produce an
    # extra, empty trailing batch.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size

    for epoch in range(num_epochs):
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data

        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min(start_index + batch_size, data_size)
            yield shuffled_data[start_index:end_index]

In [4]:
import os
import sys
import json
import logging
#import data_helper
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.contrib import learn
from sklearn.metrics import accuracy_score, f1_score, precision_score, \
recall_score, classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

# One dict of weighted precision/recall/F-score plus accuracy is appended here
# by every predict_unseen_data() call that receives labels.
fold_averages_list = []
# Let INFO-level messages from the root logger through
logging.getLogger().setLevel(logging.INFO)

def predict_unseen_data(x_dev, y_dev, cycle, actual_labels):
    """Restore the newest checkpoint of the current run and score it on ``x_dev``.

    The checkpoint is looked up under ``./<filename>/checkpoints`` where
    ``filename`` is the module-level name set by ``train_cnn``. The batch size
    is read from ``./parameters.json``.

    When ``y_dev`` is given, the confusion matrix, classification report and
    accuracy are printed/logged, and a dict with the weighted precision,
    recall, F-score and the accuracy is appended to ``fold_averages_list``.

    Args:
        x_dev: Rows of token ids to classify.
        y_dev: One-hot labels for ``x_dev``, or ``None`` to skip the metrics.
        cycle: Cross-validation cycle number (accepted for the caller's
            convenience; not used in the computation).
        actual_labels: Integer class index for each row of ``x_dev``; used as
            the ground truth for the sklearn metrics.

    Raises:
        FileNotFoundError: If no checkpoint exists in the checkpoint directory.
    """
    # Step 0: load training parameters and locate the trained model
    with open('./parameters.json') as params_file:
        params = json.load(params_file)

    checkpoint_dir = "./" + filename
    if not checkpoint_dir.endswith('/'):
        checkpoint_dir += '/'
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir + 'checkpoints')
    if checkpoint_file is None:
        # Fail with a clear message instead of trying to open "None.meta"
        raise FileNotFoundError(
            'No checkpoint found in {}'.format(checkpoint_dir + 'checkpoints'))

    logging.critical('Loaded the trained model: {}'.format(checkpoint_file))

    # Step 1: report the size of the data to predict
    logging.info('The number of x_test: {}'.format(len(x_dev)))
    if y_dev is not None:
        logging.info('The number of y_test: {}'.format(len(y_dev)))

    # Step 2: compute the predictions
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
        # The context manager makes the session the default one and closes it on exit
        with tf.Session(config=session_conf) as sess:
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            input_x = graph.get_operation_by_name("input_x").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            batches = batch_iter(list(x_dev), params['batch_size'], 1, shuffle=False)

            batch_outputs = []
            for x_test_batch in batches:
                if len(x_test_batch) == 0:
                    # Nothing to feed; skip rather than run the graph on an empty batch
                    continue
                # Dropout is disabled (keep probability 1.0) for inference
                batch_outputs.append(
                    sess.run(predictions, {input_x: x_test_batch, dropout_keep_prob: 1.0}))

    all_predictions = np.concatenate(batch_outputs) if batch_outputs else np.array([])

    # Step 3: report metrics when ground truth is available
    if y_dev is not None:
        y_dev = np.argmax(y_dev, axis=1)
        correct_predictions = np.sum(all_predictions == y_dev)
        accuracy = accuracy_score(actual_labels, all_predictions)

        print('\nConfusion Matrix: \n', confusion_matrix(actual_labels, all_predictions))
        print('\n', classification_report(actual_labels, all_predictions))
        print(f'\nAccuracy --> {accuracy}')

        logging.critical('The accuracy is: {}'.format(correct_predictions / float(len(y_dev))))
        logging.critical('The prediction is complete')

        # Collect the averages of each fold to calculate the average of the model
        scores = precision_recall_fscore_support(actual_labels, all_predictions, average='weighted')
        fold_dict = {'Precision': scores[0], 'Recall': scores[1], 'Fscore': scores[2], 'Accuracy': accuracy}
        fold_averages_list.append(fold_dict)

In [5]:
import os
import sys
import json
import time
import logging
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from pprint import pprint
import tflearn
import numpy

# Let INFO-level messages from the root logger through
logging.getLogger().setLevel(logging.INFO)

# NOTE(review): train_cnn() assigns names identical to these three without a
# `global` declaration, so it works on its own local variables and these
# module-level lists are not the objects it fills.
actual_labels = []
x_test1 = []
y_test1= []

def train_cnn():
    """Train a TextCNN with 10-fold stratified cross-validation.

    Workflow:
      * load sentences/labels from the hard-coded zip file and the training
        parameters from ``parameters.json``;
      * map words to ids, padding every sentence to the longest one;
      * hold out 20% of the data as a test set;
      * for each of the 10 folds of the remaining data: build a fresh graph,
        train, evaluate on the fold's dev part every ``evaluate_every`` steps,
        checkpoint whenever dev accuracy is at least the best so far, and
        finally call ``predict_unseen_data`` on the held-out test set.

    Side effects: writes ``labels.json``, a ``trained_model_<timestamp>``
    directory per fold (vocabulary + checkpoints), and graph summaries under
    ``tf_logs``; sets the module-level ``filename`` to the current fold's
    model directory name so ``predict_unseen_data`` can find the checkpoints.
    """
    # predict_unseen_data() reads this module-level name to locate checkpoints
    global filename

    skf = StratifiedKFold(n_splits=10, random_state=None)
    cycle = 0

    # Step 0: load sentences, labels, and training parameters
    print(os.getcwd())

    train_file = './data/NFR_CSV_11_13_18_Reduced_Classes.zip'

    x_raw, y_raw, y_raw_num, _, labels = load_data_and_labels(train_file)

    parameter_file = 'parameters.json'
    with open(parameter_file) as params_handle:
        params = json.load(params_handle)

    # Step 1: pad each sentence to the same length and map each word to an id
    max_document_length = max([len(x.split(' ')) for x in x_raw])
    logging.info('The maximum length of all sentences: {}'.format(max_document_length))
    vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
    x = np.array(list(vocab_processor.fit_transform(x_raw)))
    y = np.array(y_raw)
    y_num = np.array(y_raw_num)

    # Step 2: split the original dataset into train and test sets.
    # Both calls use the same stratification and seed, so they select the same
    # rows; the first carries one-hot labels, the second integer labels (which
    # StratifiedKFold needs).
    _, x_test1, y_train1, y_test1 = train_test_split(x, y, test_size=0.2, stratify=y, random_state=42)
    x_train2, _, y_train2, _ = train_test_split(x, y_num, test_size=0.2, stratify=y, random_state=42)

    # Ground-truth class indices of the held-out test set; identical for every fold
    actual_labels = np.argmax(y_test1, axis=1)

    # Save the labels into labels.json for use at prediction time
    with open('./labels.json', 'w') as outfile:
        json.dump(labels, outfile, indent=4)

    filter_sizes = list(map(int, params['filter_sizes'].split(",")))

    # Step 3: split the train set into train and dev sets, one pair per fold
    for train_index, val_index in skf.split(x_train2, y_train2):

        print(datetime.datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))

        cycle = cycle + 1
        print('***************************')
        print("CROSS VALIDATION CYCLE: %d" % cycle)
        print('***************************')
        print("Train:", train_index)
        print("Validation:", val_index)
        print('')

        x_train, x_dev = x_train2[train_index], x_train2[val_index]
        y_train, y_dev = y_train1[train_index], y_train1[val_index]

        logging.info('x_train: {}, x_dev: {}'.format(len(x_train), len(x_dev)))
        logging.info('y_train: {}, y_dev: {}'.format(len(y_train), len(y_dev)))

        # Step 4: build a graph and cnn object
        graph = tf.Graph()
        with graph.as_default():
            session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)

            # The context manager makes the session the default one and closes
            # it once this fold has finished training
            with tf.Session(config=session_conf) as sess:
                cnn = TextCNN(
                    sequence_length=x_train.shape[1],
                    num_classes=y_train.shape[1],
                    vocab_size=len(vocab_processor.vocabulary_),
                    embedding_size=params['embedding_dim'],
                    filter_sizes=filter_sizes,
                    num_filters=params['num_filters'],
                    l2_reg_lambda=params['l2_reg_lambda'])

                global_step = tf.Variable(0, name="global_step", trainable=False)
                optimizer = tf.train.AdamOptimizer(1e-3)
                grads_and_vars = optimizer.compute_gradients(cnn.loss)
                train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

                timestamp = str(int(time.time()))
                filename = "trained_model_" + timestamp
                out_dir = os.path.abspath(os.path.join(os.path.curdir, filename))

                checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                os.makedirs(checkpoint_dir, exist_ok=True)
                saver = tf.train.Saver()

                # One training step: train the model with one batch
                def train_step(x_batch, y_batch):
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: params['dropout_keep_prob']}
                    sess.run(train_op, feed_dict)

                # One evaluation step: number of correct predictions in one batch
                def dev_step(x_batch, y_batch):
                    feed_dict = {cnn.input_x: x_batch, cnn.input_y: y_batch, cnn.dropout_keep_prob: 1.0}
                    return sess.run(cnn.num_correct, feed_dict)

                # Save the word_to_id map alongside the model
                vocab_processor.save(os.path.join(out_dir, "vocab.pickle"))
                sess.run(tf.global_variables_initializer())

                # Write the graph definition once per fold and release the writer
                writer = tf.summary.FileWriter("tf_logs")
                writer.add_graph(sess.graph)
                writer.close()

                # Training starts here
                train_batches = batch_iter(list(zip(x_train, y_train)), params['batch_size'], params['num_epochs'])
                best_accuracy, best_at_step = 0, 0

                # Step 5: train the cnn model with x_train and y_train (batch by batch)
                for train_batch in train_batches:
                    if len(train_batch) == 0:
                        # An empty batch cannot be unpacked into inputs and labels
                        continue
                    x_train_batch, y_train_batch = zip(*train_batch)
                    train_step(x_train_batch, y_train_batch)
                    current_step = tf.train.global_step(sess, global_step)

                    # Step 5.1: evaluate the model with x_dev and y_dev (batch by batch)
                    if current_step % params['evaluate_every'] == 0:
                        dev_batches = batch_iter(list(zip(x_dev, y_dev)), params['batch_size'], 1, shuffle=False)

                        total_dev_correct = 0
                        for dev_batch in dev_batches:
                            if len(dev_batch) == 0:
                                continue
                            x_dev_batch, y_dev_batch = zip(*dev_batch)
                            total_dev_correct += dev_step(x_dev_batch, y_dev_batch)

                        dev_accuracy = float(total_dev_correct) / len(y_dev)
                        logging.critical('Accuracy on dev set: {}'.format(dev_accuracy))

                        # Step 5.2: save the model if it is the best based on accuracy on dev set
                        if dev_accuracy >= best_accuracy:
                            best_accuracy, best_at_step = dev_accuracy, current_step
                            path = saver.save(sess, checkpoint_prefix, global_step=current_step)

                            logging.critical('Saved model at {} at step {}'.format(path, best_at_step))
                            logging.critical('Best accuracy is {} at step {}'.format(best_accuracy, best_at_step))

            print('*********************************')
            print("VALIDATION RESULTS FOR CYCLE: %d" % cycle)
            print('*********************************')

            # Score this fold's newest checkpoint on the held-out test set
            predict_unseen_data(x_test1, y_test1, cycle, actual_labels)

            print(datetime.datetime.now().strftime("%A, %d. %B %Y %I:%M%p"))

if __name__ == '__main__':
    # Run the full cross-validated training. train_cnn() takes no arguments;
    # the data and parameter file paths are hard-coded inside it.
    train_cnn()


INFO:root:The maximum length of all sentences: 118


curses is not supported on this machine (please install/reinstall curses for an optimal experience)
/home/jovyan
ALSO LOOK FOR ME!
2 out of 3 program administrators nursing staff members shall successfully be able to use the system to manage the scheduling of classes and clinicals
Instructions for updating:
Please use tensorflow/transform or tf.data.


Instructions for updating:
Please use tensorflow/transform or tf.data.


Instructions for updating:
Please use tensorflow/transform or tf.data.


Instructions for updating:
Please use tensorflow/transform or tf.data.


Instructions for updating:
Please use tensorflow/transform or tf.data.


Instructions for updating:
Please use tensorflow/transform or tf.data.
INFO:root:x_train: 656, x_dev: 75
INFO:root:y_train: 656, y_dev: 75


x_test2
[ 16 198  10 446  54 447   3 448  49 449 450 451   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0]
x_train2
[ 16 126  28 127 128  16 129 130 131 132 133  32  16 134 135 136 126  66
 137  34 138 139 140 128   4 141   3  16 142 143 144   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0]
Tuesday, 04. December 2

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

CRITICAL:root:Accuracy on dev set: 0.5866666666666667
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958499/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.5866666666666667 at step 100
CRITICAL:root:Accuracy on dev set: 0.52
CRITICAL:root:Accuracy on dev set: 0.52
CRITICAL:root:Accuracy on dev set: 0.64
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958499/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.64 at step 400
CRITICAL:root:Accuracy on dev set: 0.6533333333333333
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958499/checkpoints/model-500 at step 500
CRITICAL:root:Best accuracy is 0.6533333333333333 at step 500
CRITICAL:root:Accuracy on dev set: 0.6533333333333333
CRITICAL:root:Saved model at /home/jovyan/trained_m

*********************************
VALIDATION RESULTS FOR CYCLE: 1
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543958499/checkpoints/model-2100
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958499/checkpoints/model-2100


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958499/checkpoints/model-2100
CRITICAL:root:The accuracy is: 0.7650273224043715
CRITICAL:root:The prediction is complete
INFO:root:x_train: 656, x_dev: 75
INFO:root:y_train: 656, y_dev: 75



Confusion Matrix: 
 [[14  8  0  4  1]
 [ 3 23  0  5  0]
 [ 0  4 18  1  0]
 [ 0  2  0 68  1]
 [ 1  3  2  8 17]]

               precision    recall  f1-score   support

           0       0.78      0.52      0.62        27
           1       0.57      0.74      0.65        31
           2       0.90      0.78      0.84        23
           3       0.79      0.96      0.87        71
           4       0.89      0.55      0.68        31

   micro avg       0.77      0.77      0.77       183
   macro avg       0.79      0.71      0.73       183
weighted avg       0.78      0.77      0.76       183


Accuracy --> 0.7650273224043715
Tuesday, 04. December 2018 09:24PM
Tuesday, 04. December 2018 09:24PM
***************************
CROSS VALIDATION CYCLE: 2
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.68
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958684/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.68 at step 100
CRITICAL:root:Accuracy on dev set: 0.6933333333333334
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958684/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.6933333333333334 at step 200
CRITICAL:root:Accuracy on dev set: 0.6666666666666666
CRITICAL:root:Accuracy on dev set: 0.6933333333333334
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958684/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.6933333333333334 at step 400
CRITICAL:root:Accuracy on dev set: 0.72
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958684/checkpoints/model-500 at step 500
CRITICAL:root:Best accuracy is 0.72 at step 500
CRITICAL:root:Accuracy on dev set: 0.7333333333333333
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958684/checkpoints/m

*********************************
VALIDATION RESULTS FOR CYCLE: 2
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543958684/checkpoints/model-2000
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958684/checkpoints/model-2000


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958684/checkpoints/model-2000
CRITICAL:root:The accuracy is: 0.7923497267759563
CRITICAL:root:The prediction is complete
INFO:root:x_train: 657, x_dev: 74
INFO:root:y_train: 657, y_dev: 74



Confusion Matrix: 
 [[17  5  0  4  1]
 [ 2 23  1  5  0]
 [ 1  1 19  1  1]
 [ 0  2  0 67  2]
 [ 3  1  2  6 19]]

               precision    recall  f1-score   support

           0       0.74      0.63      0.68        27
           1       0.72      0.74      0.73        31
           2       0.86      0.83      0.84        23
           3       0.81      0.94      0.87        71
           4       0.83      0.61      0.70        31

   micro avg       0.79      0.79      0.79       183
   macro avg       0.79      0.75      0.77       183
weighted avg       0.79      0.79      0.79       183


Accuracy --> 0.7923497267759563
Tuesday, 04. December 2018 09:27PM
Tuesday, 04. December 2018 09:27PM
***************************
CROSS VALIDATION CYCLE: 3
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.6081081081081081
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958867/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.6081081081081081 at step 100
CRITICAL:root:Accuracy on dev set: 0.581081081081081
CRITICAL:root:Accuracy on dev set: 0.6891891891891891
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958867/checkpoints/model-300 at step 300
CRITICAL:root:Best accuracy is 0.6891891891891891 at step 300
CRITICAL:root:Accuracy on dev set: 0.581081081081081
CRITICAL:root:Accuracy on dev set: 0.6756756756756757
CRITICAL:root:Accuracy on dev set: 0.7567567567567568
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543958867/checkpoints/model-600 at step 600
CRITICAL:root:Best accuracy is 0.7567567567567568 at step 600
CRITICAL:root:Accuracy on dev set: 0.7432432432432432
CRITICAL:root:Accuracy on dev set: 0.7297297297297297
CRITICAL:root:Accuracy on dev set: 0.7297297297297297
CRITICAL:root:Accuracy on d

*********************************
VALIDATION RESULTS FOR CYCLE: 3
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543958867/checkpoints/model-2000
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958867/checkpoints/model-2000


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543958867/checkpoints/model-2000
CRITICAL:root:The accuracy is: 0.7759562841530054
CRITICAL:root:The prediction is complete
INFO:root:x_train: 658, x_dev: 73
INFO:root:y_train: 658, y_dev: 73



Confusion Matrix: 
 [[17  5  0  5  0]
 [ 2 21  0  6  2]
 [ 0  1 19  2  1]
 [ 0  2  0 66  3]
 [ 2  2  1  7 19]]

               precision    recall  f1-score   support

           0       0.81      0.63      0.71        27
           1       0.68      0.68      0.68        31
           2       0.95      0.83      0.88        23
           3       0.77      0.93      0.84        71
           4       0.76      0.61      0.68        31

   micro avg       0.78      0.78      0.78       183
   macro avg       0.79      0.74      0.76       183
weighted avg       0.78      0.78      0.77       183


Accuracy --> 0.7759562841530054
Tuesday, 04. December 2018 09:30PM
Tuesday, 04. December 2018 09:30PM
***************************
CROSS VALIDATION CYCLE: 4
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.6301369863013698
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959051/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.6301369863013698 at step 100
CRITICAL:root:Accuracy on dev set: 0.6301369863013698
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959051/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.6301369863013698 at step 200
CRITICAL:root:Accuracy on dev set: 0.6027397260273972
CRITICAL:root:Accuracy on dev set: 0.684931506849315
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959051/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.684931506849315 at step 400
CRITICAL:root:Accuracy on dev set: 0.6575342465753424
CRITICAL:root:Accuracy on dev set: 0.684931506849315
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959051/checkpoints/model-600 at step 600
CRITICAL:root:Best accuracy is 0.684931506849315 at step 600
CRITICAL:root:Accuracy on de

*********************************
VALIDATION RESULTS FOR CYCLE: 4
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959051/checkpoints/model-1200
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959051/checkpoints/model-1200


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959051/checkpoints/model-1200
CRITICAL:root:The accuracy is: 0.7704918032786885
CRITICAL:root:The prediction is complete
INFO:root:x_train: 658, x_dev: 73
INFO:root:y_train: 658, y_dev: 73



Confusion Matrix: 
 [[13  5  0  6  3]
 [ 1 22  0  7  1]
 [ 1  2 17  2  1]
 [ 0  2  0 69  0]
 [ 1  2  3  5 20]]

               precision    recall  f1-score   support

           0       0.81      0.48      0.60        27
           1       0.67      0.71      0.69        31
           2       0.85      0.74      0.79        23
           3       0.78      0.97      0.86        71
           4       0.80      0.65      0.71        31

   micro avg       0.77      0.77      0.77       183
   macro avg       0.78      0.71      0.73       183
weighted avg       0.78      0.77      0.76       183


Accuracy --> 0.7704918032786885
Tuesday, 04. December 2018 09:33PM
Tuesday, 04. December 2018 09:33PM
***************************
CROSS VALIDATION CYCLE: 5
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.589041095890411
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959236/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.589041095890411 at step 100
CRITICAL:root:Accuracy on dev set: 0.589041095890411
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959236/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.589041095890411 at step 200
CRITICAL:root:Accuracy on dev set: 0.6027397260273972
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959236/checkpoints/model-300 at step 300
CRITICAL:root:Best accuracy is 0.6027397260273972 at step 300
CRITICAL:root:Accuracy on dev set: 0.6438356164383562
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959236/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.6438356164383562 at step 400
CRITICAL:root:Accuracy on dev set: 0.6712328767123288
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959236/checkpoints/mod

*********************************
VALIDATION RESULTS FOR CYCLE: 5
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959236/checkpoints/model-2100
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959236/checkpoints/model-2100


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959236/checkpoints/model-2100
CRITICAL:root:The accuracy is: 0.7650273224043715
CRITICAL:root:The prediction is complete
INFO:root:x_train: 658, x_dev: 73
INFO:root:y_train: 658, y_dev: 73



Confusion Matrix: 
 [[17  6  1  3  0]
 [ 3 21  0  6  1]
 [ 1  1 16  1  4]
 [ 0  2  0 66  3]
 [ 1  2  1  7 20]]

               precision    recall  f1-score   support

           0       0.77      0.63      0.69        27
           1       0.66      0.68      0.67        31
           2       0.89      0.70      0.78        23
           3       0.80      0.93      0.86        71
           4       0.71      0.65      0.68        31

   micro avg       0.77      0.77      0.77       183
   macro avg       0.77      0.72      0.74       183
weighted avg       0.77      0.77      0.76       183


Accuracy --> 0.7650273224043715
Tuesday, 04. December 2018 09:37PM
Tuesday, 04. December 2018 09:37PM
***************************
CROSS VALIDATION CYCLE: 6
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.684931506849315
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959421/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.684931506849315 at step 100
CRITICAL:root:Accuracy on dev set: 0.684931506849315
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959421/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.684931506849315 at step 200
CRITICAL:root:Accuracy on dev set: 0.7534246575342466
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959421/checkpoints/model-300 at step 300
CRITICAL:root:Best accuracy is 0.7534246575342466 at step 300
CRITICAL:root:Accuracy on dev set: 0.7671232876712328
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959421/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.7671232876712328 at step 400
CRITICAL:root:Accuracy on dev set: 0.7671232876712328
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959421/checkpoints/mod

*********************************
VALIDATION RESULTS FOR CYCLE: 6
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959421/checkpoints/model-1700
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959421/checkpoints/model-1700


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959421/checkpoints/model-1700
CRITICAL:root:The accuracy is: 0.7923497267759563
CRITICAL:root:The prediction is complete
INFO:root:x_train: 659, x_dev: 72
INFO:root:y_train: 659, y_dev: 72



Confusion Matrix: 
 [[17  5  0  2  3]
 [ 2 22  0  4  3]
 [ 0  1 16  1  5]
 [ 0  0  0 68  3]
 [ 1  3  1  4 22]]

               precision    recall  f1-score   support

           0       0.85      0.63      0.72        27
           1       0.71      0.71      0.71        31
           2       0.94      0.70      0.80        23
           3       0.86      0.96      0.91        71
           4       0.61      0.71      0.66        31

   micro avg       0.79      0.79      0.79       183
   macro avg       0.79      0.74      0.76       183
weighted avg       0.80      0.79      0.79       183


Accuracy --> 0.7923497267759563
Tuesday, 04. December 2018 09:40PM
Tuesday, 04. December 2018 09:40PM
***************************
CROSS VALIDATION CYCLE: 7
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.6527777777777778
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959603/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.6527777777777778 at step 100
CRITICAL:root:Accuracy on dev set: 0.6666666666666666
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959603/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.6666666666666666 at step 200
CRITICAL:root:Accuracy on dev set: 0.6944444444444444
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959603/checkpoints/model-300 at step 300
CRITICAL:root:Best accuracy is 0.6944444444444444 at step 300
CRITICAL:root:Accuracy on dev set: 0.6805555555555556
CRITICAL:root:Accuracy on dev set: 0.7222222222222222
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959603/checkpoints/model-500 at step 500
CRITICAL:root:Best accuracy is 0.7222222222222222 at step 500
CRITICAL:root:Accuracy on dev set: 0.7361111111111112
CRITICAL:root:Saved mode

*********************************
VALIDATION RESULTS FOR CYCLE: 7
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959603/checkpoints/model-2100
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959603/checkpoints/model-2100


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959603/checkpoints/model-2100
CRITICAL:root:The accuracy is: 0.7431693989071039
CRITICAL:root:The prediction is complete
INFO:root:x_train: 659, x_dev: 72
INFO:root:y_train: 659, y_dev: 72



Confusion Matrix: 
 [[11  8  0  6  2]
 [ 2 24  0  4  1]
 [ 1  1 19  1  1]
 [ 0  4  0 63  4]
 [ 1  3  3  5 19]]

               precision    recall  f1-score   support

           0       0.73      0.41      0.52        27
           1       0.60      0.77      0.68        31
           2       0.86      0.83      0.84        23
           3       0.80      0.89      0.84        71
           4       0.70      0.61      0.66        31

   micro avg       0.74      0.74      0.74       183
   macro avg       0.74      0.70      0.71       183
weighted avg       0.75      0.74      0.73       183


Accuracy --> 0.7431693989071039
Tuesday, 04. December 2018 09:43PM
Tuesday, 04. December 2018 09:43PM
***************************
CROSS VALIDATION CYCLE: 8
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.6527777777777778
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959793/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.6527777777777778 at step 100
CRITICAL:root:Accuracy on dev set: 0.5972222222222222
CRITICAL:root:Accuracy on dev set: 0.625
CRITICAL:root:Accuracy on dev set: 0.6805555555555556
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959793/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.6805555555555556 at step 400
CRITICAL:root:Accuracy on dev set: 0.7361111111111112
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959793/checkpoints/model-500 at step 500
CRITICAL:root:Best accuracy is 0.7361111111111112 at step 500
CRITICAL:root:Accuracy on dev set: 0.8194444444444444
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959793/checkpoints/model-600 at step 600
CRITICAL:root:Best accuracy is 0.8194444444444444 at step 600
CRITICAL:root:Accuracy on dev set: 0.

*********************************
VALIDATION RESULTS FOR CYCLE: 8
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959793/checkpoints/model-2000
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959793/checkpoints/model-2000


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959793/checkpoints/model-2000
CRITICAL:root:The accuracy is: 0.7595628415300546
CRITICAL:root:The prediction is complete
INFO:root:x_train: 659, x_dev: 72
INFO:root:y_train: 659, y_dev: 72



Confusion Matrix: 
 [[13  5  0  6  3]
 [ 2 22  1  5  1]
 [ 1  1 20  1  0]
 [ 0  2  0 65  4]
 [ 4  1  2  5 19]]

               precision    recall  f1-score   support

           0       0.65      0.48      0.55        27
           1       0.71      0.71      0.71        31
           2       0.87      0.87      0.87        23
           3       0.79      0.92      0.85        71
           4       0.70      0.61      0.66        31

   micro avg       0.76      0.76      0.76       183
   macro avg       0.75      0.72      0.73       183
weighted avg       0.75      0.76      0.75       183


Accuracy --> 0.7595628415300546
Tuesday, 04. December 2018 09:46PM
Tuesday, 04. December 2018 09:46PM
***************************
CROSS VALIDATION CYCLE: 9
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  5

CRITICAL:root:Accuracy on dev set: 0.625
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959980/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.625 at step 100
CRITICAL:root:Accuracy on dev set: 0.6527777777777778
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959980/checkpoints/model-200 at step 200
CRITICAL:root:Best accuracy is 0.6527777777777778 at step 200
CRITICAL:root:Accuracy on dev set: 0.6111111111111112
CRITICAL:root:Accuracy on dev set: 0.6805555555555556
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959980/checkpoints/model-400 at step 400
CRITICAL:root:Best accuracy is 0.6805555555555556 at step 400
CRITICAL:root:Accuracy on dev set: 0.6805555555555556
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543959980/checkpoints/model-500 at step 500
CRITICAL:root:Best accuracy is 0.6805555555555556 at step 500
CRITICAL:root:Accuracy on dev set: 0.7083333333333334
CRITICAL:root:Saved model at /home/jovyan/trained_

*********************************
VALIDATION RESULTS FOR CYCLE: 9
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543959980/checkpoints/model-2100
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959980/checkpoints/model-2100


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543959980/checkpoints/model-2100
CRITICAL:root:The accuracy is: 0.7759562841530054
CRITICAL:root:The prediction is complete
INFO:root:x_train: 659, x_dev: 72
INFO:root:y_train: 659, y_dev: 72



Confusion Matrix: 
 [[15  7  0  3  2]
 [ 1 22  0  7  1]
 [ 0  1 19  1  2]
 [ 0  3  0 67  1]
 [ 2  3  2  5 19]]

               precision    recall  f1-score   support

           0       0.83      0.56      0.67        27
           1       0.61      0.71      0.66        31
           2       0.90      0.83      0.86        23
           3       0.81      0.94      0.87        71
           4       0.76      0.61      0.68        31

   micro avg       0.78      0.78      0.78       183
   macro avg       0.78      0.73      0.75       183
weighted avg       0.78      0.78      0.77       183


Accuracy --> 0.7759562841530054
Tuesday, 04. December 2018 09:49PM
Tuesday, 04. December 2018 09:49PM
***************************
CROSS VALIDATION CYCLE: 10
***************************
Train: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  

CRITICAL:root:Accuracy on dev set: 0.6944444444444444
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543960168/checkpoints/model-100 at step 100
CRITICAL:root:Best accuracy is 0.6944444444444444 at step 100
CRITICAL:root:Accuracy on dev set: 0.6388888888888888
CRITICAL:root:Accuracy on dev set: 0.7083333333333334
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543960168/checkpoints/model-300 at step 300
CRITICAL:root:Best accuracy is 0.7083333333333334 at step 300
CRITICAL:root:Accuracy on dev set: 0.6666666666666666
CRITICAL:root:Accuracy on dev set: 0.6944444444444444
CRITICAL:root:Accuracy on dev set: 0.7222222222222222
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543960168/checkpoints/model-600 at step 600
CRITICAL:root:Best accuracy is 0.7222222222222222 at step 600
CRITICAL:root:Accuracy on dev set: 0.75
CRITICAL:root:Saved model at /home/jovyan/trained_model_1543960168/checkpoints/model-700 at step 700
CRITICAL:root:Best accuracy is 0.75 at step 70

*********************************
VALIDATION RESULTS FOR CYCLE: 10
*********************************
LOOK FOR ME!
/home/jovyan/trained_model_1543960168/checkpoints/model-1500
INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543960168/checkpoints/model-1500


INFO:tensorflow:Restoring parameters from /home/jovyan/trained_model_1543960168/checkpoints/model-1500
CRITICAL:root:The accuracy is: 0.7486338797814208
CRITICAL:root:The prediction is complete



Confusion Matrix: 
 [[13  4  0  7  3]
 [ 2 21  0  7  1]
 [ 1  3 15  1  3]
 [ 0  1  0 67  3]
 [ 2  1  1  6 21]]

               precision    recall  f1-score   support

           0       0.72      0.48      0.58        27
           1       0.70      0.68      0.69        31
           2       0.94      0.65      0.77        23
           3       0.76      0.94      0.84        71
           4       0.68      0.68      0.68        31

   micro avg       0.75      0.75      0.75       183
   macro avg       0.76      0.69      0.71       183
weighted avg       0.75      0.75      0.74       183


Accuracy --> 0.7486338797814208
Tuesday, 04. December 2018 09:52PM


In [6]:
# Collect the per-fold averaged scores into one list per metric so that the
# model-level means can be computed from them.
# Each entry of fold_averages_list is read with .get(), so a missing key
# contributes None instead of raising KeyError.
p_list = [fold_scores.get('Precision') for fold_scores in fold_averages_list]
r_list = [fold_scores.get('Recall') for fold_scores in fold_averages_list]
f_list = [fold_scores.get('Fscore') for fold_scores in fold_averages_list]
a_list = [fold_scores.get('Accuracy') for fold_scores in fold_averages_list]

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    The divisor is clamped to a minimum of 1, so an empty sequence gives
    0.0 instead of raising ZeroDivisionError.
    """
    total = float(sum(numbers))
    count = len(numbers)
    divisor = count if count > 1 else 1
    return total / divisor

# Report the mean of every metric across all folds. Each format string is
# paired with the list it summarises so all four lines are printed the same way.
for template, scores in (
    ("\nModel Mean Precision: %f ", p_list),
    ("Model Mean Recall: %f ", r_list),
    ("Model Mean Fscore: %f ", f_list),
    ("Model Mean Accuracy: %f ", a_list),
):
    print(template % mean(scores))


# Timestamp printed once the summary has been reported.
dt = datetime.datetime.now()
print("Finish time: ", dt.strftime("%A, %d. %B %Y %I:%M%p"))


Model Mean Precision: 0.773431 
Model Mean Recall: 0.768852 
Model Mean Fscore: 0.762604 
Model Mean Accuracy: 0.768852 
Finish time:  Tuesday, 04. December 2018 09:52PM
