In [1]:
#
# Import Dataset
#
import os
from sklearn.datasets import load_files

DATA_SETS_PATH = 'data/data_sets'
DATA_SET_NAME = 'bbcsport'
CATEGORIES = ['athletics', 'cricket', 'football', 'rugby', 'tennis']

container_path = os.path.join(DATA_SETS_PATH, DATA_SET_NAME)
bunch = load_files(container_path=container_path, description=DATA_SET_NAME, categories=CATEGORIES, decode_error='ignore', encoding='utf-8')

In [2]:
# Helper Methods
#

import numpy as np

def get_dataset_info_string(dataset):
    """Ridiculous way to print out dataset info"""
    
    # count number of samples/documents in dataset
    num_docs = lambda i: list(zip(*np.unique(dataset.target, return_counts=True)))[i][1]
    
    # ordering of output
    display_column_order = ['Target', 'Target Name', 'Documents']
    
    # uses target as index
    column_param_funcs = {
        'Target' : lambda i: i,
        'Target Name' : lambda i: dataset.target_names[i],
        'Documents' : lambda i: num_docs(i)
    }
    
    column_names = list(column_param_funcs.keys())
    column_headers_dict = {column_name:column_name for column_name in column_names}
    column_values = zip(*[[v(i) for v in column_param_funcs.values()] for i in range(len(dataset.target_names))])

    # useful dictionaries 
    info_dict = [{k:v(i) for k,v in column_param_funcs.items()} for i in range(len(dataset.target_names))]
    merged_values_by_column = dict(zip(column_names, column_values))    
    
    # get maximum length string for each column name in dataset
    get_max_str_len = lambda list: max([len(str(i)) for i in list])
    max_header_len = {k: max(len(k),get_max_str_len(v)) for k,v in merged_values_by_column.items()}
    ordered_max_header_len = [(column_name, max_header_len[column_name]) for column_name in display_column_order] 
    
    # format output
    template = '|'.join(["{%s:%d}" % (column_name, max_len) for column_name, max_len in ordered_max_header_len])
    
    # create header
    header = template.format(**column_headers_dict)
    bar = '-'*(sum([o[1] for o in ordered_max_header_len]) + len(ordered_max_header_len))

    # add category info to display string
    description = dataset.DESCR
    if dataset.DESCR is None:
        description = "None"
    data_set_info_string = 'Dataset Description: \n' + dataset.DESCR + '\n' + bar + '\n' + header + '\n' + bar + '\n'
    for rec in info_dict: 
          data_set_info_string += template.format(**rec) + '\n'
    data_set_info_string += bar
    
    # add total number of documents to string
    total_documents = dataset.target.shape[0]
    data_set_info_string += "\nTotal Documents:\t" + str(total_documents)

            
    return data_set_info_string    

In [3]:
import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import seaborn as sns

#
# Preproses Text, Convert Documents to Sequences, and One-Hot Encode Targets
#

# tokenize, build vocab and generate document sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(bunch.data)

sequences = tokenizer.texts_to_sequences(bunch.data)
word_index = tokenizer.word_index
VOCAB_SIZE = len(word_index)

y_ohe = keras.utils.to_categorical(bunch.target)

#
# display dataset info
#

print(get_dataset_info_string(bunch))
print("Vocabulary Size:\t" + str(VOCAB_SIZE))

#
# padding and clipping documents for reccurent network input
#

print("\nPad and limit inputs to RNN... \n")

# pad and clip
MAX_ART_LEN = 500
X = pad_sequences(sequences, maxlen=MAX_ART_LEN)

# display document lengths and stats
document_lengths = np.array([len(d) for d in sequences])
MAX_DOCUEMNT_LENGTH, mean_length, std_length = max(document_lengths), np.mean(document_lengths), np.std(document_lengths)

print("Maximum Document Length: %s (words)" % MAX_DOCUEMNT_LENGTH)
print("Mean Document Length: %s (words)" % mean_length)
print("STD Document Lengths: %s " % std_length)
sns.distplot(list(document_lengths), kde=False, rug=True);

# MAX_ART_LEN parameter effects
docs_covered = document_lengths[ np.where( document_lengths <= MAX_ART_LEN ) ]
percent_docs_covered = len(docs_covered)/len(document_lengths)
stds_covered = (MAX_ART_LEN - mean_length)/std_length

print("\nMaximum Document Length Allowed: %s (words)" % MAX_ART_LEN)
print("Chosen Document Length Parameter Effects...\n")
print("Percentage of Documents Within Limit: %s" % (percent_docs_covered*100))
print("STD's Covered: %s" % stds_covered)

# display final specs
print("\nEncoded Dataset Dimensions:\n")
print('Shape of Data Tensor:', X.shape)
print('Shape of Label Tensor:', y_ohe.shape)


Using TensorFlow backend.
Using TensorFlow backend.


Dataset Description: 
bbcsport
-----------------------------
Target|Target Name|Documents
-----------------------------
     0|athletics  |      101
     1|cricket    |      124
     2|football   |      265
     3|rugby      |      147
     4|tennis     |      100
-----------------------------
Total Documents:	737
Vocabulary Size:	14224

Pad and limit inputs to RNN... 

Maximum Document Length: 1724 (words)
Mean Document Length: 346.472184531886 (words)
STD Document Lengths: 190.3401998942172 

Maximum Document Length Allowed: 500 (words)
Chosen Document Length Parameter Effects...

Percentage of Documents Within Limit: 82.4966078697422
STD's Covered: 0.8065969015133854

Encoded Dataset Dimensions:

Shape of Data Tensor: (737, 500)
Shape of Label Tensor: (737, 5)


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


Dataset Description: 
bbcsport
-----------------------------
Target|Target Name|Documents
-----------------------------
     0|athletics  |      101
     1|cricket    |      124
     2|football   |      265
     3|rugby      |      147
     4|tennis     |      100
-----------------------------
Total Documents:	737
Vocabulary Size:	14224

Pad and limit inputs to RNN... 

Maximum Document Length: 1724 (words)
Mean Document Length: 346.472184531886 (words)
STD Document Lengths: 190.3401998942172 

Maximum Document Length Allowed: 500 (words)
Chosen Document Length Parameter Effects...

Percentage of Documents Within Limit: 82.4966078697422
STD's Covered: 0.8065969015133854

Encoded Dataset Dimensions:

Shape of Data Tensor: (737, 500)
Shape of Label Tensor: (737, 5)


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


In [4]:
#
# Seperate Train and Test Data
#

from sklearn.model_selection import train_test_split

TEST_2_SET_RATIO = 0.2
DOUGLAS_ADAMS = 42
NUM_CLASSES = len(bunch.target_names)

# Split it into train / test subsets
X_train, X_test, y_train_ohe, y_test_ohe = train_test_split(X, y_ohe, test_size=TEST_2_SET_RATIO,
                                                            stratify=bunch.target, 
                                                            random_state=DOUGLAS_ADAMS)
#
# display dataset train / test split info
#

print(get_dataset_info_string(bunch))

print("\nEncoded Dataset Dimensions:\n")
print('Shape of Data Tensor:', X.shape)
print('Shape of Label Tensor:', y_ohe.shape)

print("\nTrain Test Split: ", TEST_2_SET_RATIO)

print("\nTrain Dataset Dimensions:\n")
print('Shape of Data Tensor:', X_train.shape)
print('Shape of Label Tensor:', y_train_ohe.shape)

print("\nTest Dataset Dimensions:\n")
print('Shape of Data Tensor:', X_test.shape)
print('Shape of Label Tensor:', y_test_ohe.shape)

Dataset Description: 
bbcsport
-----------------------------
Target|Target Name|Documents
-----------------------------
     0|athletics  |      101
     1|cricket    |      124
     2|football   |      265
     3|rugby      |      147
     4|tennis     |      100
-----------------------------
Total Documents:	737

Encoded Dataset Dimensions:

Shape of Data Tensor: (737, 500)
Shape of Label Tensor: (737, 5)

Train Test Split:  0.2

Train Dataset Dimensions:

Shape of Data Tensor: (589, 500)
Shape of Label Tensor: (589, 5)

Test Dataset Dimensions:

Shape of Data Tensor: (148, 500)
Shape of Label Tensor: (148, 5)
Dataset Description: 
bbcsport
-----------------------------
Target|Target Name|Documents
-----------------------------
     0|athletics  |      101
     1|cricket    |      124
     2|football   |      265
     3|rugby      |      147
     4|tennis     |      100
-----------------------------
Total Documents:	737

Encoded Dataset Dimensions:

Shape of Data Tensor: (737, 500)
S

In [5]:
#
# Helper Methods
#

#
# create glove embedding matrix from gloves pretrained models
#

def format_glove_embedding_matrix(dimensions, word_index):
    """ 
        returns embedding_matrix corresponding to word_index columns
        
        embdedding_index 
            format: {key : word, value : word vector}
            
        Note: unfound words in word_index will be zero vectors
    """
    
    # the embedding dimensions should match the file you load glove from
    assert dimensions in [50, 100, 200, 300]
    
    GLOVE_EMBEDDINGS_FILE_TEMPLATE = 'data/glove/glove.6B.%sd.txt'
    glove_file = GLOVE_EMBEDDINGS_FILE_TEMPLATE % dimensions
    
    #
    # create embeddings index
    #
    
    # format: {key : word, value : word vector} 
    embeddings_index = {}
    
    # load glove embeddings file and fill index
    f = open(glove_file)
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    #
    # build embedding matrix coressponding to given word_index
    #    Note: words not found in embedding index will be all-zeros.
    
    embedding_matrix = np.zeros((len(word_index) + 1, dimensions))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    # return index and matrix
    return embedding_matrix
    

In [6]:
#
#
#
#
#              Grid Search / Hyper Parameter Tuning 
#
#                   selecting model parameters 
#
#

#
#
#           Test/Search Settings -- apply to each model tested
#
#

# - Single Dense Layer Variable Activation
# - Shared Glove Embeddings

DATA_SETS_PATH = 'data/data_sets'
DATA_SET_NAME = 'bbcsport'
CATEGORIES = ['athletics', 'cricket', 'football', 'rugby', 'tennis']

MAXIMUM_ARTICLE_LENGTH = 500
EMBEDDING_DIMENSIONS = 50 # Possible: 50, 100, 200, 300

TEST_SIZE = 0.2
RANDOM_SEED = 42

VERBOSE = 2
EPOCHS = 1
BATCH_SIZE = 500

#
# Grid Settings
#

GRID_RNN_TYPE = ['GRU'] #, 'LSTM'] # Possible : 'GRU', 'LSTM'
GRID_STATE_SIZE = [10] #[5*i for i in range(20)]
GRID_DROPOUT = [0.1] #np.linspace(0, 0.8, 8)
GRID_RECURRENT_DROPOUT = [0.1] #np.linspace(0, 0.8, 8)
GRID_ACTIVATION = ['sigmoid']
GRID_LOSS = ['categorical_crossentropy']
GRID_OPTIMIZER = ['rmsprop']
GRID_METRICS = [['accuracy']]


#
# Import Dataset
#

import os
from sklearn.datasets import load_files

container_path = os.path.join(DATA_SETS_PATH, DATA_SET_NAME)
bunch = load_files(container_path=container_path, description=DATA_SET_NAME, categories=CATEGORIES, decode_error='ignore', encoding='utf-8')

#
# Build Tokenizer and Embeddings Matrix
#   Convert Documents to Sequences, and One-Hot Encode Targets
# 

import keras
from keras.preprocessing.text import Tokenizer

# build tokenizer
tokenizer_master = Tokenizer()
tokenizer_master.fit_on_texts(bunch.data)

# build embeddings matrix
embedding_matrix_master = format_glove_embedding_matrix(EMBEDDING_DIMENSIONS, tokenizer_master.word_index)

#
# Convert Documents to Sequences, and One-Hot Encode Targets
#

from keras.preprocessing.sequence import pad_sequences

# tokenize, build vocab and generate document sequences
sequences = tokenizer.texts_to_sequences(bunch.data)

# pad and clip
X = pad_sequences(sequences, maxlen=MAXIMUM_ARTICLE_LENGTH)
y_ohe = keras.utils.to_categorical(bunch.target)

#
# Split dataset it into train / test subsets
#

from sklearn.model_selection import train_test_split

# train / test data
X_train, X_test, y_train_ohe, y_test_ohe = train_test_split(X, y_ohe, test_size=TEST_SIZE,
                                                            stratify=bunch.target, 
                                                            random_state=RANDOM_SEED)




In [7]:
import inspect
from sklearn.base import BaseEstimator, ClassifierMixin

class RNNTextClassifier(BaseEstimator, ClassifierMixin):  
    """RNN Text classifier"""

    def __init__(self, rnn_type='GRU', num_outputs=None, state_size=50, dropout=0, recurrent_dropout=0,
            activation='sigmoid', loss='categorical_crossentropy', optimizer='rmsprop',
               metrics=['accuracy'], max_input_len=500, embeddings_matrix=None, tokenizer=None):
  
        args, _, _, values = inspect.getargvalues(inspect.currentframe())
        values.pop("self")

        for arg, val in values.items():
            #print("{} = {}".format(arg,val))
            setattr(self, arg, val)


    def fit(self, X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=100, batch_size=1, verbose=2):

        # build model
        self.model = Sequential()
        
        # build embedding layer
        self.embedding_size = self.embeddings_matrix.shape[1]
        embedding_layer = Embedding(len(self.tokenizer.word_index) + 1, self.embedding_size,
                            weights=[self.embeddings_matrix], input_length=self.max_input_len,
                            trainable=False)
        self.model.add(embedding_layer)
        
        # build recurent layer
        if self.rnn_type == 'LSTM':
            self.model.add(LSTM(self.embedding_size, dropout=self.dropout, recurrent_dropout=self.recurrent_dropout ))
        else:
            self.model.add(GRU(self.embedding_size, dropout=self.dropout, recurrent_dropout=self.recurrent_dropout ))
        
        # build single dense layer
        self.model.add(Dense(self.num_outputs, activation=self.activation))
        
        # compile model
        self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics)
        
        # fit model
        self.model.fit(X_train, y_train_ohe, epochs=epochs, batch_size=batch_size, validation_data=validation_data, verbose=verbose )
               #callbacks=callbacks, verbose=verbose )

        return self

    def predict(self, X, y=None):
        return self.model.predict(X)

    def score(self, X, y, batch_size=None, verbose=1):
        return self.model.evaluate(X, y,batch_size=batch_size, verbose=verbose)
        
    def summary(self):
        print(self.model.summary())
        
    def get_params(self,deep=False):
        # fix this
        return vars(self)
    



In [8]:
#
# Simple RNN class for building and saving models
#

#
# tried making a custom estimator but ran into issues.
# this is used for easy saving and loading of models
#

import inspect
#import json

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU, SimpleRNN, Embedding
#from keras.callbacks import ModelCheckpoint

class text_RNN:
    
    def __init__(self, rnn_type='GRU', num_outputs=None, \
                 state_size=50, dropout=0, recurrent_dropout=0, \
                 activation='sigmoid', loss='categorical_crossentropy', \
                 optimizer='rmsprop', metrics=['accuracy']):

        args, _, _, values = inspect.getargvalues(inspect.currentframe())
        values.pop("self")

        for arg, val in values.items():
            #print("{} = {}".format(arg,val))
            setattr(self, arg, val)
            
        # enforce a few settings
        assert self.rnn_type in ['LSTM', 'GRU']
        
    def compile_model(self, num_outputs=None, max_input_len=None, embeddings_matrix=None, tokenizer=None):
        
        assert num_outputs is not None
        assert max_input_len is not None
        assert embeddings_matrix is not None
        assert tokenizer is not None
        
        self.num_outputs = num_outputs
        self.max_input_len = max_input_len
        self.embeddings_matrix = embeddings_matrix
        self.tokenizer = tokenizer
        
        # build model
        self.model = Sequential()
        
        # build embedding layer
        self.embedding_size = self.embeddings_matrix.shape[1]
        embedding_layer = Embedding(len(self.tokenizer.word_index) + 1, self.embedding_size,
                            weights=[self.embeddings_matrix], input_length=self.max_input_len,
                            trainable=False)
        self.model.add(embedding_layer)
        
        # build recurent layer
        if self.rnn_type == 'LSTM':
            self.model.add(LSTM(self.embedding_size, dropout=self.dropout, recurrent_dropout=self.recurrent_dropout ))
        else:
            self.model.add(GRU(self.embedding_size, dropout=self.dropout, recurrent_dropout=self.recurrent_dropout ))
        
        # build single dense layer
        self.model.add(Dense(self.num_outputs, activation=self.activation))
        
        # compile model
        self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics)

    # ah the worst
    def get_model(self):
        return self.model
    
#
# Ugly wrapper method for KerasClassifier, GridSearchCV and cross_val_score
# tried making a custom estimator but ran into issues 
#

#
# build, compile model and return model
#

def create_model(rnn_type=None, num_outputs=None, \
                 state_size=None, dropout=None, recurrent_dropout=None, \
                 activation=None, loss=None, \
                 optimizer=None, metrics=None, \
                 max_input_len=None, embeddings_matrix=None, tokenizer=None):

    rnn_co = text_RNN(rnn_type=rnn_type, num_outputs=num_outputs, \
                 state_size=state_size, dropout=dropout, recurrent_dropout=dropout, \
                 activation=activation, loss=loss, \
                 optimizer=optimizer, metrics=metrics)
    
    rnn_co.compile_model(num_outputs=len(bunch.target_names), max_input_len=max_input_len, embeddings_matrix=embeddings_matrix, tokenizer=tokenizer)

    return rnn_co.get_model()

In [9]:
#
# Grid Search / Hyper Parameter Tuning and Cross Validation
#

import numpy as np
from sklearn.model_selection import GridSearchCV
#from sklearn.model_selection import cross_val_score, GridSearchCV
#from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier

#
# Create Estimator
#

# Keras Classifier Wrapper
model = KerasClassifier(build_fn=create_model,
                        epochs=EPOCHS, 
                        batch_size=BATCH_SIZE,
                        verbose=VERBOSE )
#
# Setup Grid
#

param_grid = {
    
    # const
    'max_input_len' : [MAXIMUM_ARTICLE_LENGTH],
    'num_outputs' : [len(bunch.target_names)],
    'embeddings_matrix': [embedding_matrix_master],
    'tokenizer' : [tokenizer_master],
    
    # variable
    'rnn_type' : GRID_RNN_TYPE,
    'state_size' : GRID_STATE_SIZE, 
    'dropout' : GRID_DROPOUT, 
    'recurrent_dropout' : GRID_RECURRENT_DROPOUT,
    'activation' : GRID_ACTIVATION,
    'loss' : GRID_LOSS,
    'optimizer' : GRID_OPTIMIZER,
    'metrics' : GRID_METRICS
}

#
# Perform Grid Search
#

grid = GridSearchCV(model,param_grid=param_grid, refit='precision_macro') #return_train_score=True,
grid_results = grid.fit(X_train,y_train_ohe) 


# summarize results
print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


Epoch 1/1
Epoch 1/1
 - 1s - loss: 1.6442 - acc: 0.1658
 - 1s - loss: 1.6442 - acc: 0.1658
Epoch 1/1
Epoch 1/1
 - 1s - loss: 1.6278 - acc: 0.1908
 - 1s - loss: 1.6278 - acc: 0.1908
Epoch 1/1
Epoch 1/1
 - 1s - loss: 1.6208 - acc: 0.2061
 - 1s - loss: 1.6208 - acc: 0.2061
Epoch 1/1
Epoch 1/1
 - 2s - loss: 1.6479 - acc: 0.1749
 - 2s - loss: 1.6479 - acc: 0.1749
Best: 0.293718 using {'metrics': ['accuracy'], 'rnn_type': 'GRU', 'state_size': 10, 'embeddings_matrix': array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.41800001,  0.24968   , -0.41242   , ..., -0.18411   ,
        -0.11514   , -0.78580999],
       [ 0.68046999, -0.039263  ,  0.30186   , ..., -0.073297  ,
        -0.064699  , -0.26043999],
       ...,
       [-0.024146  , -0.1061    , -0.43426999, ...,  0.78873998,
        -0.33500001,  1.18540001],
       [-0.24098   , -0.39517   ,  0.0020529 , ...,  0.79488999,
         0.21684   ,  0.23828   ],
       [-0.16064   , -1

'\n#results = cross_val_score(model, X_train,y_train_ohe)\n'

'\n#results = cross_val_score(model, X_train,y_train_ohe)\n'

In [11]:
from matplotlib import pyplot

def plot_training_history(history):
    # [TODO]: make not shitty
    #print(history.history.keys())

    pyplot.plot(history.history['acc'], label='accuracy')
    pyplot.plot(history.history['loss'], label='categorical_crossentropy')

    pyplot.legend()
    pyplot.show()

history = grid_results.best_estimator_.model.model.history.history
plot_training_history(history)



AttributeError: 'dict' object has no attribute 'history'



AttributeError: 'dict' object has no attribute 'history'

In [None]:


print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))


means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']


for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
    
    

In [None]:
def plot_history(history):
    
    
    loss_list = [s for s in history.history.keys() if 'loss' in s and 'val' not in s]
    val_loss_list = [s for s in history.history.keys() if 'loss' in s and 'val' in s]
    acc_list = [s for s in history.history.keys() if 'acc' in s and 'val' not in s]
    val_acc_list = [s for s in history.history.keys() if 'acc' in s and 'val' in s]
    
    if len(loss_list) == 0:
        print('Loss is missing in history')
        return 
    
    ## As loss always exists
    epochs = range(1,len(history.history[loss_list[0]]) + 1)
    
    ## Loss
    plt.figure(1)
    for l in loss_list:
        plt.plot(epochs, history.history[l], 'b', label='Training loss (' + str(str(format(history.history[l][-1],'.5f'))+')'))
    for l in val_loss_list:
        plt.plot(epochs, history.history[l], 'g', label='Validation loss (' + str(str(format(history.history[l][-1],'.5f'))+')'))
    
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    
    ## Accuracy
    plt.figure(2)
    for l in acc_list:
        plt.plot(epochs, history.history[l], 'b', label='Training accuracy (' + str(format(history.history[l][-1],'.5f'))+')')
    for l in val_acc_list:    
        plt.plot(epochs, history.history[l], 'g', label='Validation accuracy (' + str(format(history.history[l][-1],'.5f'))+')')

    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
"""
# summarize results
print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

"""

In [None]:
from sklearn.model_selection import cross_val_score,
"""
#results = cross_val_score(model, X_train,y_train_ohe)
"""