In [1]:
from pathlib import Path
import os
import time

%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

import keras
# Metrics
from sklearn.metrics import f1_score

# Notebook helper methods
from bella import notebook_helper
# Models
from bella.models.tdlstm import LSTM, TDLSTM, TCLSTM
# Tokenisers
from bella.tokenisers import ark_twokenize
# Word Vectors
from bella.word_vectors import SSWE, GloveTwitterVectors, GloveCommonCrawl
# Get the data
from bella.helper import read_config
from bella.parsers import dong, semeval_14

Using TensorFlow backend.


In [2]:
# ---------------------------------------------------------------
# Point CONFIG_FP at your own config file before running this cell
# ---------------------------------------------------------------
CONFIG_FP = Path('..', 'config.yaml')

# Parse the train and test splits whose locations are stored under the
# `dong_twit_*` keys of the config file.
dong_train = dong(read_config('dong_twit_train_data', CONFIG_FP))
dong_test = dong(read_config('dong_twit_test_data', CONFIG_FP))

# Tokenise both splits and keep each distinct word once.
train_words = dong_train.word_list(ark_twokenize)
test_words = dong_test.word_list(ark_twokenize)
all_words = list(set([*train_words, *test_words]))

# Load the SSWE word vectors, passing the dataset vocabulary as the filter.
sswe = SSWE(filter_words=all_words)

In [3]:
# Build the model under test. Note that the variable is called `lstm`
# even though it holds a TCLSTM instance.
lstm = TCLSTM(
    tokeniser=ark_twokenize,
    embeddings=sswe,
    reproducible=42,
    optimiser=keras.optimizers.Adam,
    epochs=1,
)

In [4]:
# Wrap the parsed examples and their sentiment labels in numpy arrays.
# The test split is used as the validation set in the fit calls.
X_train, y_train = (
    np.array(dong_train.data_dict()),
    np.array(dong_train.sentiment_data()),
)
X_val, y_val = (
    np.array(dong_test.data_dict()),
    np.array(dong_test.sentiment_data()),
)

In [5]:
# Binarise the sentiment labels: 1 stays class 1, while -1 and 0 both
# collapse into class 0 (presumably positive vs. everything else --
# TODO confirm the label meanings against the dataset parser).
sent_vals = {
    -1: 0,
    0: 0,
    1: 1,
}
y_train = np.array([sent_vals[label] for label in y_train])
y_val = np.array([sent_vals[label] for label in y_val])

In [6]:
# First training run. The (X_val, y_val) tuple is the held-out data that the
# training log reports as the validation samples.
lstm.fit(X_train, y_train, (X_val, y_val), verbose=1)

Number of classes in the data 2
Train on 6248 samples, validate on 692 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6872bb00>

In [7]:
# Same call repeated without `continue_training`.
# NOTE(review): presumably this retrains from scratch rather than resuming --
# confirm against bella's `fit` implementation.
lstm.fit(X_train, y_train, (X_val, y_val), verbose=1)

Number of classes in the data 2
Train on 6248 samples, validate on 692 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6858ed30>

In [8]:
# Fit again, this time with `continue_training=True`.
# NOTE(review): presumably this resumes from the already-trained weights for
# one more epoch -- confirm against bella's `fit` implementation.
lstm.fit(X_train, y_train, (X_val, y_val), verbose=1, continue_training=True)

Number of classes in the data 2
Train on 6248 samples, validate on 692 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e68287e10>

In [9]:
# A second `continue_training=True` call, identical to the previous cell.
# NOTE(review): if the intent is "train N more epochs", a loop or a larger
# `epochs` value would avoid the copy-pasted cells -- confirm intent.
lstm.fit(X_train, y_train, (X_val, y_val), verbose=1, continue_training=True)

Number of classes in the data 2
Train on 6248 samples, validate on 692 samples
Epoch 1/1


<keras.callbacks.History at 0x7f1e6828f4a8>

In [7]:
# Display the model's class, fitted attributes and constructor parameters
# (the three top-level keys of the returned dict).
lstm.model_parameters()

{'class': bella.models.tdlstm.TCLSTM,
 'class_attrs': {'left_test_pad_size': 38, 'right_test_pad_size': 37},
 'class_params': {'batch_size': 32,
  'dense_layer_kwargs': {},
  'embedding_layer_kwargs': {},
  'embeddings': sswe,
  'epochs': 1,
  'include_target': True,
  'lower': True,
  'lstm_layer_kwargs': {},
  'optimiser': <keras.optimizers.Adam at 0x7f71b13bb9b0>,
  'pad_size': -1,
  'patience': 10,
  'reproducible': 42,
  'tokeniser': <function bella.tokenisers.ark_twokenize>}}

In [8]:
# Persist the fitted model next to the notebook. `Path` is already imported
# in the notebook's first cell, so it is not re-imported here.
model_loc = Path('.', 'model.txt')
# `save` is invoked through the instance and is also handed the instance
# explicitly -- presumably a static method; confirm against bella.
lstm.save(lstm, model_loc)

In [9]:
# Reload the saved model into a separate object (`ano`) so that it can be
# compared with the in-memory `lstm` in the following cells.
ano = lstm.load(model_loc)

In [10]:
# Parameters of the reloaded model, for comparison with those of `lstm`.
ano.model_parameters()

{'class': bella.models.tdlstm.TCLSTM,
 'class_attrs': {'left_test_pad_size': 38, 'right_test_pad_size': 37},
 'class_params': {'batch_size': 32,
  'dense_layer_kwargs': {},
  'embedding_layer_kwargs': {},
  'embeddings': sswe,
  'epochs': 1,
  'include_target': True,
  'lower': True,
  'lstm_layer_kwargs': {},
  'optimiser': <keras.optimizers.Adam at 0x7f71b04fd828>,
  'pad_size': -1,
  'patience': 10,
  'reproducible': 42,
  'tokeniser': <function bella.tokenisers.ark_twokenize>}}

In [10]:
# Parameters of the original model again.
# NOTE(review): the recorded output for this cell reports class `LSTM`, while
# `lstm` is constructed as a TCLSTM earlier in the notebook -- the output
# looks stale; restart the kernel and run all cells to refresh it.
lstm.model_parameters()

{'class': bella.models.tdlstm.LSTM,
 'class_attrs': {'test_pad_size': 43},
 'class_params': {'batch_size': 32,
  'dense_layer_kwargs': {},
  'embedding_layer_kwargs': {},
  'embeddings': sswe,
  'epochs': 1,
  'lower': True,
  'lstm_layer_kwargs': {},
  'optimiser': <keras.optimizers.Adam at 0x7f8796d70240>,
  'pad_size': -1,
  'patience': 10,
  'reproducible': 42,
  'tokeniser': <function bella.tokenisers.ark_twokenize>}}

In [11]:
# Predict labels for the validation inputs with the reloaded model.
# NOTE(review): this displays the whole prediction array; comparing it with
# `lstm.predict(X_val)` (e.g. via np.array_equal) would be more compact.
ano.predict(X_val)

array([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0,

In [12]:
# Predictions from the original in-memory model, to be compared by eye with
# the reloaded model's predictions on the same inputs.
lstm.predict(X_val)

array([0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 0,

In [13]:
# Check the `fitted` flag on the original model.
lstm.fitted

True

In [14]:
# Check that the reloaded model also reports itself as fitted.
ano.fitted

True

In [16]:
# NOTE(review): this is a bare reference to the `fit` attribute, not a call.
# The recorded output is an attribute listing, which does not correspond to
# this expression -- likely stale output from an earlier edit of the cell
# (e.g. a dir() call); confirm and re-run or delete the cell.
ano.fit

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_inbound_node',
 '_built',
 '_check_num_samples',
 '_check_trainable_weights_consistency',
 '_collected_trainable_weights',
 '_container_nodes',
 '_feed_input_names',
 '_feed_input_shapes',
 '_feed_inputs',
 '_feed_loss_fns',
 '_feed_output_names',
 '_feed_output_shapes',
 '_feed_outputs',
 '_feed_sample_weight_modes',
 '_feed_sample_weights',
 '_feed_targets',
 '_fit_loop',
 '_function_kwargs',
 '_get_deduped_metrics_names',
 '_get_node_attribute_at_index',
 '_inbound_nodes',
 '_internal_input_shapes',
 '_internal_output_shapes',
 '_make_predict_function',
 '_make_test_function',
 '_make_train_function'