In [None]:
# default_exp core

# SGNN

> Implementation of Self-Governing Neural Networks for speech act classification

Implementation of the [SGNN paper](https://www.aclweb.org/anthology/D19-1402.pdf) for speech act classification.
This repository is inspired by Guillaume Chevalier's [implementation](https://github.com/guillaume-chevalier/SGNN-Self-Governing-Neural-Networks-Projection-Layer), as well as his [discussion](https://github.com/guillaume-chevalier/SGNN-Self-Governing-Neural-Networks-Projection-Layer/issues/1) with [Sava Kalbachou](https://github.com/thinline).
This version implements some things differently from Guillaume's code, and extends beyond the projection layer all the way to a fully trainable network.

The network is trained to classify the [SwDA corpus](https://web.stanford.edu/~jurafsky/ws97/) utterances according to their speech act. The corpus was pre-processed using Christopher Potts's [project](https://github.com/cgpotts/swda/) related to it; the pre-processed data is included in [`data/swda-acttags-and-text.csv`](data/swda-acttags-and-text.csv) for repeatability.

In [None]:
#hide
from nbdev.showdoc import *

We first import the data from file

In [None]:
import pandas as pd
import os

# Location of the pre-processed SwDA CSV. The author's local checkout stays the
# default, but SGNN_DATA_FILEPATH can point at the file on any other machine so
# the notebook does not depend on one user's home directory.
data_filepath = os.environ.get(
    'SGNN_DATA_FILEPATH',
    '/home/andres/repositories/SGNN/data/swda-acttags-and-text.csv',
)

data = pd.read_csv(data_filepath)

In [None]:
# Summarise the data. `describe` has to be called: the bare attribute only
# displays the bound method's repr rather than the summary statistics.
data.describe()

<bound method NDFrame.describe of        DamslActTag                                               Text
0               ad               So, uh, describe your family budget.
1               sd  Well, I've, uh, for a lot of years I, I've pre...
2               sd  and, uh, just recently, uh, we, we set up a bu...
3               sd              and, and we're trying to stick to it.
4               sd                        We just bought a new house.
...            ...                                                ...
221611           %                                           so, yeah
221612          sd                                               I --
221613           %                                         I don't, -
221614           +  -- pretty much plan on, I mean, so far I'm pla...
221615          ba                                   That's good, uh,

[221616 rows x 2 columns]>

Divide the data into train and test sets

In [None]:
#export
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

def import_data(filepath):
    """Load the utterance corpus and integer-encode its speech-act labels.

    Parameters
    ----------
    filepath : str or path-like
        CSV file containing a `DamslActTag` column and a `Text` column.

    Returns
    -------
    X : pandas.Series
        The `Text` column of every row that has no missing values.
    y : array-like of int
        Label codes for the matching `DamslActTag` values, as returned by
        `LabelEncoder.fit_transform`.
    """
    # Read from the `filepath` argument rather than any module-level path, so
    # the function loads whichever file the caller asks for.
    data = pd.read_csv(filepath)
    data = data.dropna(axis=0)  # Drop rows with NA values
    X = data.Text

    # Convert labels to categories
    y = LabelEncoder().fit_transform(data.DamslActTag)

    return X, y

In [None]:
X, y = import_data(data_filepath)
# Fix the seed so the stratified 80/20 split is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

See the data division

In [None]:
# print(X_train.head(),"\n", y_train.head())

Convert the data labels to one-hot vectors

## Let's build the SGNN pipeline for processing the data

In [None]:
#export
import scipy.sparse as sp
import random as rand
from sklearn.base import BaseEstimator
import numpy as np
import scipy.sparse
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline, FeatureUnion
from nearpy.hashes import RandomBinaryProjections

class MyRBP(BaseEstimator, RandomBinaryProjections):
    """Random binary projection hasher usable as a scikit-learn transformer.

    Combines `BaseEstimator` (parameter handling via `get_params` /
    `set_params`) with nearpy's `RandomBinaryProjections`. `hash_vector` is
    overridden so that a whole matrix of row vectors is projected in one call
    and the result is a boolean matrix rather than a string bucket key.

    Parameters
    ----------
    hash_name : str
        Name forwarded to `RandomBinaryProjections`.
    projection_count : int
        Number of random hyperplanes, forwarded to `RandomBinaryProjections`.
    rand_seed : int or None
        Seed for the `numpy.random.RandomState` created in `fit`.
    """
    rand_seed = None  # Class-level default so the attribute always exists

    def __init__(self, hash_name='hasher', projection_count=1, rand_seed=None):
        RandomBinaryProjections.__init__(self, hash_name, projection_count, rand_seed=rand_seed)
        # Keep the seed on the instance as well: scikit-learn reads constructor
        # arguments back as same-named attributes (get_params / clone).
        self.rand_seed = rand_seed

    def fit(self, X, y=None):
        """Draw the projection hyperplanes for inputs with `X.shape[1]` columns.

        `y` is accepted for pipeline compatibility and ignored. Returns `self`
        so the estimator can be used by `Pipeline.fit` / `FeatureUnion.fit`,
        which keep the object returned by `fit`.
        """
        # Seed here rather than in __init__ so a seed supplied later through
        # set_params is honoured.
        self.rand = np.random.RandomState(self.rand_seed)
        # Forget any previous fit: the cached CSR copy of the normals would be
        # stale otherwise.
        # NOTE(review): clearing `dim` assumes nearpy's `reset` only redraws the
        # normals when the dimension differs from `self.dim` -- confirm against
        # the installed nearpy version.
        self.dim = None
        self.normals_csr = None
        self.reset(X.shape[1])
        return self

    def transform(self, X):
        """Return the boolean projection of every row of `X`."""
        return self.hash_vector(X)

    def fit_transform(self, X, y=None):
        """Fit on `X`, then return its boolean projection."""
        return self.fit(X, y).transform(X)

    def hash_vector(self, v, querying=False):
        """
        Project `v` onto all hyperplane normals and return the sign pattern.

        `v` holds one vector per row. The result is `projection > 0`: a boolean
        matrix (sparse when `v` is sparse, dense otherwise). `querying` is
        unused and only kept for signature compatibility.
        """
        if scipy.sparse.issparse(v):
            # Lazily build (and cache) the CSR representation of the
            # projection matrix. `is None` avoids an element-wise comparison
            # once the cache holds a sparse matrix.
            if self.normals_csr is None:
                self.normals_csr = scipy.sparse.csr_matrix(self.normals)
            # Make sure that we are using CSR format for multiplication
            if not scipy.sparse.isspmatrix_csr(v):
                v = scipy.sparse.csr_matrix(v)
            # Rows of v times the transposed normals: one column per hyperplane
            projection = v.dot(self.normals_csr.transpose())
        else:
            # Same projection for dense input
            projection = np.dot(v, np.transpose(self.normals))
        # Binary key: True where the vector lies on the positive side
        return projection > 0

In [None]:
#export

def build_preprocessor(T=80, d=14, char_ngram_range=(1, 4)):
    """Build the unfitted text-to-projection pipeline.

    The pipeline counts character n-grams with `CountVectorizer` and feeds the
    counts to a `FeatureUnion` of `T` `MyRBP` hashers.

    Parameters
    ----------
    T : int
        Number of hashers in the union.
    d : int
        `projection_count` given to every hasher.
    char_ngram_range : tuple
        `ngram_range` given to the `CountVectorizer`.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The configured, not yet fitted, pipeline.
    """
    # T=80 projections for each of dimension d=14: 80 * 14 = 1120-dimensional word projections

    char_term_frequency_params = {
        'char_term_frequency__analyzer': 'char',
        'char_term_frequency__lowercase': True,
        'char_term_frequency__ngram_range': char_ngram_range,
        'char_term_frequency__strip_accents': None,
        'char_term_frequency__min_df': 2,
        'char_term_frequency__max_df': 0.99,
        'char_term_frequency__max_features': int(1e7),
    }

    # Every hasher needs its own seed. Sampling without replacement from the
    # same 0..T*100 range guarantees the seeds are distinct; independent
    # `randint` draws could repeat a seed and yield two identical hashers.
    rand_seeds = rand.sample(range(T * 100 + 1), T)

    # Per-hasher parameters, addressed through the pipeline's nested names.
    hashing_feature_union_params = {}
    for t in range(T):
        prefix = 'union__random_binary_projection_hasher_{}__'.format(t)
        hashing_feature_union_params[prefix + 'projection_count'] = d
        hashing_feature_union_params[prefix + 'hash_name'] = 'hasher' + str(t)
        # The seed is only consumed later, when the hasher is fitted.
        hashing_feature_union_params[prefix + 'rand_seed'] = rand_seeds[t]

    preprocessor = Pipeline([
        ("char_term_frequency", CountVectorizer()),
        ('union', FeatureUnion([
            ('random_binary_projection_hasher_{}'.format(t), MyRBP())
            for t in range(T)
        ]))
    ])

    params = dict()
    params.update(char_term_frequency_params)
    params.update(hashing_feature_union_params)
    preprocessor.set_params(**params)
    return preprocessor

In [None]:
# Smoke-test the pipeline on a handful of toy strings; the displayed result is
# the projected feature matrix.
sample_utterances = ["Esta es na pruebs", "fadsf", "Oh no fasd", "fasdfaaaaaaaa"]
preprocessor = build_preprocessor()
preprocessor.fit_transform(sample_utterances)

<4x1120 sparse matrix of type '<class 'numpy.bool_'>'
	with 2260 stored elements in Compressed Sparse Row format>

In [None]:
#export
import tensorflow as tf

def build_keras_model(train_labels, input_dim=1120, momentum=0.9):
    """Build and compile the feed-forward classifier.

    Parameters
    ----------
    train_labels : 2-D array
        One-hot training labels; only `train_labels.shape[1]` is used, to size
        the softmax output layer.
    input_dim : int
        Width of the input feature vectors. The default, 1120, matches the
        default preprocessor (80 hashers * 14 bits).
    momentum : float
        Momentum of the SGD optimizer. It must be non-zero for
        `nesterov=True` to have any effect; with Keras' default momentum of 0
        the optimizer is plain SGD.

    Returns
    -------
    tf.keras.Sequential
        The compiled model (categorical cross-entropy loss, accuracy metric).
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(256, activation=tf.nn.sigmoid, input_shape=(input_dim,)))
    model.add(tf.keras.layers.Dropout(0.25))
    model.add(tf.keras.layers.Dense(256, activation=tf.nn.sigmoid))
    model.add(tf.keras.layers.Dropout(0.25))
    model.add(tf.keras.layers.Dense(train_labels.shape[1], activation=tf.nn.softmax))

    # Cosine annealing decay
    lr_schedule = tf.keras.experimental.CosineDecay(0.025, decay_steps=1000000)
    # SGD optimizer with Nesterov momentum
    opt = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule, momentum=momentum, nesterov=True
    )
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [None]:
#export
import os

# Training configuration
EPOCHS=50
BATCH_SIZE=100
# Location of the pre-processed SwDA CSV. The author's local checkout stays the
# default; set SGNN_DATA_FILEPATH to use the file from another location.
data_filepath = os.environ.get(
    "SGNN_DATA_FILEPATH",
    "/home/andres/repositories/SGNN/data/swda-acttags-and-text.csv",
)

def main(random_state=None):
    """Train the network on the corpus and evaluate it on a held-out split.

    Parameters
    ----------
    random_state : int or None
        Passed to `train_test_split`; give an int for a repeatable split.

    Returns
    -------
    The value returned by `keras_model.evaluate` on the test split.
    """
    X, y = import_data(data_filepath)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=random_state
    )

    # Convert categories to one-hot encodings, as needed by the categorical
    # cross-entropy loss. The class count is taken from the full label set and
    # shared by both splits: left to itself, `to_categorical` sizes each matrix
    # from the largest label present in that split, so a split missing the
    # highest class would get a narrower matrix than the model's output.
    num_classes = int(np.max(y)) + 1
    train_labels = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
    test_labels = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

    preprocessor = build_preprocessor()
    keras_model = build_keras_model(train_labels)

    # Fit the preprocessor on the training text only, then reuse it unchanged
    # for the test text.
    train_features = preprocessor.fit_transform(X_train)

    keras_model.fit(train_features, train_labels, epochs=EPOCHS, batch_size=BATCH_SIZE)
    test_features = preprocessor.transform(X_test)
    return keras_model.evaluate(test_features, test_labels)

In [None]:
#export
# This cell is exported: guard the call so that importing the generated module
# does not start a training run. In the notebook (and when run as a script)
# `__name__` is "__main__", so training still happens here.
if __name__ == "__main__":
    main()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
