In [None]:
# default_exp core

# SGNN

> Implementation of Self-Governing Neural Networks for speech act classification

Implementation of the [SGNN paper](https://www.aclweb.org/anthology/D19-1402.pdf) for speech act classification.
This repository is inspired by Guillaume Chevalier's [implementation](https://github.com/guillaume-chevalier/SGNN-Self-Governing-Neural-Networks-Projection-Layer), as well as his [discussion](https://github.com/guillaume-chevalier/SGNN-Self-Governing-Neural-Networks-Projection-Layer/issues/1) with [Sava Kalbachou](https://github.com/thinline).
This version implements some things differently from Guillaume's code, and extends beyond the projection layer all the way to a fully trainable network.

The network is trained to classify the [SwDA corpus](https://web.stanford.edu/~jurafsky/ws97/) utterances according to their speech act. The corpus was pre-processed using Christopher Potts's related [project](https://github.com/cgpotts/swda/); the pre-processed data is included in [`data/swda-acttags-and-text.csv`](data/swda-acttags-and-text.csv) for repeatability.

In [None]:
#hide
from nbdev.showdoc import *

We first import the data from file

In [None]:
# export
import pandas as pd

# Path is relative to the notebook's working directory.
data_filepath = 'data/swda-acttags-and-text.csv'
# Load the pre-processed corpus into a DataFrame; the columns read further down
# in this notebook are `DamslActTag` (label) and `Text` (utterance).
data = pd.read_csv(data_filepath)

In [None]:
# See the data
# Summarise both columns (count / unique / top / freq for text data).
# `describe` must be *called*; the bare attribute only displays the bound method's repr.
data.describe()

<bound method NDFrame.describe of        DamslActTag                                               Text
0               ad               So, uh, describe your family budget.
1               sd  Well, I've, uh, for a lot of years I, I've pre...
2               sd  and, uh, just recently, uh, we, we set up a bu...
3               sd              and, and we're trying to stick to it.
4               sd                        We just bought a new house.
...            ...                                                ...
221611           %                                           so, yeah
221612          sd                                               I --
221613           %                                         I don't, -
221614           +  -- pretty much plan on, I mean, so far I'm pla...
221615          ba                                   That's good, uh,

[221616 rows x 2 columns]>

Divide the data into train and test sets

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

data = data.dropna(axis=0)  # Drop rows with NA values
y = data.DamslActTag  # speech-act tag of each utterance (classification target)
X = data.Text         # raw utterance text (model input)
# Hold out 20% of the utterances for testing. A fixed random_state makes the
# split identical on every run, so results are reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

See the data division

In [None]:
# Show the first training utterances followed by their labels; both Series keep
# the original row index, so matching indices confirm X and y are still aligned.
print(X_train.head(),"\n", y_train.head())

160273                                             You too.
164670                                                  Um.
145561                                                 yes.
160987                                             but, # -
92347     And I think that it's just is good for your se...
Name: Text, dtype: object 
 160273    fc
164670     %
145561    aa
160987     %
92347     sv
Name: DamslActTag, dtype: object


Encode the string labels as integer class indices (they are converted to one-hot vectors further down, just before training)

In [None]:
from sklearn.preprocessing import LabelEncoder
# Map each speech-act tag to an integer class index.
le = LabelEncoder()
# Fit on the full label column rather than on y_train only: a rare tag that
# ended up exclusively in the test split would otherwise make
# `le.transform(y_test)` raise a ValueError for an unseen label.
le.fit(y)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

## Let's build the SGNN pipeline for processing the data

In [None]:
#export

# Settings for the character n-gram CountVectorizer.
# Every key is prefixed with the pipeline step name ("char_term_frequency")
# followed by a double underscore, so the dict can be handed to `set_params`.
char_ngram_range = (1, 4)  # character n-grams of length 1 to 4

char_term_frequency_params = {
    'char_term_frequency__' + option: value
    for option, value in (
        ('analyzer', 'char'),
        ('lowercase', True),
        ('ngram_range', char_ngram_range),
        ('strip_accents', None),
        ('min_df', 2),
        ('max_df', 0.99),
        ('max_features', 10_000_000),
    )
}

In [None]:
#export 
import scipy.sparse as sp
import random as rand

T = 80  # number of independent random-projection hashers
d = 14  # number of bits (projections) produced by each hasher
# T=80 projections, each of dimension d=14: 80 * 14 = 1120-dimensional word projections

# Each hasher needs a different seed. Sampling *without replacement* guarantees
# the T seeds are pairwise distinct (independent randint draws can collide),
# and drawing them from a dedicated seeded generator makes the seeds -- and so
# the projections -- identical on every run. The range matches the previous
# randint(0, T*100) bounds, both ends included.
rand_seeds = rand.Random(0).sample(range(T * 100 + 1), T)

# Parameters for the FeatureUnion of hashers, keyed as
# "union__<hasher step name>__<parameter>" so they can be passed to `set_params`.
hashing_feature_union_params = {
    **{'union__random_binary_projection_hasher_{}__projection_count'.format(t): d
       for t in range(T)
    },
    **{'union__random_binary_projection_hasher_{}__hash_name'.format(t): 'hasher' + str(t)
       for t in range(T)
    },
    **{'union__random_binary_projection_hasher_{}__rand_seed'.format(t): rand_seeds[t]  # consumed when the hasher is fitted
       for t in range(T)
    }
}

In [None]:
from nearpy.hashes import RandomBinaryProjections
from sklearn.base import BaseEstimator
import numpy as np
d = 14
class MyRBP(BaseEstimator, RandomBinaryProjections):
    """scikit-learn style transformer built on NearPy's ``RandomBinaryProjections``.

    Each input row is projected onto ``projection_count`` random hyperplane
    normals; every projection contributes one bit (1 if the projection is
    strictly positive, 0 otherwise).

    Parameters
    ----------
    hash_name : str
        Name forwarded to the NearPy hash.
    projection_count : int
        Number of random projections, i.e. number of output bits per row.
    rand_seed : int or None
        Seed of the ``RandomState`` used to draw the hyperplane normals.
        It is read in ``fit`` so that a value assigned through ``set_params``
        after construction is honoured.
    """

    rand_seed = None  # Class-level default so the attribute always exists

    def __init__(self, hash_name='hasher', projection_count=1, rand_seed=None):
        RandomBinaryProjections.__init__(self, hash_name, projection_count, rand_seed=rand_seed)
        # Store the constructor argument on the instance so that
        # get_params() / clone() report the value that was actually passed.
        self.rand_seed = rand_seed

    def fit(self, X, y=None):
        """Draw the random hyperplane normals for the feature dimension of ``X``.

        ``X`` is expected to be (n_samples, n_features), dense or scipy-sparse;
        ``y`` is ignored. Returns ``self`` so calls can be chained.
        """
        self.rand = np.random.RandomState(self.rand_seed)  # rand seed after param setting
        # NOTE(review): NearPy's reset() appears to skip regeneration when the
        # dimension is unchanged; clearing `dim` forces fresh normals on every
        # fit -- confirm against the installed NearPy version.
        self.dim = None
        # The projection acts on feature vectors, so it must be sized by the
        # number of features (last axis), not by the number of samples.
        self.reset(X.shape[-1])
        return self

    def transform(self, X):
        """Return the projection bits of every row of ``X``.

        The result is an int8 array of 0/1 values with shape
        (n_samples, projection_count); a 1-D input yields (projection_count,).
        """
        # hash_vector() handles a single vector only; projecting the whole
        # matrix at once keeps one output row per sample.
        projections = np.asarray(X.dot(self.normals.T))
        return (projections > 0).astype(np.int8)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its projection bits."""
        return self.fit(X, y).transform(X)

In [None]:
# Pipeline and FeatureUnion have to be in scope before they are used here,
# otherwise this cell raises a NameError on a freshly started kernel.
from sklearn.pipeline import Pipeline, FeatureUnion

# Pipeline containing only the union of the T random-projection hashers.
params = dict()
params.update(hashing_feature_union_params)
union_pipe = Pipeline([
    ('union', FeatureUnion([
        ('random_binary_projection_hasher_{}'.format(t), MyRBP())
        for t in range(T)
    ]))
])
# Apply projection_count / hash_name / rand_seed to every hasher by step name.
union_pipe.set_params(**params)

Pipeline(steps=[('union',
                 FeatureUnion(transformer_list=[('random_binary_projection_hasher_0',
                                                 MyRBP(hash_name='hasher0',
                                                       projection_count=14,
                                                       rand_seed=3450)),
                                                ('random_binary_projection_hasher_1',
                                                 MyRBP(hash_name='hasher1',
                                                       projection_count=14,
                                                       rand_seed=5161)),
                                                ('random_binary_projection_hasher_2',
                                                 MyRBP(hash_name='hasher2',
                                                       projection_count=14,
                                                       rand_seed=2...
                                            

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline, FeatureUnion
from nearpy.hashes import RandomBinaryProjections

# Merge the settings of both stages into one parameter dict
# (hasher settings are applied last, exactly as two successive update() calls would).
params = {**char_term_frequency_params, **hashing_feature_union_params}

# Stage 1: character n-gram counts. Stage 2: T random binary projection hashers
# whose outputs are concatenated by the FeatureUnion.
preprocessor = Pipeline(steps=[
    ("char_term_frequency", CountVectorizer()),
    ('union', FeatureUnion(transformer_list=[
        ('random_binary_projection_hasher_%d' % hasher_index, MyRBP())
        for hasher_index in range(T)
    ])),
])
preprocessor.set_params(**params)

Pipeline(steps=[('char_term_frequency',
                 CountVectorizer(analyzer='char', max_df=0.99,
                                 max_features=10000000, min_df=2,
                                 ngram_range=(1, 4))),
                ('union',
                 FeatureUnion(transformer_list=[('random_binary_projection_hasher_0',
                                                 MyRBP(hash_name='hasher0',
                                                       projection_count=14,
                                                       rand_seed=3450)),
                                                ('random_binary_projection_hasher_1',
                                                 MyRBP(hash_name='hasher1',
                                                       projection_co...
                                                       projection_count=14,
                                                       rand_seed=3194)),
                                                ('random

In [None]:
import tensorflow as tf
# Convert the integer class indices to one-hot encodings for the
# categorical cross-entropy loss. The number of classes is passed explicitly:
# by default to_categorical infers it from the largest index present, so the
# train and test matrices could end up with different widths if one split
# lacks the highest-numbered class.
num_classes = len(le.classes_)
train_labels = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
test_labels = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

# Fit the preprocessing pipeline on the training text only, then inspect one row.
train_features = preprocessor.fit_transform(X_train)
train_features[0]

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all().

In [None]:
# Sanity check on the one-hot labels: (number of training samples, number of classes).
train_labels.shape

(177288, 43)

In [None]:
# Feed-forward classifier on top of the projection features:
# two sigmoid hidden layers with dropout, then a softmax over the classes.
model = tf.keras.Sequential()
# Input width is T hashers * d bits each (80 * 14 = 1120), derived from the
# configuration instead of being hard-coded.
model.add(tf.keras.layers.Dense(256, activation=tf.nn.sigmoid, input_shape=(T * d,)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Dense(256, activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Dense(train_labels.shape[1], activation=tf.nn.softmax))

# Cosine annealing decay
# NOTE(review): the schedule reaches its final value after `decay_steps`
# optimizer steps and stays there; confirm 100000 matches the intended total
# number of training steps (epochs * batches per epoch).
lr_schedule = tf.keras.experimental.CosineDecay(0.025, decay_steps=100000)
# SGD optimizer with Nesterov momentum. `nesterov=True` has no effect while
# momentum keeps its default of 0 (the update reduces to plain SGD), so a
# non-zero momentum is set explicitly.
opt = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.summary()

In [None]:
# Training configuration
EPOCHS = 500      # full passes over the training set
BATCH_SIZE = 100  # samples per gradient update

# Train the classifier on the projected training features.
model.fit(
    x=train_features,
    y=train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

In [None]:
# Project the held-out text with the already-fitted pipeline (transform only,
# no refitting), then score the trained model on it.
test_features = preprocessor.transform(X_test)
model.evaluate(x=test_features, y=test_labels)