### TTIC 31190
### HW2
#### Yingzi Jin

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.initializers import RandomUniform
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping

In [9]:
from sklearn.preprocessing import LabelEncoder

In [4]:
import numpy as np
import random

#### 1.1 

In [352]:
def load_data(filename):
    """
    Load tweet data from a file, returning tokens and POS tags.

    Inputs:
        filename (str): The name of the file to be read.

    Returns:
        - tokens (list of list of str): Tokens from the tweets.
        - pos_tags (list of list of str): Corresponding POS tags.
    """
    with open(filename, 'r') as file:
        tweets = file.read().strip().split('\n\n')

    tokens = []
    pos_tags = []

    for tweet in tweets:
        tk_lst, pos_lst = zip(*[item.split("\t") for item in tweet.split('\n')])
        tokens.append(list(tk_lst))
        pos_tags.append(list(pos_lst))
    

    return tokens, pos_tags

In [353]:
def create_tokenizer(tokens):
    """
    Create a tokenizer, fit it on a list of tokens

    Inputs:
        tokens (list of list of str): A list of tokenized text sequences.

    Returns:
        Tokenizer: A tokenizer object fit on the given tokens
    """
    tokenizer = Tokenizer(oov_token="UNK")
    tokenizer.fit_on_texts(tokens)
    
    max_index = max(tokenizer.word_index.values())
    tokenizer.word_index["<s>"] = max_index + 1
    tokenizer.word_index["</s>"] = max_index + 2

    return tokenizer 

In [354]:
def create_context_windows(X, w, tokenizer):
    """
    Create context windows for each token in the given sentences

    Inputs:
        X (list of list of int): A list of sentences
        w (int): The window size
        tokenizer (Tokenizer): A tokenizer object 

    Returns:
        A list of context windows, each represented as a list of token indices.
    """
    context_windows = []

    for sentence in X:
        n = len(sentence)
        extended_sentence = [tokenizer.word_index['<s>']] * w + sentence + [tokenizer.word_index['</s>']] * w

        for i in range(w, len(extended_sentence) - w):
            
            window = extended_sentence[i-w : i+w+1]
            context_windows.append(window)

    
    return context_windows

In [355]:
def preprocess_x(tokens, w, tokenizer):
    """
    Convert a list of tokenized sentences into numerical format and create 
        context windows for each token.

    Inputs:
        tokens (list of list of str): A list of tokenized text sequences.
        w (int): The window size
        tokenizer (Tokenizer): A tokenizer object 

    Returns:
        A NumPy array containing the context windows for each token 
    """
    X = tokenizer.texts_to_sequences(tokens)
    X_padded = create_context_windows(X, w, tokenizer)

    return np.array(X_padded)

In [356]:
def create_labelencoder(pos_tags):
    """
    Encode a list of POS tags into numerical labels and return the encoder and 
        encoded labels.

    Inputs:
        pos_tags (list of list of str): A list of lists where each inner list 
            contains POS tags of a sentence

    Returns:
        - label_encoder (LabelEncoder): The LabelEncoder object 
        - y_train (array): The encoded POS tags as a 1D NumPy array
    """

    all_tags = [tag for tags in pos_tags for tag in tags]
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(all_tags)

    return label_encoder, y_train

In [357]:
def preprocess_y(pos_tags, label_encoder):
    """
    Encode a list of POS tags into numerical labels using a given label encoder.

    Inputs:
        pos_tags (list of list of str): A list of lists where each inner list 
            contains POS tags of a sentence.
    label_encoder (LabelEncoder): The LabelEncoder object

    Returns:
       The encoded POS tags as a 1D NumPy array
    """
    all_tags = [tag for tags in pos_tags for tag in tags]
    y = label_encoder.transform(all_tags)
    
    return y

Training data

In [12]:
filename = 'twpos-data/twpos-train.tsv'
tokens, pos_tags = load_data(filename)

In [14]:
tokenizer = create_tokenizer(tokens)

In [15]:
X_0 = preprocess_x(tokens, 0, tokenizer)

In [16]:
X_1 = preprocess_x(tokens, 1, tokenizer)

In [17]:
label_encoder, y = create_labelencoder(pos_tags)

DEV data

In [18]:
filename = 'twpos-data/twpos-dev.tsv'
tokens_dev, pos_tags_dev = load_data(filename)

In [19]:
X_dev_0 = preprocess_x(tokens_dev, 0, tokenizer)

In [20]:
X_dev_1 = preprocess_x(tokens_dev, 1, tokenizer)

In [21]:
y_dev = preprocess_y(pos_tags_dev, label_encoder)

DEVTEST data

In [22]:
filename = 'twpos-data/twpos-devtest.tsv'
tokens_devtest, pos_tags_devtest = load_data(filename)

In [23]:
X_devtest_0 = preprocess_x(tokens_devtest, 0, tokenizer)

In [24]:
X_devtest_1 = preprocess_x(tokens_devtest, 1, tokenizer)

In [25]:
y_devtest = preprocess_y(pos_tags_devtest, label_encoder)

In [159]:
dev_data_0 = (X_dev_0, y_dev)
dev_data_1 = (X_dev_1, y_dev)

Model

In [214]:
def FFNN(tokenizer, label_encoder, w, seed=0, embedding_matrix=None, random_min=-0.01, 
         random_max=0.01, embedding_dim=50, features=None, trainable=True, 
         hidden_num=1, hidden_width=128, hidden_activation='tanh', optimizer=SGD, 
         learning_rate=0.02, 
         output_activation='softmax'):
    
    """
    Build and compile a Feed-Forward Neural Network (FFNN) using specified parameters.
    
    """

    tf.random.set_seed(seed)
    random.seed(seed)
    
    vocab_size = len(tokenizer.word_index) + 1
    num_tags = len(label_encoder.classes_)

    input_token = Input(shape=(1 + 2*w, ))  


    if embedding_matrix is not None:
        embedding = Embedding(input_dim=vocab_size, 
                              output_dim=embedding_dim,
                              weights=[embedding_matrix],
                              trainable=trainable)(input_token)
    else:
        embedding = Embedding(input_dim=vocab_size, 
                              output_dim=embedding_dim, 
                              embeddings_initializer=RandomUniform(minval=random_min, 
                                                                   maxval=random_max))(input_token)


    flattened = Flatten()(embedding)
    weights = flattened

    if features is not None:
        input_feature = Input(shape=(features.shape[1], ))
        input_layer = [input_token, input_feature]
        weights = Concatenate()([flattened, input_feature])
        
    else:
        input_layer = input_token
        
    x = weights

    for _ in range(hidden_num):
        x = Dense(hidden_width, activation=hidden_activation)(x) 
    
    
    output = Dense(num_tags, activation=output_activation)(x)
    model = Model(inputs=input_layer, outputs=output)
    model.compile(optimizer=optimizer(learning_rate), loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])

    return model

In [358]:
def evaluate_model(tokenizer, label_encoder, w, X, y, validation_data, 
                   X_test, y_test, embedding_matrix=None, features=None, 
                   trainable=True, hidden_num=1, hidden_width=128, optimizer=SGD, 
                   hidden_activation='tanh', output_activation='softmax', 
                   epochs=10, batch_size=1):
    """
     Evaluate a FFNN model on specified validation and test data, with varying 
        random seeds.
        
    """
    results = {}

    early_stopping = EarlyStopping(monitor='val_accuracy',  
                                   patience=2,  
                                   restore_best_weights=True)

    for seed in [0, 21, 42]:
        model = FFNN(tokenizer, label_encoder, w, seed=seed, 
                     embedding_matrix=embedding_matrix, features=features, 
                     trainable=trainable, hidden_num=hidden_num,
                     hidden_width=hidden_width, optimizer=optimizer, 
                     hidden_activation=hidden_activation, 
                     output_activation=output_activation)

        model.fit(X, y, epochs=epochs, batch_size=batch_size, 
                  validation_data=validation_data, callbacks=[early_stopping])
        loss, accuracy = model.evaluate(X_test, y_test)
        
        results[seed] = accuracy
    
    return results

In [299]:
results_0_random = evaluate_model(tokenizer, label_encoder, 0, X_0, y, dev_data_0, 
                                  X_devtest_0, y_devtest, batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [300]:
results_1_random = evaluate_model(tokenizer, label_encoder, 1, X_1, y, dev_data_1, 
                                  X_devtest_1, y_devtest, batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [325]:
results_0_random

{0: 0.7945678234100342, 21: 0.795430064201355, 42: 0.7904720902442932}

In [326]:
results_1_random

{0: 0.8303513526916504, 21: 0.836171567440033, 42: 0.8290579915046692}

- When w = 0, the accuracies on the devtest for three different random seeds are:

        (0.7945678234100342, 0.795430064201355, 0.7904720902442932)

- When w = 1, the accuracies on the devtest for three different random seeds are:

        (0.8303513526916504, 0.836171567440033, 0.8290579915046692)

#### 1.2

In [117]:
filename_org = "twpos-data/orig-train.tsv"
filename_dev_org = "twpos-data/orig-dev.tsv"
filename_devtest_org = "twpos-data/orig-devtest.tsv"

In [118]:
tokens_org, _ = load_data(filename_org)
tokens_dev_org, _ = load_data(filename_dev_org)
tokens_devtest_org, _ = load_data(filename_devtest_org)

- Features selected: 
    - whether the first letter is capitalized
    - whether the token is digit
    - whether the token contains:
        - \#
        - @
        - http
    - whether the token ends with:
        - ing
        - ed

In [147]:
def create_features(tokens_list):
    """
    Extract a set of features from a list of tokenized sentences for each token.

    Inputs:
        tokens_list (list of list of str): A list of tokenized text sequences

    Returns:
        A NumPy array containing the extracted features for each token
    """
    features = []
    for tokens in tokens_list:
        for token in tokens:
            is_capitalized = float(token[0].isupper())
            is_numeric = float(token.isdigit())
            contains_hash = float("#" in token)
            contains_at = float("@" in token)
            contains_http = float("http" in token)
            end_ing = float(token[-3:] == "ing")
            end_ed = float(token[-2:] == "ed")
            length = len(token)
            features.append([is_capitalized, is_numeric, contains_hash, contains_at,
                             contains_http, end_ing, end_ed, length])

    return np.array(features)

In [148]:
features_train = create_features(tokens)
features_dev = create_features(tokens_dev)
features_devtest = create_features(tokens_devtest)

In [239]:
X_train_features_0 = [X_0, features_train]
X_train_features_1 = [X_1, features_train]

In [158]:
dev_data_features_0 = ([X_dev_0, features_dev], y_dev)
dev_data_features_1 = ([X_dev_1, features_dev], y_dev)

In [221]:
X_devtest_features_0 = [X_devtest_0,features_devtest]
X_devtest_features_1 = [X_devtest_1,features_devtest]

In [301]:
results_0_random_features = evaluate_model(tokenizer, label_encoder, 0, 
                                           X_train_features_0, y, 
                                           dev_data_features_0, 
                                           X_devtest_features_0,
                                           y_devtest,features=features_train, 
                                           batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [302]:
results_1_random_features = evaluate_model(tokenizer, label_encoder, 1, 
                                           X_train_features_1, y, 
                                           dev_data_features_1, 
                                           X_devtest_features_1,
                                           y_devtest,features=features_train, 
                                           batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [327]:
results_0_random_features

{0: 0.789178729057312, 21: 0.8064237833023071, 42: 0.8042681813240051}

In [328]:
results_1_random_features

{0: 0.8387583494186401, 21: 0.8443630337715149, 42: 0.8366026878356934}

In [330]:
results_1_random

{0: 0.8303513526916504, 21: 0.836171567440033, 42: 0.8290579915046692}

- When w = 0, for each random seed, adding features changes the accuracies from:
    
        0.7945678234100342 -> 0.789178729057312

        0.795430064201355 -> 0.8064237833023071

        0.7904720902442932 -> 0.8042681813240051

- When w = 1, for each random seed, adding features changes the accuracies from:
    
        0.8303513526916504 -> 0.8387583494186401

        0.836171567440033 -> 0.8443630337715149
        
        0.8290579915046692 -> 0.8366026878356934


- When w = 0, two out of three times adding features improved the accuracies. 

- When w = 1, all three times adding features improved the accuracies.

#### 1.3

##### Updating the pretrained embeddings

In [92]:
def load_embeddings(filename):
    """
    Load pre-trained word embeddings from a file into a dictionary.

    Inputs:
        filename (str): The name of the file containing the pre-trained word 
            embeddings

    Returns:
        A dictionary where keys are words and values are embedding vectors as 
            NumPy arrays
    """
    
    embedding_dict = {}

    with open(filename) as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float64')
            embedding_dict[word] = vector

    return embedding_dict

In [93]:
embedd_file = 'twitter-embeddings.txt'

In [94]:
embedding_dict = load_embeddings(embedd_file)

In [95]:
embedding_dict["<s>"] = embedding_dict["</s>"]

In [359]:
def create_embedding_matrix(tokenizer, embedding_dict):
    """
    Create an embedding matrix for the vocabulary based on pre-trained word 
        embeddings.

    Inputs:
        tokenizer (Tokenizer): A tokenizer object 
        embedding_dict (dict): A dictionary containing pre-trained word embeddings.

    Returns:
        An embedding matrix where each row corresponds to a word in the vocabulary.
        
    """

    vocab_size = len(tokenizer.word_index) + 1
    embedding_dim = 50
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    for word, i in tokenizer.word_index.items():

        if word in embedding_dict:
            embedding_matrix[i] = embedding_dict.get(word)
        else:
            embedding_matrix[i] = embedding_dict.get("UUUNKKK")

    return embedding_matrix

In [97]:
embedding_matrix = create_embedding_matrix(tokenizer, embedding_dict)

In [303]:
results_0_pretrained = evaluate_model(tokenizer, label_encoder, 0, 
                                      X_0, y, 
                                      dev_data_0, 
                                      X_devtest_0,y_devtest,
                                      embedding_matrix=embedding_matrix,
                                      batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [304]:
results_1_pretrained = evaluate_model(tokenizer, label_encoder, 1, 
                                      X_1, y, 
                                      dev_data_1, 
                                      X_devtest_1,y_devtest,
                                      embedding_matrix=embedding_matrix,
                                      batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [331]:
results_0_pretrained

{0: 0.7840051651000977, 21: 0.7893942594528198, 42: 0.7852985262870789}

In [332]:
results_1_pretrained

{0: 0.8331537246704102, 21: 0.8409140110015869, 42: 0.830566942691803}

- When w = 0, for each random seed, pretrained embeddings changes the accuracies from:
    
        0.7945678234100342 -> 0.7840051651000977

        0.795430064201355 -> 0.7893942594528198

        0.7904720902442932 -> 0.7852985262870789

- When w = 1, for each random seed, pretrained embeddings changes the accuracies from:
    
        0.8303513526916504 -> 0.8331537246704102

        0.836171567440033 -> 0.8409140110015869
        
        0.8290579915046692 -> 0.830566942691803


- When w = 0, the accuracies were not improved by the pretrained embeddings. 

- However, when w = 1, all three times the pretrained embeddings improved the accuracies.

##### Fixed pretrained embeddings

In [305]:
results_1_fixed_pretrained = evaluate_model(tokenizer, label_encoder, 1, 
                                            X_1, y, 
                                            dev_data_1, 
                                            X_devtest_1,y_devtest,
                                            trainable=False,
                                            embedding_matrix=embedding_matrix,
                                            batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [334]:
results_1_fixed_pretrained

{0: 0.7917654514312744, 21: 0.7840051651000977, 42: 0.785082995891571}

- When w = 1, for each random seed, fixed pretrained embeddings changes the accuracies from:
    
        0.8331537246704102 -> 0.7917654514312744

        0.8409140110015869 -> 0.7840051651000977
        
        0.830566942691803 -> 0.785082995891571


- When w = 1, compared with updating pretrained embeddings, the fixed pretrained embeddings impair the performance.

##### Pretrained embeddings with features

In [306]:
results_0_pretrained_features = evaluate_model(tokenizer, label_encoder, 0, 
                                               X_train_features_0, y, 
                                               dev_data_features_0, 
                                               X_devtest_features_0,y_devtest,
                                               features=features_train,
                                               embedding_matrix=embedding_matrix,
                                               batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [307]:
results_1_pretrained_features = evaluate_model(tokenizer, label_encoder, 1, 
                                               X_train_features_1, y, 
                                               dev_data_features_1, 
                                               X_devtest_features_1,y_devtest,
                                               features=features_train,
                                               embedding_matrix=embedding_matrix,
                                               batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [337]:
results_0_pretrained_features

{0: 0.765897810459137, 21: 0.7962923049926758, 42: 0.8064237833023071}

In [338]:
results_1_pretrained_features

{0: 0.836171567440033, 21: 0.8443630337715149, 42: 0.8441474437713623}

- When w = 0, for each random seed, adding features changes the accuracies from:
    
        0.7840051651000977 -> 0.765897810459137

        0.7893942594528198 -> 0.7962923049926758

        0.7852985262870789 -> 0.8064237833023071

- When w = 1, for each random seed, adding features changes the accuracies from:
    
        0.8331537246704102 -> 0.836171567440033

        0.8409140110015869 -> 0.8443630337715149
        
        0.830566942691803 -> 0.8441474437713623


When w = 0, two out of three times adding features improved the accuracies. 

When w = 1, all three times adding features improved the accuracies.

#### 1.4

##### Number of hidden layers X Hidden widths

In [271]:
hidden_nums = [0, 1, 2]
hidden_widths = [256, 512]

In [341]:
hidden_num_width_pretrained_featrues = {}
for hidden_num in hidden_nums:
    for hidden_width in hidden_widths:
        results = evaluate_model(tokenizer, label_encoder, 1, 
                                 X_train_features_1, y, 
                                 dev_data_features_1, 
                                 X_devtest_features_1,y_devtest,
                                 features=features_train, 
                                 embedding_matrix=embedding_matrix,
                                 hidden_num=hidden_num, hidden_width=hidden_width,
                                 batch_size=1)
        hidden_num_width_pretrained_featrues[(hidden_num, hidden_width)] = results

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

In [276]:
import pandas as pd

In [348]:
pd.DataFrame(hidden_num_width_pretrained_featrues).T

Unnamed: 0,Unnamed: 1,0,21,42
0,256,0.850399,0.853417,0.849537
0,512,0.850399,0.853417,0.849537
1,256,0.839189,0.847165,0.839405
1,512,0.844579,0.843932,0.838327
2,256,0.840052,0.838112,0.825609
2,512,0.803406,0.839189,0.813753


- Surprisingly, when number of hidden layer = 0, the model performs the best. Increasing the hidden width does not show consistent improvement on the accuracy.

##### Different nonlinearities

In [282]:
hidden_activations = ['linear', 'tanh', 'relu', 'sigmoid']

In [339]:
hidden_actv_pretrained_features = {}
for hidden_activation in hidden_activations:
    results = evaluate_model(tokenizer, label_encoder, 1, 
                             X_train_features_1, y, 
                             dev_data_features_1, 
                             X_devtest_features_1,y_devtest,
                             features=features_train, 
                             embedding_matrix=embedding_matrix,
                             hidden_activation=hidden_activation,
                             batch_size=1)
    hidden_actv_pretrained_features[hidden_activation] = results

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


In [346]:
pd.DataFrame(hidden_actv_pretrained_features).T

Unnamed: 0,0,21,42
linear,0.842854,0.841776,0.835956
tanh,0.836172,0.844363,0.844147
relu,0.837034,0.850614,0.846734
sigmoid,0.841776,0.847596,0.832507


- With fixed number of layer (1) and layer width (128), for seed=0, linear has the best performance, For seed=21, relu has the best performance. For seed=42, relu has the best performance again. Overall, relu has the best performance across random seeds.

##### w = 2

In [285]:
X_2 = preprocess_x(tokens, 2, tokenizer)
X_dev_2 = preprocess_x(tokens_dev, 2, tokenizer)
X_devtest_2 = preprocess_x(tokens_devtest, 2, tokenizer)

In [287]:
dev_data_2 = (X_dev_2, y_dev)

In [293]:
X_train_features_2 = [X_2, features_train]
dev_data_features_2 = ([X_dev_2, features_dev], y_dev)
X_devtest_features_2 = [X_devtest_2, features_devtest]

In [350]:
results_2_pretrained_features_0 = evaluate_model(tokenizer, label_encoder, 2, 
                                               X_train_features_2, y, 
                                               dev_data_features_2, 
                                               X_devtest_features_2,y_devtest,
                                               embedding_matrix=embedding_matrix, 
                                               features=features_train,
                                               hidden_num=0,
                                               batch_size=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [351]:
results_2_pretrained_features_0

{0: 0.852554440498352, 21: 0.8534166812896729, 42: 0.852554440498352}

- Using number of hidden layer=0, pretrained embeddings, and features, set w = 2, the model's performance is consistantly improved to have accuracy 85%-86%