In [1]:
import os

import keras
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Input, Add, Concatenate
from keras.layers import BatchNormalization, LayerNormalization
from keras.layers import LSTM

2024-02-11 01:00:36.479611: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def load_data(dataset):
    """Read one split of the encoded dataset from disk.

    Args:
        dataset: Name of the split, one of ``'train'``, ``'dev'`` or ``'test'``.

    Returns:
        For ``'train'`` and ``'dev'``: a list with one entry per line, each the
        tab-separated fields of that line with the first field converted to
        ``int`` (the label).
        For ``'test'``: a list of the raw lines with the line ending stripped.
        For any other name: an empty list.

    Raises:
        OSError: If the file for the requested split cannot be opened.
        ValueError: If the first field of a train/dev line is not an integer.
    """
    # NOTE(review): the test file lives outside ./data, unlike the other two
    # splits -- confirm this path is intentional.
    data_path = {
        'train': './data/train_enc.tsv',
        'dev': './data/dev_enc.tsv',
        'test': './test_enc_unlabeled.tsv'
    }
    res = []
    if dataset in ['train', 'dev']:
        # `with` guarantees the handle is closed even if parsing a line fails.
        with open(data_path[dataset], encoding='utf-8') as handle:
            for line in handle:
                fields = line.rstrip('\n\r').split('\t')
                fields[0] = int(fields[0])
                res.append(fields)
    elif dataset == 'test':
        with open(data_path[dataset], encoding='utf-8') as handle:
            for line in handle:
                res.append(line.rstrip('\n\r'))
    return res

In [3]:
# Load the labelled splits; each row is the list of tab-separated fields of one
# line, with the first field converted to an int label by load_data.
train = load_data('train')
dev = load_data('dev')

In [4]:
# Separate every row into its label (field 0) and its text (field 1),
# keeping labels and texts as parallel lists for each split.
train_labels = [row[0] for row in train]
train_texts = [row[1] for row in train]
dev_labels = [row[0] for row in dev]
dev_texts = [row[1] for row in dev]

In [5]:
# Sanity check: report how many examples each split contains.
print("Train size:", len(train_labels))
print("Dev size:", len(dev_labels))

Train size: 16220
Dev size: 2027


In [6]:
# Learn a bag-of-words vocabulary from the training texts (fit returns the
# vectorizer itself), then show the shape of the resulting count matrix.
cv = CountVectorizer().fit(train_texts)
cv_vec = cv.transform(train_texts)
print(cv_vec.shape)

(16220, 17248)


In [7]:
# extract features using TF-IDF
# The vectorizer is fitted on the training texts only; the same fitted object
# is reused later to transform both the training and the dev texts.
tfidf = TfidfVectorizer(
    analyzer='word',  # word-level tokens
    ngram_range=(1,4),  # unigrams up to 4-grams
    max_df=0.6,  # float: drop terms present in more than 60% of documents
    min_df=0.001  # float: drop terms present in fewer than 0.1% of documents
)
tfidf.fit(train_texts)

In [8]:
# Transform the training texts with the fitted TF-IDF vectorizer and show the
# shape of the resulting matrix.
tfidf_vec = tfidf.transform(train_texts)
print(tfidf_vec.shape)

(16220, 3967)


In [9]:
# Number of target classes; fixes the width of the one-hot label matrices.
num_classes = 2

# Dense TF-IDF feature matrices for both splits, produced by the vectorizer
# that was fitted on the training texts.
train_feats_matrix = tfidf.transform(train_texts).toarray()
dev_feats_matrix = tfidf.transform(dev_texts).toarray()

# Convert each integer label to a one-hot vector and stack them as a matrix.
train_label_matrix = keras.utils.to_categorical(train_labels, num_classes=num_classes)
dev_label_matrix = keras.utils.to_categorical(dev_labels, num_classes=num_classes)

In [10]:
def build_MLP(input_size, output_size, num_layers, hidden_size,
              activation="relu",
              dropout_rate=0.0,
              batch_norm=False,
              layer_norm=False,
              l2_reg=0.0,
              loss="categorical_crossentropy",
              optimizer="SGD",
              learning_rate=0.1,
              metric="accuracy"):
    """Build and compile a feed-forward softmax classifier.

    With ``num_layers == 1`` the model is a single softmax ``Dense`` layer
    mapping the input straight to the classes; ``hidden_size``, ``activation``,
    ``dropout_rate``, ``batch_norm`` and ``layer_norm`` are not used in that
    case. Otherwise the model has ``num_layers - 1`` hidden blocks, each
    ``Dense -> [LayerNormalization] -> [BatchNormalization] -> Activation ->
    [Dropout]``, followed by a softmax ``Dense`` output layer.

    Args:
        input_size: Number of input features.
        output_size: Number of output classes.
        num_layers: Total number of ``Dense`` layers (hidden + output), >= 1.
        hidden_size: Units in every hidden ``Dense`` layer.
        activation: Activation applied after each hidden layer.
        dropout_rate: Dropout rate after each hidden block; 0 disables dropout.
        batch_norm: Whether to add ``BatchNormalization`` in each hidden block.
        layer_norm: Whether to add ``LayerNormalization`` in each hidden block.
        l2_reg: L2 factor on the kernels of the hidden layers (and of the only
            layer when ``num_layers == 1``). The output layer of a multi-layer
            model is not regularized.
        loss: Loss passed to ``model.compile``.
        optimizer: One of ``"SGD"``, ``"RMSprop"`` or ``"Adam"``.
        learning_rate: Learning rate given to the selected optimizer.
        metric: Metric passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
        NotImplementedError: If ``optimizer`` is not a supported name.
    """
    if num_layers < 1:
        raise ValueError("num_layers must be at least 1")

    # Resolve the optimizer up front. A separate name is used for the instance
    # so that every branch really hands an optimizer configured with
    # `learning_rate` to compile(); previously a misspelled assignment left the
    # bare string in place for RMSprop/Adam and the learning rate was ignored.
    if optimizer == "SGD":
        optimizer_instance = keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == "RMSprop":
        optimizer_instance = keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif optimizer == "Adam":
        optimizer_instance = keras.optimizers.Adam(learning_rate=learning_rate)
    else:
        raise NotImplementedError("unsupported optimizer: %r" % (optimizer,))

    model = Sequential()

    if num_layers == 1:
        # single layer: input -> class
        model.add(Dense(output_size,
                        activation="softmax",
                        input_dim=input_size,
                        kernel_initializer=keras.initializers.he_normal(seed=0),
                        bias_initializer="zeros",
                        kernel_regularizer=keras.regularizers.l2(l2_reg)))
    else:
        for i in range(num_layers - 1):
            # Only the first layer declares the input width; later layers
            # infer their input size from the layer before them.
            first_layer_kwargs = {"input_dim": input_size} if i == 0 else {}
            model.add(Dense(hidden_size,
                            kernel_initializer=keras.initializers.he_normal(seed=0),
                            bias_initializer="zeros",
                            kernel_regularizer=keras.regularizers.l2(l2_reg),
                            **first_layer_kwargs))
            # add layer_norm
            if layer_norm:
                model.add(LayerNormalization())
            # add batch_norm
            if batch_norm:
                model.add(BatchNormalization())
            # add activation
            model.add(Activation(activation))
            # add dropout here (set seed as 0 in order to reproduce)
            if dropout_rate > 0.0:
                model.add(Dropout(dropout_rate, seed=0))
        # last layer: hidden -> class
        model.add(Dense(output_size,
                        activation="softmax",
                        kernel_initializer=keras.initializers.he_normal(seed=0),
                        bias_initializer="zeros"))

    # set the loss, the optimizer, and the metric
    model.compile(loss=loss, optimizer=optimizer_instance, metrics=[metric])

    return model

In [None]:
def build_LSTM(input_size, output_size, num_layers, hidden_size,
              activation="relu",
              dropout_rate=0.0,
              batch_norm=False,
              layer_norm=False,
              l2_reg=0.0,
              loss="categorical_crossentropy",
              optimizer="SGD",
              learning_rate=0.1,
              metric="accuracy"):
    """Build and compile an LSTM-based softmax classifier.

    With ``num_layers == 1`` the model is one ``LSTM`` followed by a softmax
    ``Dense`` layer, without normalization or dropout. Otherwise it has
    ``num_layers - 1`` recurrent blocks, each ``LSTM -> [LayerNormalization]
    -> [BatchNormalization] -> [Dropout]``, followed by the softmax ``Dense``
    layer. Every LSTM except the last one returns the full sequence so that
    the next LSTM receives sequence input.

    Args:
        input_size: Accepted for signature parity with ``build_MLP``; not used
            here, no input layer is added by this function.
        output_size: Number of output classes.
        num_layers: Layer count as described above, >= 1.
        hidden_size: Units in every ``LSTM`` layer.
        activation: Accepted for signature parity with ``build_MLP``; not
            applied, the LSTM layers keep their default activations.
        dropout_rate: Dropout rate after each recurrent block when
            ``num_layers > 1``; 0 disables dropout.
        batch_norm: Whether to add ``BatchNormalization`` after each LSTM when
            ``num_layers > 1``.
        layer_norm: Whether to add ``LayerNormalization`` after each LSTM when
            ``num_layers > 1``.
        l2_reg: L2 factor on the LSTM input kernels.
        loss: Loss passed to ``model.compile``.
        optimizer: One of ``"SGD"``, ``"RMSprop"`` or ``"Adam"``.
        learning_rate: Learning rate given to the selected optimizer.
        metric: Metric passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
        NotImplementedError: If ``optimizer`` is not a supported name.
    """
    if num_layers < 1:
        raise ValueError("num_layers must be at least 1")

    # Resolve the optimizer up front under its own name, so that RMSprop and
    # Adam are really instantiated with `learning_rate` instead of the bare
    # string being forwarded to compile().
    if optimizer == "SGD":
        optimizer_instance = keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == "RMSprop":
        optimizer_instance = keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif optimizer == "Adam":
        optimizer_instance = keras.optimizers.Adam(learning_rate=learning_rate)
    else:
        raise NotImplementedError("unsupported optimizer: %r" % (optimizer,))

    model = Sequential()

    if num_layers == 1:
        # single recurrent layer: sequence -> hidden (last step only)
        model.add(LSTM(hidden_size,
                       kernel_initializer=keras.initializers.he_normal(seed=0),
                       bias_initializer="zeros",
                       kernel_regularizer=keras.regularizers.l2(l2_reg)))
    else:
        num_recurrent = num_layers - 1
        for i in range(num_recurrent):
            # Intermediate LSTMs must emit the whole sequence; only the last
            # one collapses it to a single hidden vector for the classifier.
            model.add(LSTM(hidden_size,
                           return_sequences=(i < num_recurrent - 1),
                           kernel_initializer=keras.initializers.he_normal(seed=0),
                           bias_initializer="zeros",
                           kernel_regularizer=keras.regularizers.l2(l2_reg)))
            # add layer_norm
            if layer_norm:
                model.add(LayerNormalization())
            # add batch_norm
            if batch_norm:
                model.add(BatchNormalization())
            # add dropout here (set seed as 0 in order to reproduce)
            if dropout_rate > 0.0:
                model.add(Dropout(dropout_rate, seed=0))

    # last layer: hidden -> class
    model.add(Dense(output_size,
                    activation="softmax",
                    kernel_initializer=keras.initializers.he_normal(seed=0),
                    bias_initializer="zeros"))

    # set the loss, the optimizer, and the metric
    model.compile(loss=loss, optimizer=optimizer_instance, metrics=[metric])

    return model

In [11]:
# Collects the History object returned by model.fit for each training run in
# the cells below. Re-running this cell resets the collection.
histories = []

In [21]:
# Experiment: MLP with num_layers=3 (two hidden blocks + softmax output).
# NOTE(review): this cell's execution count (21) is higher than that of the
# cells after it, so the notebook was not run top to bottom -- re-run with a
# fresh kernel before trusting the recorded outputs.
num_classes = 2
model = build_MLP(input_size=train_feats_matrix.shape[1], output_size=num_classes,
                  num_layers=3, hidden_size=200, activation="relu",
                  batch_norm=True, layer_norm=True, dropout_rate=0.5)
# Save a checkpoint only when validation accuracy improves.
# NOTE(review): every experiment cell writes to the same path, so each run
# overwrites the checkpoint of the previous one.
checkpointer = keras.callbacks.ModelCheckpoint(
    filepath="./models/mlp.keras",
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True)

tf.random.set_seed(42)
history = model.fit(train_feats_matrix, train_label_matrix,
                    validation_data=(dev_feats_matrix, dev_label_matrix),
                    epochs=30, batch_size=32, verbose=1,
                    callbacks=[checkpointer])
# Replace the in-memory model with the saved best checkpoint, so the scores
# below describe that checkpoint rather than the final-epoch weights.
# NOTE(review): LayerNormalization is a built-in Keras layer, so custom_objects
# is presumably unnecessary here -- confirm before removing.
model = keras.models.load_model("./models/mlp.keras",
                                custom_objects={"LayerNormalization": LayerNormalization})

train_score = model.evaluate(train_feats_matrix, train_label_matrix,
                             batch_size=32)
dev_score = model.evaluate(dev_feats_matrix, dev_label_matrix,
                           batch_size=32)

histories.append(history)

# Index 0 is the loss, index 1 the single metric passed to compile().
print("training loss:", train_score[0], "training accuracy", train_score[1])
print("validation loss:", dev_score[0], "validation accuracy", dev_score[1])

Epoch 1/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - accuracy: 0.5924 - loss: 0.7535 - val_accuracy: 0.7928 - val_loss: 0.4556
Epoch 2/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.7814 - loss: 0.4785 - val_accuracy: 0.8185 - val_loss: 0.4559
Epoch 3/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8381 - loss: 0.3746 - val_accuracy: 0.8397 - val_loss: 0.4411
Epoch 4/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8765 - loss: 0.2996 - val_accuracy: 0.8411 - val_loss: 0.4604
Epoch 5/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.8953 - loss: 0.2586 - val_accuracy: 0.8367 - val_loss: 0.5235
Epoch 6/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.9095 - loss: 0.2263 - val_accuracy: 0.8485 - val_loss: 0.5016
Epoch 7/30
[1m507/507

In [12]:
# Experiment: MLP with num_layers=2 (one hidden block + softmax output).
num_classes = 2
model = build_MLP(input_size=train_feats_matrix.shape[1], output_size=num_classes,
                  num_layers=2, hidden_size=200, activation="relu",
                  batch_norm=True, layer_norm=True, dropout_rate=0.5)
# Save a checkpoint only when validation accuracy improves.
# NOTE(review): every experiment cell writes to the same path, so each run
# overwrites the checkpoint of the previous one.
checkpointer = keras.callbacks.ModelCheckpoint(
    filepath="./models/mlp.keras",
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True)

tf.random.set_seed(42)
history = model.fit(train_feats_matrix, train_label_matrix,
                    validation_data=(dev_feats_matrix, dev_label_matrix),
                    epochs=30, batch_size=32, verbose=1,
                    callbacks=[checkpointer])
# Replace the in-memory model with the saved best checkpoint, so the scores
# below describe that checkpoint rather than the final-epoch weights.
model = keras.models.load_model("./models/mlp.keras",
                                custom_objects={"LayerNormalization": LayerNormalization})

train_score = model.evaluate(train_feats_matrix, train_label_matrix,
                             batch_size=32)
dev_score = model.evaluate(dev_feats_matrix, dev_label_matrix,
                           batch_size=32)

histories.append(history)

# Index 0 is the loss, index 1 the single metric passed to compile().
print("training loss:", train_score[0], "training accuracy", train_score[1])
print("validation loss:", dev_score[0], "validation accuracy", dev_score[1])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
training loss: 0.03251943364739418 training accuracy 0.9863748550415039
validation loss: 0.47187790274620056 validation accuracy 0.8806117177009583


In [16]:
# Experiment: MLP with num_layers=1, i.e. a single softmax layer on the input.
# NOTE(review): build_MLP's single-layer branch does not use hidden_size,
# activation, batch_norm, layer_norm or dropout_rate, so those arguments have
# no effect on this model.
num_classes = 2
model = build_MLP(input_size=train_feats_matrix.shape[1], output_size=num_classes,
                  num_layers=1, hidden_size=200, activation="relu",
                  batch_norm=True, layer_norm=True, dropout_rate=0.5)
# Save a checkpoint only when validation accuracy improves.
# NOTE(review): every experiment cell writes to the same path, so each run
# overwrites the checkpoint of the previous one.
checkpointer = keras.callbacks.ModelCheckpoint(
    filepath="./models/mlp.keras",
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True)

tf.random.set_seed(42)
history = model.fit(train_feats_matrix, train_label_matrix,
                    validation_data=(dev_feats_matrix, dev_label_matrix),
                    epochs=100, batch_size=32, verbose=1,
                    callbacks=[checkpointer])
# Replace the in-memory model with the saved best checkpoint, so the scores
# below describe that checkpoint rather than the final-epoch weights.
model = keras.models.load_model("./models/mlp.keras",
                                custom_objects={"LayerNormalization": LayerNormalization})

train_score = model.evaluate(train_feats_matrix, train_label_matrix,
                             batch_size=32)
dev_score = model.evaluate(dev_feats_matrix, dev_label_matrix,
                           batch_size=32)

histories.append(history)

# Index 0 is the loss, index 1 the single metric passed to compile().
print("training loss:", train_score[0], "training accuracy", train_score[1])
print("validation loss:", dev_score[0], "validation accuracy", dev_score[1])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [18]:
# Continue training the current `model` up to epoch 200, numbering epochs from
# 101 via initial_epoch.
# NOTE(review): this cell depends on `model` and `checkpointer` left in the
# kernel by an earlier cell. If the previous experiment cell ran last, `model`
# is the checkpoint reloaded there, not the final-epoch weights -- confirm
# that resuming from the best checkpoint is intended.
history = model.fit(train_feats_matrix, train_label_matrix,
                    validation_data=(dev_feats_matrix, dev_label_matrix),
                    epochs=200, batch_size=32, verbose=1,
                    callbacks=[checkpointer], initial_epoch=100)
# Swap in the best checkpoint on disk before scoring.
model = keras.models.load_model("./models/mlp.keras",
                                custom_objects={"LayerNormalization": LayerNormalization})

train_score = model.evaluate(train_feats_matrix, train_label_matrix,
                             batch_size=32)
dev_score = model.evaluate(dev_feats_matrix, dev_label_matrix,
                           batch_size=32)

# NOTE(review): this replaces the last stored History instead of extending it,
# so the record of the earlier epochs of this run is presumably dropped from
# `histories` -- verify that is acceptable for later plotting.
histories[-1] = history

# Index 0 is the loss, index 1 the single metric passed to compile().
print("training loss:", train_score[0], "training accuracy", train_score[1])
print("validation loss:", dev_score[0], "validation accuracy", dev_score[1])

Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 

In [None]:
# Continue training the current `model` up to epoch 1000, numbering epochs
# from 201 via initial_epoch.
# NOTE(review): this cell depends on `model` and `checkpointer` left in the
# kernel by earlier cells and has no execution count, so it may not have run
# to completion -- confirm before relying on its output.
history = model.fit(train_feats_matrix, train_label_matrix,
                    validation_data=(dev_feats_matrix, dev_label_matrix),
                    epochs=1000, batch_size=32, verbose=1,
                    callbacks=[checkpointer], initial_epoch=200)
# Swap in the best checkpoint on disk before scoring.
model = keras.models.load_model("./models/mlp.keras",
                                custom_objects={"LayerNormalization": LayerNormalization})

train_score = model.evaluate(train_feats_matrix, train_label_matrix,
                             batch_size=32)
dev_score = model.evaluate(dev_feats_matrix, dev_label_matrix,
                           batch_size=32)

# NOTE(review): this replaces the last stored History instead of extending it,
# so the record of the earlier epochs of this run is presumably dropped from
# `histories` -- verify that is acceptable for later plotting.
histories[-1] = history

# Index 0 is the loss, index 1 the single metric passed to compile().
print("training loss:", train_score[0], "training accuracy", train_score[1])
print("validation loss:", dev_score[0], "validation accuracy", dev_score[1])

Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000
Epoch 222/1000
Epoch 223/1000
Epoch 224/1000
Epoch 225/1000
Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/