In [12]:
from torch import nn 
import pickle
import torch
gpu = torch.device("mps")

In [13]:
class TextClassificationModel(nn.Module):
    def __init__(self, x_size, hidden_layer_size, dropout, activation_fn):
        super(TextClassificationModel, self).__init__()
        self.first_layer = nn.Linear(x_size, hidden_layer_size)
        self.hidden_layer = nn.Linear(hidden_layer_size, hidden_layer_size)
        self.output_projection_1 = nn.Linear(hidden_layer_size, 1)
        self.activation = activation_fn
        self.normalisation = nn.BatchNorm1d(hidden_layer_size)
        self.dropout = nn.Dropout1d(dropout)
        self.sigmoid = nn.Sigmoid()
        self.x_size = x_size

    def forward(self, x):
        x = self.first_layer(x)
        for _ in range(3):
            x = self.hidden_layer(x)
            if x.size(0) > 1:
                x = self.normalisation(x)
            x = self.activation(x)
            x = self.dropout(x)

        out = self.output_projection_1(x)
        out_distribution = self.sigmoid(out)
        return out_distribution

In [14]:
def train(
    train_dataloader, test_dataloader, loss_function, num_epochs, model, model_optimiser, batch_size
):
    # A counter for the number of gradient updates we've performed.
    num_iter = 0

    # Iterate `num_epochs` times.
    for epoch in range(num_epochs):
        print("Starting epoch {}".format(epoch + 1))
        # Iterate over the train_dataloader, unpacking the images and labels
        for data, labels in train_dataloader:
            # If we're using the GPU, move reshaped_images and labels to the GPU.
            if gpu:
                data = data.to(gpu)
                labels = labels.to(gpu)

            # Run the forward pass through the model to get predicted log distribution.
            predicted = model(data)

            # reshape
            labels = torch.unsqueeze(labels, 1)
   
            # Calculate the loss
            batch_loss = loss_function(predicted, labels)

            # Clear the gradients as we prepare to backprop.
            model_optimiser.zero_grad()

            # Backprop (backward pass), which calculates gradients.
            batch_loss.backward()

            # Take a gradient step to update parameters.
            model_optimiser.step()

            # Increment gradient update counter.
            num_iter += 1

            # Calculate test set loss and accuracy every 500 gradient updates
            # It's standard to have this as a separate evaluate function, but
            # we'll place it inline for didactic purposes.
            if num_iter % 500 == 0:
                # Set model to eval mode, which turns off dropout.
                model.eval()
                # Counters for the num of examples we get right / total num of examples.
                num_correct = 0
                total_examples = 0
                total_test_loss = 0

                with torch.no_grad():
                    # Iterate over the test dataloader
                    for test_data, test_labels in test_dataloader:

                        # If we're using the GPU, move tensors to the GPU.
                        if gpu:
                            test_data = test_data.to(gpu)
                            test_labels = test_labels.to(gpu)
                        
                        # reshape
                        test_labels = torch.unsqueeze(test_labels, 1)

                        # Run the forward pass to get predicted distribution.
                        predicted = model(test_data)
                               
                        # Calculate loss for this test batch. This is averaged, so multiply
                        # by the number of examples in batch to get a total.
                        total_test_loss += loss_function(predicted, test_labels)

                        # Get predicted labels (argmax)
                        predicted_labels = (predicted.data > 0.5).int()

                        # Count the number of examples in this batch
                        total_examples += test_labels.size(0)

                        # Count the total number of correctly predicted labels.
                        # predicted == labels generates a ByteTensor in indices where
                        # predicted and labels match, so we can sum to get the num correct.
                        num_correct += torch.sum(predicted_labels == test_labels.data)
                        
                accuracy = num_correct / total_examples
                average_test_loss = total_test_loss / total_examples
                print(
                    "Iteration {}. Test Loss {}. Test Accuracy {}. Total Examples {}".format(
                        num_iter, average_test_loss, accuracy, total_examples
                    )
                )
                # Set the model back to train mode, which activates dropout again.
                model.train()
    return model

In [15]:
# open data 
with open("./data/content_features_cv.pkl","rb") as f:
    content_features_df = pickle.load(f)

# drop values with 0 words per sentence
print(content_features_df.shape)
content_features_df = content_features_df[content_features_df["words_per_sentence_median"] != 0]
print(content_features_df.shape)

(72134, 13)
(72122, 13)


In [16]:
# create dataloader
labels = content_features_df["label"].values
features = content_features_df.drop("label", axis=1)
features = features.drop("dale_chall", axis=1) # due to NaN
features = features.drop("smog", axis=1) # due to NaN
features = features.drop("automatic_readability", axis=1) # due to NaN



# standardise data
features = features.apply(lambda x: (x - x.mean()) / x.std())
features.head()

Unnamed: 0,verbs_third_person,verbs_others,words_per_sentence_median,num_of_sentences,adverbs_rate,nouns_rate,adjectives_rate,verbs_third_person_rate,verbs_others_rate
0,0.717984,0.770385,-0.595685,0.748949,-0.720809,-0.800266,-0.476424,-0.249705,-0.093286
1,-0.926451,-0.790361,-1.945393,-0.784336,-0.025852,-2.170633,-2.451724,-2.500203,-0.460167
2,-0.868412,-0.663814,-0.516291,-0.731464,-0.894548,-0.219161,-0.081364,-1.308763,0.915637
3,1.723991,0.643838,0.35705,0.801821,-0.193986,0.473297,0.396531,1.010062,-0.282644
4,-0.404101,-0.663814,1.309785,-0.49354,-0.426788,1.392055,1.741989,0.79917,-0.989323


In [18]:
features = features.values

features_tensor = torch.tensor(features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.float32)

dataset = torch.utils.data.TensorDataset(features_tensor,labels_tensor)
print(features_tensor.shape)
print(labels_tensor.shape)

torch.Size([72122, 9])
torch.Size([72122])


In [19]:
batch_size = 16
num_epochs = 2
hidden_layer_size = 512
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset,test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
print(len(train_dataset))
print(len(test_dataset))

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

57697
14425


In [10]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=10, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.LeakyReLU()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)


57697
Starting epoch 1
Iteration 500. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425
Iteration 1000. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425
Iteration 1500. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425
Iteration 2000. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425
Iteration 2500. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425
Iteration 3000. Test Loss 6.25303316116333. Test Accuracy 0.48305025696754456. Total Examples 14425


KeyboardInterrupt: 

In [164]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.Softmax()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)


57697
Starting epoch 1


  return self._call_impl(*args, **kwargs)


Iteration 500. Test Loss 0.04337087646126747. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 1000. Test Loss 0.043358366936445236. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 1500. Test Loss 0.04334966838359833. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 2000. Test Loss 0.04333999752998352. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 2500. Test Loss 0.043330688029527664. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 3000. Test Loss 0.0433267280459404. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 3500. Test Loss 0.043323077261447906. Test Accuracy 0.5199306607246399. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.043318286538124084. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 4500. Test Loss 0.0433167964220047. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 5000. Test Loss 0.04331349581480026. Test Accuracy 0.5

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): Softmax(dim=None)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [165]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.LogSoftmax()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)


57697
Starting epoch 1


  return self._call_impl(*args, **kwargs)


Iteration 500. Test Loss 0.66214519739151. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 1000. Test Loss 0.6613492369651794. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 1500. Test Loss 0.6639080047607422. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 2000. Test Loss 0.6675491333007812. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 2500. Test Loss 0.6670762896537781. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 3000. Test Loss 0.6615761518478394. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 3500. Test Loss 0.6659291982650757. Test Accuracy 0.4800693094730377. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.6626461148262024. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 4500. Test Loss 0.6651015281677246. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 5000. Test Loss 0.6644758582115173. Test Accuracy 0.480069309473037

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): LogSoftmax(dim=None)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [166]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.LogSigmoid()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)


57697
Starting epoch 1
Iteration 500. Test Loss 0.04263605922460556. Test Accuracy 0.6635701656341553. Total Examples 14425
Iteration 1000. Test Loss 0.04245109111070633. Test Accuracy 0.6237088441848755. Total Examples 14425
Iteration 1500. Test Loss 0.042880818247795105. Test Accuracy 0.5796880125999451. Total Examples 14425
Iteration 2000. Test Loss 0.04270308092236519. Test Accuracy 0.5458579063415527. Total Examples 14425
Iteration 2500. Test Loss 0.042698945850133896. Test Accuracy 0.5435008406639099. Total Examples 14425
Iteration 3000. Test Loss 0.042544566094875336. Test Accuracy 0.6569843888282776. Total Examples 14425
Iteration 3500. Test Loss 0.042570021003484726. Test Accuracy 0.5872443914413452. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04223562404513359. Test Accuracy 0.5421143770217896. Total Examples 14425
Iteration 4500. Test Loss 0.04246094077825546. Test Accuracy 0.6119237542152405. Total Examples 14425
Iteration 5000. Test Loss 0.04258371517

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): LogSigmoid()
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [167]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.Sigmoid()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)


57697
Starting epoch 1
Iteration 500. Test Loss 0.043149884790182114. Test Accuracy 0.49143847823143005. Total Examples 14425
Iteration 1000. Test Loss 0.04288288205862045. Test Accuracy 0.6681455969810486. Total Examples 14425
Iteration 1500. Test Loss 0.0430869534611702. Test Accuracy 0.4882495701313019. Total Examples 14425
Iteration 2000. Test Loss 0.04264602065086365. Test Accuracy 0.6201040148735046. Total Examples 14425
Iteration 2500. Test Loss 0.04270919784903526. Test Accuracy 0.672443687915802. Total Examples 14425
Iteration 3000. Test Loss 0.04276537522673607. Test Accuracy 0.6437435150146484. Total Examples 14425
Iteration 3500. Test Loss 0.04256577789783478. Test Accuracy 0.6772270202636719. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.042286764830350876. Test Accuracy 0.6465857625007629. Total Examples 14425
Iteration 4500. Test Loss 0.04228523001074791. Test Accuracy 0.6516464352607727. Total Examples 14425
Iteration 5000. Test Loss 0.04231016337871

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): Sigmoid()
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [169]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.ELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.042029641568660736. Test Accuracy 0.6265511512756348. Total Examples 14425
Iteration 1000. Test Loss 0.041758958250284195. Test Accuracy 0.6156672239303589. Total Examples 14425
Iteration 1500. Test Loss 0.042377766221761703. Test Accuracy 0.5815597772598267. Total Examples 14425
Iteration 2000. Test Loss 0.04187173396348953. Test Accuracy 0.6275910139083862. Total Examples 14425
Iteration 2500. Test Loss 0.04198487102985382. Test Accuracy 0.6338301301002502. Total Examples 14425
Iteration 3000. Test Loss 0.04144539311528206. Test Accuracy 0.6688387989997864. Total Examples 14425
Iteration 3500. Test Loss 0.04200553521513939. Test Accuracy 0.640069305896759. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04094213992357254. Test Accuracy 0.6861698627471924. Total Examples 14425
Iteration 4500. Test Loss 0.04198361933231354. Test Accuracy 0.6788908243179321. Total Examples 14425
Iteration 5000. Test Loss 0.0422842018306

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): ELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [170]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.04225274920463562. Test Accuracy 0.6016637682914734. Total Examples 14425
Iteration 1000. Test Loss 0.04167064651846886. Test Accuracy 0.628284215927124. Total Examples 14425
Iteration 1500. Test Loss 0.04177100211381912. Test Accuracy 0.6625303030014038. Total Examples 14425
Iteration 2000. Test Loss 0.042112383991479874. Test Accuracy 0.6406932473182678. Total Examples 14425
Iteration 2500. Test Loss 0.04186621680855751. Test Accuracy 0.6616291403770447. Total Examples 14425
Iteration 3000. Test Loss 0.04206791892647743. Test Accuracy 0.6363951563835144. Total Examples 14425
Iteration 3500. Test Loss 0.04131036624312401. Test Accuracy 0.6587175130844116. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04169654846191406. Test Accuracy 0.6546273827552795. Total Examples 14425
Iteration 4500. Test Loss 0.041790299117565155. Test Accuracy 0.6634315252304077. Total Examples 14425
Iteration 5000. Test Loss 0.04184079915285

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [171]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.ReLU()
)
loss = nn.BCELoss()
optimiser = torch.optim.SGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.042681608349084854. Test Accuracy 0.6067937612533569. Total Examples 14425
Iteration 1000. Test Loss 0.04262861981987953. Test Accuracy 0.6078336238861084. Total Examples 14425
Iteration 1500. Test Loss 0.04254849627614021. Test Accuracy 0.6361871957778931. Total Examples 14425
Iteration 2000. Test Loss 0.04248280078172684. Test Accuracy 0.6075563430786133. Total Examples 14425
Iteration 2500. Test Loss 0.04258333519101143. Test Accuracy 0.5919584035873413. Total Examples 14425
Iteration 3000. Test Loss 0.042230408638715744. Test Accuracy 0.659480094909668. Total Examples 14425
Iteration 3500. Test Loss 0.04211507365107536. Test Accuracy 0.6472790241241455. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.042209818959236145. Test Accuracy 0.6542807817459106. Total Examples 14425
Iteration 4500. Test Loss 0.04189028590917587. Test Accuracy 0.6583015322685242. Total Examples 14425
Iteration 5000. Test Loss 0.0418497361242

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): ReLU()
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

Chosen CELU 

In [172]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.AdamW(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.04229911044239998. Test Accuracy 0.6399306654930115. Total Examples 14425
Iteration 1000. Test Loss 0.04267789423465729. Test Accuracy 0.5491161346435547. Total Examples 14425
Iteration 1500. Test Loss 0.04150907322764397. Test Accuracy 0.6176083087921143. Total Examples 14425
Iteration 2000. Test Loss 0.04198576882481575. Test Accuracy 0.6494280695915222. Total Examples 14425
Iteration 2500. Test Loss 0.042047031223773956. Test Accuracy 0.6320277452468872. Total Examples 14425
Iteration 3000. Test Loss 0.04149436205625534. Test Accuracy 0.629462718963623. Total Examples 14425
Iteration 3500. Test Loss 0.04182291775941849. Test Accuracy 0.6607279181480408. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04104803502559662. Test Accuracy 0.635008692741394. Total Examples 14425
Iteration 4500. Test Loss 0.040842898190021515. Test Accuracy 0.6558752059936523. Total Examples 14425
Iteration 5000. Test Loss 0.042306743562221

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [173]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.Adam(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.041883617639541626. Test Accuracy 0.6204506158828735. Total Examples 14425
Iteration 1000. Test Loss 0.042376305907964706. Test Accuracy 0.6529635787010193. Total Examples 14425
Iteration 1500. Test Loss 0.04201788082718849. Test Accuracy 0.6098440289497375. Total Examples 14425
Iteration 2000. Test Loss 0.04104060307145119. Test Accuracy 0.642149031162262. Total Examples 14425
Iteration 2500. Test Loss 0.04225935786962509. Test Accuracy 0.5839167833328247. Total Examples 14425
Iteration 3000. Test Loss 0.041369594633579254. Test Accuracy 0.6577469706535339. Total Examples 14425
Iteration 3500. Test Loss 0.041180793195962906. Test Accuracy 0.6343847513198853. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04167741537094116. Test Accuracy 0.6154592633247375. Total Examples 14425
Iteration 4500. Test Loss 0.042834002524614334. Test Accuracy 0.5527209639549255. Total Examples 14425
Iteration 5000. Test Loss 0.04139428958

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [174]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.ASGD(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.042512018233537674. Test Accuracy 0.5700519680976868. Total Examples 14425
Iteration 1000. Test Loss 0.0417768694460392. Test Accuracy 0.6092200875282288. Total Examples 14425
Iteration 1500. Test Loss 0.04246056452393532. Test Accuracy 0.579064130783081. Total Examples 14425
Iteration 2000. Test Loss 0.04187612235546112. Test Accuracy 0.6644020676612854. Total Examples 14425
Iteration 2500. Test Loss 0.04182278737425804. Test Accuracy 0.6208665370941162. Total Examples 14425
Iteration 3000. Test Loss 0.041488975286483765. Test Accuracy 0.6180242896080017. Total Examples 14425
Iteration 3500. Test Loss 0.04224379360675812. Test Accuracy 0.588492214679718. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04364825040102005. Test Accuracy 0.502183735370636. Total Examples 14425
Iteration 4500. Test Loss 0.04146573320031166. Test Accuracy 0.6429809331893921. Total Examples 14425
Iteration 5000. Test Loss 0.04218197986483574

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [180]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.RMSprop(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.044213201850652695. Test Accuracy 0.543708860874176. Total Examples 14425
Iteration 1000. Test Loss 0.04310188069939613. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 1500. Test Loss 0.043413400650024414. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 2000. Test Loss 0.04327463358640671. Test Accuracy 0.5200693011283875. Total Examples 14425
Iteration 2500. Test Loss 0.04385981336236. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 3000. Test Loss 0.04352565109729767. Test Accuracy 0.4800693094730377. Total Examples 14425
Iteration 3500. Test Loss 0.04338870942592621. Test Accuracy 0.4806932508945465. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04329801723361015. Test Accuracy 0.5199306607246399. Total Examples 14425
Iteration 4500. Test Loss 0.04334932938218117. Test Accuracy 0.4806932508945465. Total Examples 14425
Iteration 5000. Test Loss 0.04348139837384224

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [181]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.NAdam(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.0425371378660202. Test Accuracy 0.5600693225860596. Total Examples 14425
Iteration 1000. Test Loss 0.04241449758410454. Test Accuracy 0.5780242681503296. Total Examples 14425
Iteration 1500. Test Loss 0.04301730915904045. Test Accuracy 0.5188907980918884. Total Examples 14425
Iteration 2000. Test Loss 0.042496711015701294. Test Accuracy 0.600901186466217. Total Examples 14425
Iteration 2500. Test Loss 0.042248472571372986. Test Accuracy 0.6099133491516113. Total Examples 14425
Iteration 3000. Test Loss 0.04158344864845276. Test Accuracy 0.6232235431671143. Total Examples 14425
Iteration 3500. Test Loss 0.04222334176301956. Test Accuracy 0.6149740219116211. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04236486926674843. Test Accuracy 0.5654072761535645. Total Examples 14425
Iteration 4500. Test Loss 0.04253574088215828. Test Accuracy 0.5920970439910889. Total Examples 14425
Iteration 5000. Test Loss 0.042034864425659

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [182]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.RAdam(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.041691068559885025. Test Accuracy 0.6289081573486328. Total Examples 14425
Iteration 1000. Test Loss 0.04177755489945412. Test Accuracy 0.6216984391212463. Total Examples 14425
Iteration 1500. Test Loss 0.0423731654882431. Test Accuracy 0.6393761038780212. Total Examples 14425
Iteration 2000. Test Loss 0.04237179458141327. Test Accuracy 0.6293240785598755. Total Examples 14425
Iteration 2500. Test Loss 0.04245670139789581. Test Accuracy 0.674246072769165. Total Examples 14425
Iteration 3000. Test Loss 0.0421287976205349. Test Accuracy 0.6017330884933472. Total Examples 14425
Iteration 3500. Test Loss 0.0412408821284771. Test Accuracy 0.6526169776916504. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04159567505121231. Test Accuracy 0.6244021058082581. Total Examples 14425
Iteration 4500. Test Loss 0.04142557457089424. Test Accuracy 0.6269670724868774. Total Examples 14425
Iteration 5000. Test Loss 0.04144078120589256.

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

In [183]:
# model object
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.5, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.Rprop(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.041302766650915146. Test Accuracy 0.6063085198402405. Total Examples 14425
Iteration 1000. Test Loss 0.04150797799229622. Test Accuracy 0.6045753955841064. Total Examples 14425
Iteration 1500. Test Loss 0.041378773748874664. Test Accuracy 0.6053379774093628. Total Examples 14425
Iteration 2000. Test Loss 0.041336141526699066. Test Accuracy 0.6080415844917297. Total Examples 14425
Iteration 2500. Test Loss 0.041558410972356796. Test Accuracy 0.5966030955314636. Total Examples 14425
Iteration 3000. Test Loss 0.04142079874873161. Test Accuracy 0.6022183895111084. Total Examples 14425
Iteration 3500. Test Loss 0.04137151315808296. Test Accuracy 0.6083881855010986. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.04153791069984436. Test Accuracy 0.6031889319419861. Total Examples 14425
Iteration 4500. Test Loss 0.04156554117798805. Test Accuracy 0.5918197631835938. Total Examples 14425
Iteration 5000. Test Loss 0.04142121970

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

Chosen NAdam

In [187]:
# model object
num_epochs = 3
hidden_layer_size = 512
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.01, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.NAdam(model.parameters())
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.039066631346940994. Test Accuracy 0.6555979251861572. Total Examples 14425
Iteration 1000. Test Loss 0.03696073219180107. Test Accuracy 0.6877642869949341. Total Examples 14425
Iteration 1500. Test Loss 0.039150018244981766. Test Accuracy 0.6495667099952698. Total Examples 14425
Iteration 2000. Test Loss 0.03693527728319168. Test Accuracy 0.6881109476089478. Total Examples 14425
Iteration 2500. Test Loss 0.03580513224005699. Test Accuracy 0.709324061870575. Total Examples 14425
Iteration 3000. Test Loss 0.0422854870557785. Test Accuracy 0.5538301467895508. Total Examples 14425
Iteration 3500. Test Loss 0.03693638741970062. Test Accuracy 0.7001733183860779. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.036880213767290115. Test Accuracy 0.6958752274513245. Total Examples 14425
Iteration 4500. Test Loss 0.03826221451163292. Test Accuracy 0.6969150900840759. Total Examples 14425
Iteration 5000. Test Loss 0.03592642396688

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.01, inplace=False)
  (sigmoid): Sigmoid()
)

In [188]:
# model object
num_epochs = 3
hidden_layer_size = 512
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.01, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.NAdam(model.parameters(),lr=0.001)
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.03921680524945259. Test Accuracy 0.6517157554626465. Total Examples 14425
Iteration 1000. Test Loss 0.03670905530452728. Test Accuracy 0.6888041496276855. Total Examples 14425
Iteration 1500. Test Loss 0.038267191499471664. Test Accuracy 0.6729289293289185. Total Examples 14425
Iteration 2000. Test Loss 0.036887798458337784. Test Accuracy 0.6888041496276855. Total Examples 14425
Iteration 2500. Test Loss 0.03635110706090927. Test Accuracy 0.6963604688644409. Total Examples 14425
Iteration 3000. Test Loss 0.04040705785155296. Test Accuracy 0.6627383232116699. Total Examples 14425
Iteration 3500. Test Loss 0.0373803973197937. Test Accuracy 0.6897746920585632. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.036191798746585846. Test Accuracy 0.699826717376709. Total Examples 14425
Iteration 4500. Test Loss 0.0371435210108757. Test Accuracy 0.6937955021858215. Total Examples 14425
Iteration 5000. Test Loss 0.035946622490882

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.01, inplace=False)
  (sigmoid): Sigmoid()
)

In [189]:
# model object
num_epochs = 3
hidden_layer_size = 512
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=14, hidden_layer_size=hidden_layer_size, dropout=0.01, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.NAdam(model.parameters(),lr=0.004)
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.03992997109889984. Test Accuracy 0.6357712149620056. Total Examples 14425
Iteration 1000. Test Loss 0.03666790947318077. Test Accuracy 0.695459246635437. Total Examples 14425
Iteration 1500. Test Loss 0.04080686718225479. Test Accuracy 0.6228769421577454. Total Examples 14425
Iteration 2000. Test Loss 0.036699578166007996. Test Accuracy 0.696013867855072. Total Examples 14425
Iteration 2500. Test Loss 0.03714744374155998. Test Accuracy 0.7081455588340759. Total Examples 14425
Iteration 3000. Test Loss 0.044240668416023254. Test Accuracy 0.48062393069267273. Total Examples 14425
Iteration 3500. Test Loss 0.037058163434267044. Test Accuracy 0.6994800567626953. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.0402972474694252. Test Accuracy 0.646793782711029. Total Examples 14425
Iteration 4500. Test Loss 0.03661003336310387. Test Accuracy 0.7072443962097168. Total Examples 14425
Iteration 5000. Test Loss 0.038093246519565

TextClassificationModel(
  (first_layer): Linear(in_features=14, out_features=512, bias=True)
  (hidden_layer): Linear(in_features=512, out_features=512, bias=True)
  (output_projection_1): Linear(in_features=512, out_features=1, bias=True)
  (activation): CELU(alpha=1.0)
  (normalisation): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout1d(p=0.01, inplace=False)
  (sigmoid): Sigmoid()
)

In [21]:
# model object
num_epochs = 3
hidden_layer_size = 512
sequence_length = len(train_dataset)
print(sequence_length)
model = TextClassificationModel(
    x_size=9, hidden_layer_size=hidden_layer_size, dropout=0.001, activation_fn=nn.CELU()
)
loss = nn.BCELoss()
optimiser = torch.optim.NAdam(model.parameters(),lr=0.001)
model.to(gpu)
train(
    train_dataloader, 
    test_dataloader, 
    loss, 
    num_epochs, 
    model, 
    optimiser,
    batch_size
)

57697
Starting epoch 1
Iteration 500. Test Loss 0.037230927497148514. Test Accuracy 0.6919930577278137. Total Examples 14425
Iteration 1000. Test Loss 0.03672466799616814. Test Accuracy 0.698717474937439. Total Examples 14425
Iteration 1500. Test Loss 0.03663802146911621. Test Accuracy 0.699272096157074. Total Examples 14425
Iteration 2000. Test Loss 0.03658523038029671. Test Accuracy 0.695459246635437. Total Examples 14425
Iteration 2500. Test Loss 0.03648880496621132. Test Accuracy 0.700450599193573. Total Examples 14425
Iteration 3000. Test Loss 0.03601334989070892. Test Accuracy 0.7055112719535828. Total Examples 14425
Iteration 3500. Test Loss 0.03605044260621071. Test Accuracy 0.7029462456703186. Total Examples 14425
Starting epoch 2
Iteration 4000. Test Loss 0.039933402091264725. Test Accuracy 0.6205199360847473. Total Examples 14425
Iteration 4500. Test Loss 0.03685532137751579. Test Accuracy 0.7014904618263245. Total Examples 14425
Iteration 5000. Test Loss 0.03677619993686676

KeyboardInterrupt: 