Configuration of the Environment

In [0]:
# Hard reset of the runtime: sends SIGKILL (-9) to pid -1, i.e. every process
# this user is allowed to signal, which includes the notebook kernel itself.
# NOTE(review): presumably meant to be run by hand to free a stuck Colab
# runtime; it stops "Run All" from continuing past this cell — confirm it
# should stay in the notebook.
!kill -9 -1

In [1]:
import torch

# Release cached CUDA allocations that may be left over from an earlier run.
torch.cuda.empty_cache()

# Select the compute device once; later cells move the model, the loss and
# the batches onto `device`.
if not torch.cuda.is_available():
    print("There is no GPU available, using the CPU instead!")
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print("There are {} GPUs available.".format(torch.cuda.device_count()))
    print("We will use GPU {}".format(torch.cuda.get_device_name(0)))

There are 1 GPUs available.
We will use GPU Tesla P100-PCIE-16GB


In [2]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

Collecting gputil
  Downloading https://files.pythonhosted.org/packages/ed/0e/5c61eedde9f6c87713e89d794f01e378cfd9565847d4576fa627d758c554/GPUtil-1.4.0.tar.gz
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-cp36-none-any.whl size=7413 sha256=75768af385fb6364446f3df586c79dfb7845650b0c37e4ac0b0d94904aca0596
  Stored in directory: /root/.cache/pip/wheels/3d/77/07/80562de4bb0786e5ea186911a2c831fdd0018bda69beab71fd
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Gen RAM Free: 12.7 GB  | Proc size: 280.1 MB
GPU RAM Free: 16270MB | Used: 10MB | Util   0% | Total 16280MB


Now Processing the Data

In [3]:
#Changing the 4 to 1
# Divide the `target` column by 4 so that a label of 4 becomes 1 (and 0 stays
# 0), then write the result to a new CSV that the later cells load.
import pandas as pd

train_with_4 = pd.read_csv('train.csv')
train_with_4['target'] = train_with_4['target'].div(4)
train_with_4.to_csv('modified_train.csv', index=False)
print("Transform Complete!")

Transform Complete!


In [4]:
#Testing the change
# Read back 'modified_train.csv' and display its first rows so the rescaled
# `target` column can be inspected by eye.
import pandas as pd
train_with_1 = pd.read_csv('modified_train.csv')
train_with_1.head()

Unnamed: 0,Id,date,user,text,target
0,0,Fri Jun 05 22:04:23 PDT 2009,JGoldsborough,"@jbtaylor WIth ya. &quot;I'd like a Palm Pre, ...",1.0
1,1,Sat Jun 06 03:12:21 PDT 2009,Psioui,"felt the earthquake this afternoon, it seems t...",1.0
2,2,Sat May 30 19:02:49 PDT 2009,adriville,"Ruffles on shirts are like so in, me Likey",1.0
3,3,Thu Jun 25 05:59:18 PDT 2009,Blondie128,Pretty bad night into a crappy morning....FML!...,0.0
4,4,Sat May 30 11:16:35 PDT 2009,khrabrov,"@dcbriccetti yeah, what a clear view!",1.0


In [0]:
#Defining the generate bigrams method for the Fast_Text class
def generate_bigrams(x):
    """Append every distinct adjacent-token pair of ``x`` to ``x`` itself.

    Args:
        x: list of string tokens. It is modified in place.

    Returns:
        The same list object, with one space-joined string added per unique
        bigram. The bigrams come from iterating a set, so their relative
        order is not defined.
    """
    unique_pairs = set(zip(x, x[1:]))
    x.extend(' '.join(pair) for pair in unique_pairs)
    return x

In [0]:
#Getting the relevant imports and the fields for reading training data
import torch
from torchtext import data
from torchtext import datasets
import random
import pandas as pd
import numpy as np

# Seed shared by this cell and by the train/validation split further down.
SEED = 1234

# Seed PyTorch's RNG and ask cuDNN for deterministic behaviour so repeated
# runs are comparable.
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Text field: generate_bigrams is applied to each example as preprocessing,
# so every example carries its unigrams followed by its bigrams.
TEXT = data.Field(preprocessing = generate_bigrams)
# Label field stored as float, matching the float targets that
# BCEWithLogitsLoss is given in the training cells.
TARGET = data.LabelField(dtype = torch.float)

In [0]:
#Defining the fields for reading train.csv
# One (name, field) pair per CSV column, in column order. Only the last two
# columns are bound to fields; the three (None, None) entries are presumably
# torchtext's way of skipping a column — confirm against the torchtext docs.
fields_train = [
    (None, None),
    (None, None),
    (None, None),
    ('text', TEXT),
    ('target', TARGET),
]

In [0]:
#Reading train.csv
# Load the rescaled CSV ('modified_train.csv', not the raw 'train.csv') using
# the column layout in `fields_train`; the header row is skipped.
train_data = data.TabularDataset(path = 'modified_train.csv',
                                 format = 'csv',
                                 fields = fields_train,
                                 skip_header = True
)

In [9]:
#Testing whether the data was imported successfully
# Show the attributes of the first example: expect a `text` token list
# (unigrams followed by the appended bigrams) and a `target` value.
print(vars(train_data[0]))

{'text': ['@jbtaylor', 'WIth', 'ya.', "&quot;I'd", 'like', 'a', 'Palm', 'Pre,', 'Touchstone', 'charger.', 'ReadyNow?', 'Yes,', 'that', 'sounds', 'good.', 'But', 'is', 'my', 'beer', 'ready', "now?'", '#prelaunch', 'Yes, that', "ready now?'", 'charger. ReadyNow?', 'my beer', 'But is', 'good. But', "&quot;I'd like", 'Pre, Touchstone', 'Palm Pre,', 'is my', "now?' #prelaunch", 'like a', 'ReadyNow? Yes,', 'that sounds', "ya. &quot;I'd", 'Touchstone charger.', '@jbtaylor WIth', 'beer ready', 'WIth ya.', 'sounds good.', 'a Palm'], 'target': '1.0'}


In [0]:
#Creating validation set
# random.seed() returns None, so the original `random_state = random.seed(SEED)`
# passed None and only worked through the side effect of seeding the global
# RNG. Seed first, then pass the state object that `split` documents as its
# expected `random_state` value.
# NOTE: this rebinds `train_data`, so re-running this cell on its own would
# split the already-reduced training set a second time.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state = random.getstate())

In [11]:
#Getting the pre-trained word embeddings and building the vocab
# Upper bound on the number of vocabulary entries built from the training
# split (unigrams and bigrams share this one vocabulary).
MAX_VOCAB_SIZE = 25_000

# Build the text vocabulary from the training split only and attach the
# 100-dimensional GloVe 6B vectors.
# NOTE(review): unk_init presumably initialises tokens without a pre-trained
# vector from a normal distribution — confirm against the torchtext docs.
TEXT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

# Build the label vocabulary from the same training split.
TARGET.build_vocab(train_data)

.vector_cache/glove.6B.zip: 862MB [06:28, 2.22MB/s]                          
100%|█████████▉| 399769/400000 [00:21<00:00, 18047.08it/s]

In [0]:
#defining the batch size and defining the iterators for training and validation data
BATCH_SIZE = 64

# Both iterators share the same settings and differ only in the dataset and
# the `train` flag. `shuffle = None` leaves the shuffling decision to the
# iterator itself (presumably derived from `train` — confirm in the torchtext docs).
train_iterator = data.Iterator(
    dataset = train_data,
    batch_size = BATCH_SIZE,
    device = device,
    shuffle = None,
    train = True,
    sort_key = lambda x: len(x.text),
    sort_within_batch = False,
)
valid_iterator = data.Iterator(
    dataset = valid_data,
    batch_size = BATCH_SIZE,
    device = device,
    shuffle = None,
    train = False,
    sort_key = lambda x: len(x.text),
    sort_within_batch = False,
)

In [0]:
#defining the Fast_Text Class
import torch.nn as nn
import torch.nn.functional as F

class FastText(nn.Module):
    """Bag-of-embeddings classifier: embed tokens, average them, apply one linear layer.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        output_dim: number of outputs produced by the final linear layer.
        pad_idx: vocabulary index of the padding token, passed to
            ``nn.Embedding`` as ``padding_idx``.
    """

    def __init__(self, vocab_size, embedding_dim, output_dim, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.fc = nn.Linear(embedding_dim, output_dim)

    def forward(self, text):
        """Return one output row per sentence.

        Args:
            text: token-index tensor laid out as [sent len, batch size].

        Returns:
            Tensor of shape [batch size, output_dim].
        """
        # [sent len, batch size, emb dim] -> [batch size, sent len, emb dim]
        token_vectors = self.embedding(text).permute(1, 0, 2)

        # A pooling window spanning the whole sentence axis averages every
        # token vector of a sentence into one: [batch size, 1, emb dim].
        sent_len = token_vectors.shape[1]
        sentence_vectors = F.avg_pool2d(token_vectors, (sent_len, 1)).squeeze(1)

        # sentence_vectors = [batch size, emb dim]
        return self.fc(sentence_vectors)
        

In [0]:
#defining our models and the relevant parameters
# The embedding table gets one row per entry of the TEXT vocabulary.
INPUT_DIM = len(TEXT.vocab)
# 100 matches the dimensionality of the "glove.6B.100d" vectors requested
# when the vocabulary is built.
EMBEDDING_DIM = 100
# One logit per example (binary sentiment).
OUTPUT_DIM = 1
# Vocabulary index of the padding token, passed to the model as pad_idx.
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = FastText(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, PAD_IDX)

In [15]:
def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters.

    Parameters whose ``requires_grad`` is False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,500,301 trainable parameters


In [16]:
#Copying the pre-trained vectors to our embedding layers
# Vectors attached to the TEXT vocabulary when it was built.
pretrained_embeddings = TEXT.vocab.vectors

# Overwrite the embedding weights in place. copy_ returns the updated tensor,
# which is why this cell displays it as its output.
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.2647, -0.2753, -0.1325],
        [-0.8555, -0.7208,  1.3755,  ...,  0.0825, -1.1314,  0.3997],
        [-0.1897,  0.0500,  0.1908,  ..., -0.3980,  0.4765, -0.1598],
        ...,
        [ 0.4849, -0.6371,  0.0804,  ...,  0.5892, -0.1101, -0.2020],
        [ 0.0715,  0.3856, -0.5967,  ...,  0.0360,  0.2100, -0.9340],
        [ 0.3891,  1.3122, -2.8343,  ..., -0.7519, -1.7957,  1.2788]])

In [0]:
#Zeroing the initial weight of our unknown and padding tokens
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

# The embedding copy above overwrote every row, including the <unk> and <pad>
# rows, so both are reset to zero vectors here.
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [0]:
#defining our optimizer
import torch.optim as optim

# Adam over all model parameters, with the optimizer's default settings
# (no learning rate or other options are passed).
optimizer = optim.Adam(model.parameters())

In [0]:
#defining our loss and porting our model and loss to GPU
# BCEWithLogitsLoss takes raw logits, which is why the model's forward pass
# applies no sigmoid of its own.
criterion = nn.BCEWithLogitsLoss()

# Move both onto the device selected in the first cell (CUDA if available,
# otherwise CPU).
model = model.to(device)
criterion = criterion.to(device)

In [0]:
#defining the accuracy calculation method
def binary_accuracy(preds, y):
    """
    Fraction of predictions in a batch that match the labels.

    `preds` are raw logits; they are passed through a sigmoid and rounded to
    0/1 before being compared with `y`. The result is a fraction in [0, 1]
    (e.g. 0.8 for 8 correct out of 10), returned as a tensor.
    """
    hard_labels = torch.sigmoid(preds).round()
    hits = hard_labels.eq(y).float()  # float so the sum can be divided
    return hits.sum() / len(hits)

In [0]:
#defining the training method
def train(model, iterator, optimizer, criterion):
    """Run one training pass over ``iterator`` and update ``model`` in place.

    Args:
        model: module called on ``batch.text``; its output is squeezed on dim 1.
        iterator: iterable of batches exposing ``.text`` and ``.target``;
            must support ``len()``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(predictions, batch.target)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the number of batches.
    """
    model.train()

    running_loss, running_acc = 0, 0

    for batch in iterator:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        logits = model(batch.text).squeeze(1)
        batch_loss = criterion(logits, batch.target)
        batch_acc = binary_accuracy(logits, batch.target)

        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        running_acc += batch_acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [0]:
#defining the validation method
def evaluate(model, iterator, criterion):
    """Compute loss and accuracy over ``iterator`` without updating the model.

    Args:
        model: module called on ``batch.text``; its output is squeezed on dim 1.
        iterator: iterable of batches exposing ``.text`` and ``.target``;
            must support ``len()``.
        criterion: loss called as ``criterion(predictions, batch.target)``.

    Returns:
        ``(mean_loss, mean_accuracy)``, each averaged over the number of batches.
    """
    model.eval()

    running_loss, running_acc = 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in iterator:
            logits = model(batch.text).squeeze(1)
            running_loss += criterion(logits, batch.target).item()
            running_acc += binary_accuracy(logits, batch.target).item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

In [0]:
#defining the method to calculate epoch time
import time

def epoch_time(start_time, end_time):
    """Split the interval between two timestamps into whole minutes and seconds.

    Args:
        start_time: start timestamp in seconds.
        end_time: end timestamp in seconds.

    Returns:
        ``(minutes, seconds)`` as ints, both truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [24]:
#TRAINING!
# Number of full passes over the training iterator.
N_EPOCHS = 10

# Lowest validation loss seen so far; starts at +inf so the first epoch
# always writes a checkpoint.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    # The timed span covers both the training pass and the validation pass.
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Checkpoint only when validation loss improves, so 'tut3-model.pt' always
    # holds the weights of the best epoch so far rather than the latest one.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')

    # `epoch` is zero-based; it is shown one-based in the log.
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 1m 8s
	Train Loss: 0.485 | Train Acc: 77.71%
	 Val. Loss: 0.571 |  Val. Acc: 79.80%
Epoch: 02 | Epoch Time: 1m 4s
	Train Loss: 0.442 | Train Acc: 80.37%
	 Val. Loss: 0.596 |  Val. Acc: 79.66%
Epoch: 03 | Epoch Time: 1m 4s
	Train Loss: 0.437 | Train Acc: 80.68%
	 Val. Loss: 0.593 |  Val. Acc: 79.86%
Epoch: 04 | Epoch Time: 1m 3s
	Train Loss: 0.435 | Train Acc: 80.76%
	 Val. Loss: 0.588 |  Val. Acc: 79.94%
Epoch: 05 | Epoch Time: 1m 4s
	Train Loss: 0.434 | Train Acc: 80.85%
	 Val. Loss: 0.590 |  Val. Acc: 79.88%
Epoch: 06 | Epoch Time: 1m 4s
	Train Loss: 0.433 | Train Acc: 80.90%
	 Val. Loss: 0.588 |  Val. Acc: 79.88%
Epoch: 07 | Epoch Time: 1m 4s
	Train Loss: 0.432 | Train Acc: 80.93%
	 Val. Loss: 0.598 |  Val. Acc: 79.89%
Epoch: 08 | Epoch Time: 1m 4s
	Train Loss: 0.432 | Train Acc: 81.01%
	 Val. Loss: 0.595 |  Val. Acc: 79.93%
Epoch: 09 | Epoch Time: 1m 3s
	Train Loss: 0.431 | Train Acc: 81.03%
	 Val. Loss: 0.591 |  Val. Acc: 79.92%
Epoch: 10 | Epoch Time: 1m 4