In [0]:
# Input files: embedding file in CSV format (rows alternate: 1st, 3rd, 5th, ... rows hold question embeddings; 2nd, 4th, 6th, ... rows hold the matching paragraph embeddings)
# Output files: weights.pth --> trained model

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import time
import os
import copy
import numpy as np

In [0]:
# ---- File locations ---------------------------------------------------------
basePath = '/content/'

modelSave = os.path.join(basePath, 'weights.pth')  # trained weights are written here
trainFile = os.path.join(basePath, 'temp.csv')     # CSV of interleaved embeddings used for training
testFile = os.path.join(basePath, 'temp.csv')      # NOTE: currently the same file as trainFile

# ---- Hyper-parameters -------------------------------------------------------
num_epochs = 2
batch_size = 8
learning_rate = 1e-4
input_dim = 200  # used as both the input and the output width of the network

In [0]:
# Prefer the first CUDA device when one is visible; otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [20]:
# Disabled helper, kept as a bare string literal so it is never executed
# (the notebook merely echoes the string as this cell's output).
# If run as code it would extract every .zip archive listed in /content/
# into /content/.
'''import zipfile
import os
for file_name in os.listdir('/content/'):
  if file_name.endswith('.zip'):
    with zipfile.ZipFile(file_name,'r') as zip_dir:
      zip_dir.extractall(path='/content/')'''

"import zipfile\nimport os\nfor file_name in os.listdir('/content/'):\n  if file_name.endswith('.zip'):\n    with zipfile.ZipFile(file_name,'r') as zip_dir:\n      zip_dir.extractall(path='/content/')"

In [0]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train ``model`` for ``num_epochs`` passes over ``dataloaders``.

    Args:
        model: ``nn.Module`` to optimise. It is put into training mode at the
            start of every epoch.
        dataloaders: a single iterable yielding ``(inputs, targets)`` tensor
            batches; it must also expose ``.dataset`` because the dataset
            length is used to average the loss.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: optimizer already bound to the parameters to update.
        num_epochs: number of passes over ``dataloaders``.

    Returns:
        tuple: ``(model, history)`` where ``model`` is the same object that was
        passed in and ``history`` is a list with one average loss per epoch.
    """
    since = time.time()

    # Move batches to wherever the model's weights actually live instead of
    # relying on the notebook-level `device`; that global is only consulted for
    # a model that has no parameters at all.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    history = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        model.train()  # Set model to training mode

        running_loss = 0.0

        # Iterate over data.
        for inpEmbs, outEmbs in dataloaders:
            inpEmbs = inpEmbs.to(target_device)
            outEmbs = outEmbs.to(target_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # Force autograd on even if the caller is inside a no-grad context.
            with torch.set_grad_enabled(True):
                # Get model outputs and calculate loss
                outputs = model(inpEmbs)
                loss = criterion(outputs, outEmbs)

                # backward + optimize
                loss.backward()
                optimizer.step()

            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes `criterion` returns a batch mean).
            running_loss += loss.item() * inpEmbs.size(0)

        epoch_loss = running_loss / len(dataloaders.dataset)
        print('Loss: {:.4f}'.format(epoch_loss))
        history.append(epoch_loss)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return model, history

In [0]:
def initialize_model(in_dim=None, hidden_dim=1024, out_dim=None):
    """Build the feed-forward network that maps one embedding to another.

    The network is three ``Linear -> ReLU -> BatchNorm1d`` stages followed by
    a final ``Linear`` projection. The layer order is fixed, so the
    ``state_dict`` keys (``0.weight`` ... ``9.bias``) do not depend on the
    sizes chosen here.

    Args:
        in_dim: width of the input vectors. Defaults to the notebook-level
            ``input_dim`` when omitted.
        hidden_dim: width of the three hidden stages.
        out_dim: width of the output vectors. Defaults to ``in_dim``.

    Returns:
        nn.Sequential: the freshly initialised model (not moved to any device).
    """
    if in_dim is None:
        in_dim = input_dim  # notebook-level default, as in the zero-argument call
    if out_dim is None:
        out_dim = in_dim

    model = nn.Sequential(
        nn.Linear(in_dim, hidden_dim),
        nn.ReLU(),
        nn.BatchNorm1d(hidden_dim),

        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.BatchNorm1d(hidden_dim),

        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.BatchNorm1d(hidden_dim),

        nn.Linear(hidden_dim, out_dim),
    )

    return model

In [0]:
def getDataLoaders(path, batchSize=None):
    """Load interleaved question/paragraph embeddings into a DataLoader.

    The CSV at ``path`` is read as a numeric matrix. Rows 0, 2, 4, ... are
    taken as question embeddings and rows 1, 3, 5, ... as the paragraph
    embeddings paired with them. Batches are served in file order.

    Args:
        path: location of the comma-separated embedding file.
        batchSize: number of pairs per batch. Defaults to the notebook-level
            ``batch_size`` when omitted.

    Returns:
        DataLoader: yields ``(question, paragraph)`` float tensor batches.

    Raises:
        ValueError: if the file holds an odd number of rows, so at least one
            question has no paragraph to pair with.
    """
    if batchSize is None:
        batchSize = batch_size  # notebook-level default

    # ndmin=2 keeps a one-row (or one-column) file two-dimensional; without it
    # loadtxt returns a 1-D array and the row slicing below would silently
    # pair up individual scalars instead of whole embeddings.
    dataload = np.loadtxt(path, delimiter=',', ndmin=2)

    if dataload.shape[0] % 2 != 0:
        raise ValueError(
            'Expected an even number of rows (question/paragraph pairs) in '
            '{!r}, found {}'.format(path, dataload.shape[0])
        )

    quesEmb = torch.tensor(dataload[::2], dtype=torch.float)
    paraEmb = torch.tensor(dataload[1::2], dtype=torch.float)
    print(quesEmb.shape)
    print(paraEmb.shape)

    eval_data = TensorDataset(quesEmb, paraEmb)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batchSize)
    return eval_dataloader

In [0]:
def getUpdatablePara(model):
    """Return the parameters of ``model`` that will receive gradients.

    Prints the "Params to learn:" banner, then gathers every parameter whose
    ``requires_grad`` flag is set, in ``named_parameters()`` order.

    Args:
        model: module whose parameters are inspected.

    Returns:
        list: the trainable parameter tensors.
    """
    print("Params to learn:")
    return [weight for _name, weight in model.named_parameters() if weight.requires_grad]

In [0]:
def testModel(model, dataloaders, criterion):
    running_loss = 0
    for inpEmbs, outEmbs in dataloaders:
        inpEmbs = inpEmbs.to(device)
        outEmbs = outEmbs.to(device)

        outputs = model(inpEmbs)
        loss = criterion(outputs,outEmbs)

        running_loss += loss.item() * inpEmbs.size(0)

    epoch_loss = running_loss / len(dataloaders.dataset)
    print('Loss: {:.5f}'.format(epoch_loss))

# **Training**

In [0]:
# Build a fresh network and place it on the selected device in one step.
model = initialize_model().to(device)
# print(model)  # uncomment to inspect the layer stack

In [27]:
# Build the training DataLoader from trainFile; getDataLoaders prints the
# question and paragraph tensor shapes and serves batches in file order.
dataloaders = getDataLoaders(trainFile)

torch.Size([4, 200])
torch.Size([4, 200])


In [28]:
# Collect the parameters whose requires_grad flag is set; this list is what
# the optimizer is constructed over.
params_to_update = getUpdatablePara(model)

Params to learn:


In [29]:
# Loss: mean-squared error between predicted and target embeddings.
criterion = nn.MSELoss()
# criterion = nn.MSELoss(reduction='sum')  # summed variant, left disabled

# SGD with momentum over every parameter gathered in params_to_update.
optimizer_ft = optim.SGD(params_to_update, momentum=0.9, lr=learning_rate)

trainedModel, hist = train_model(
    model,
    dataloaders,
    criterion,
    optimizer_ft,
    num_epochs=num_epochs,
)

# Persist only the learned weights (the state_dict), not the module object.
torch.save(trainedModel.state_dict(), modelSave)

Epoch 0/1
----------
Loss: 0.3035
Epoch 1/1
----------
Loss: 0.2996
Training complete in 0m 0s


# **Testing**

In [30]:
# Resolve the device first so the checkpoint can be mapped onto it while loading.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = initialize_model()

# map_location lets weights saved from a CUDA run be restored on a CPU-only
# runtime (and vice versa) instead of failing inside torch.load.
model.load_state_dict(torch.load(modelSave, map_location=device))
model = model.to(device)
model.eval()  # evaluation mode: BatchNorm uses its stored running statistics

dataloaders = getDataLoaders(testFile)
criterion = nn.MSELoss()
testModel(model,dataloaders,criterion)

torch.Size([4, 200])
torch.Size([4, 200])
Loss: 0.00048
