In [25]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
import random
import sys

In [26]:
sys.path.insert(1, '/Users/manjugupta/Desktop/CMU_Courses/Intents/getting_intents/Analysis')

In [27]:
from get_vocab import load_data

In [37]:
#Check if cuda is available
cuda = torch.cuda.is_available()
print('CUDA is', cuda)

num_workers = 8 if cuda else 0

print(num_workers)

CUDA is False
0


In [110]:
language = 'english'
context_size = 2
emb_size = 22 # size of embeddings you want to create
embedding_data = load_data('embedding_data_' + language + '_' + str(context_size) + '.pkl')
vocab_size = len(embedding_data) + 1

print(embedding_data.keys())
print(vocab_size)

dict_keys(['uː', 'p', 'l', 'k', 'ŋ', 'x', 'ʔ', 'v', 'iː', 'n', 'eː', 'a', 'ɻ', 'ɹ', 'pʰ', 'd', 'ʊ', 's', 'f', 'w', 'ʃ', 'ʒ', 'ɡ', 'j', 'e', 'ə', 'ʌ', 'ɒ', 'z', 'æ', 'h', 'tʰ', 'b', 'kʰ', 'ɯ', 'u', 'θ', 'ɑ', 'i', 't', 'ɪ', 'aː', 'ð', 'r', 'm', 'ɔ', 'ɛ', 'o'])
49


In [56]:
feature_vectors = {}        #create a dictionary of feature vectors
for i, phone in enumerate(embedding_data):
    feature_vectors[phone] = [0]*(len(embedding_data) + 1)
    feature_vectors[phone][i] = 1
    
feature_vectors['unk'] = [0]*(len(embedding_data) + 1)
feature_vectors['unk'][len(embedding_data)] = 1

In [106]:
class MyDataset(Dataset):
    def __init__(self, embedding_data, feature_vectors, phone):
        self.label = np.argmax(np.array(feature_vectors[phone]))

        self.data = []
        for context in embedding_data[phone]:
            phone_to_vector = []
            for c in context:
                phone_to_vector.append(feature_vectors[c])
            self.data.append(torch.from_numpy(np.array(phone_to_vector)).float())
            
    def __len__(self):
        return len(self.data)

    def __getitem__(self,index):
        
        return self.data[index], self.label

In [107]:
phone = 'p'
train_dataset = MyDataset(embedding_data, feature_vectors, phone)
train_loader_args = dict(shuffle=True, batch_size=128, num_workers=num_workers, pin_memory=True) if cuda\
                    else dict(shuffle=True, batch_size=32)
train_loader = DataLoader(train_dataset, **train_loader_args)

In [108]:
len(train_loader)

115

In [109]:
for data in train_loader:
    print(data[0].shape, data[1])
    break

torch.Size([32, 4, 49]) tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])


In [122]:
class My_Model(nn.Module):
    def __init__(self, emb_size, vocab_size):
        super(My_Model, self).__init__()
        self.fc1 = nn.Linear(vocab_size, emb_size, bias = False)
        self.fc2 = nn.Linear(emb_size, vocab_size, bias = False)

    def forward(self, x):
        outputs = self.fc1(x[:,0,:])
        for i in range(1, x.shape[1]):
            outputs += self.fc1(x[:,i,:])
                           
        outputs /= outputs/x.shape[1]
                           
        output = self.fc2(outputs)   
        return output

In [123]:
model = My_Model(emb_size, vocab_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
device = torch.device("cuda" if cuda else "cpu")
model.to(device)
print(model)
print(optimizer)

My_Model(
  (fc1): Linear(in_features=49, out_features=22, bias=False)
  (fc2): Linear(in_features=22, out_features=49, bias=False)
)
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    weight_decay: 0
)


In [124]:
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()

    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0
    
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):   
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(device)
        target = target.to(device) # all data & model on same device

        outputs = model(data)
        loss = criterion(outputs, target)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += target.size(0)
        correct_predictions += (predicted == target).sum().item()
    
            
    end_time = time.time()
    
    acc = (correct_predictions/total_predictions)*100.0
    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')  
    print('Training Accuracy: ', acc, '%')
    return running_loss

In [125]:
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(test_loader):   
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()


        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc

In [129]:
Train_loss = []
Test_loss = []
Test_acc = []
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, cooldown=5)

for i in range(1):
    print('epoch:', i)
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    Train_loss.append(train_loss)

epoch: 0
Training Loss:  2.685697765390733e-05 Time:  0.13553190231323242 s
Training Accuracy:  100.0 %


In [138]:
model.fc1.weight.shape

torch.Size([22, 49])