# Insult Detector
Have you ever wondered whether someone is insulting you online? Well, now you can find out with one simple neural network!

In [None]:
import numpy as np
import scipy as scp
import json
import torch
from torch import nn

### Reformatting the vocabulary file
Dictionary lookups are fast in Python, and we need the "index" of each word in the vocabulary. So, this rewrites the vocabulary file as a mapping from word -> index.

In [4]:
# Take the original dictionary of words and index it
def reformat_vocab():
    """Rewrite ``vocab.json`` in place as a word -> position mapping.

    The file is read as a JSON object; each key is assigned its position
    in the object's key order (starting at 0) and the original values are
    discarded. The result is written back to the same file with a
    two-space indent.
    """
    with open('vocab.json', 'r') as source:
        words = json.load(source)

    # .keys() is kept on purpose: it requires the JSON root to be an object.
    indexed = {word: position for position, word in enumerate(words.keys())}

    with open('vocab.json', 'w') as target:
        json.dump(indexed, target, indent=2)

### Data Import and Formatting

We need to import the training text and labels, as well as construct the input matrix for each datum.

In [2]:
import string

def load_vocab():
    """Return the parsed contents of ``vocab.json`` from the working directory."""
    with open("vocab.json") as handle:
        return json.load(handle)

def comment_parse(comment, vocab=None, embedding_dim=300):
    """Turn a raw comment into a (number of words, embedding_dim) matrix.

    The comment is lower-cased, stripped of ASCII punctuation and split on
    whitespace. Row ``i`` of the result is filled from ``vocab[word_i]``;
    words missing from the vocabulary leave their row as zeros.

    Args:
        comment: the comment text. ``bytes`` are decoded as UTF-8 first,
            because ``np.genfromtxt`` may pass raw bytes to converters
            depending on its ``encoding`` setting.
        vocab: mapping from word to a value assignable to one row (a scalar
            or a length-``embedding_dim`` sequence). When omitted the
            vocabulary is read with ``load_vocab()`` on every call, so pass
            it explicitly when parsing many comments.
        embedding_dim: number of columns in the returned matrix.

    Returns:
        ``numpy.ndarray`` of zeros-initialised floats with one row per word.
    """
    if isinstance(comment, bytes):
        comment = comment.decode("utf-8", errors="replace")

    # to lower
    comment = comment.lower()
    # remove punctuation (str.maketrans is the Python 3 spelling)
    comment = comment.translate(str.maketrans('', '', string.punctuation))

    if vocab is None:
        vocab = load_vocab()

    words = comment.split()
    # The shape must be a single tuple; a second positional argument is the dtype.
    glove = np.zeros((len(words), embedding_dim))

    for i, word in enumerate(words):
        if word in vocab:
            # NOTE(review): assumes vocab values are embedding vectors (or
            # scalars, which broadcast across the row) -- confirm against
            # what vocab.json actually stores.
            glove[i] = vocab[word]

    return glove

def load_train():
    """Read labels and parsed comments from ``data/train.csv``.

    The file is read twice with ``np.genfromtxt``, skipping the header row
    each time: column 0 supplies the labels, and column 2 is passed through
    ``comment_parse`` to supply the training inputs.

    Returns:
        ``(train, labels)`` in that order.
    """
    # NOTE(review): genfromtxt splits on every comma and has no notion of
    # quoted fields -- confirm the comment column never contains commas.
    csv_path = "data/train.csv"
    shared_options = dict(delimiter=",", dtype=None, skip_header=1)

    labels = np.genfromtxt(csv_path, usecols=0, **shared_options)
    train = np.genfromtxt(
        csv_path,
        usecols=2,
        converters={2: comment_parse},
        **shared_options,
    )

    return train, labels

In [None]:
# train, labels = load_train()

In [None]:
def get_device():
    """Pick the torch device to run on and report the choice.

    Returns:
        ``torch.device('cuda')`` when CUDA is available, otherwise
        ``torch.device('cpu')``. Prints which one was selected.
    """
    # The availability check lives in the torch.cuda namespace.
    use_gpu = torch.cuda.is_available()

    if use_gpu:
        device = torch.device('cuda')
        print('GPU used')
    else:
        device = torch.device('cpu')
        print('CPU used')
    return device

In [None]:
class Model(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear layer applied to every time step.

    Args:
        input_size: number of features per time step.
        output_size: number of values the linear layer emits per time step.
        hidden_dim: width of the recurrent hidden state.
        n_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` from a zero hidden state.

        Args:
            x: tensor laid out as (batch, sequence, input_size).

        Returns:
            ``(out, hidden)`` where ``out`` has shape
            (batch * sequence, output_size) -- every time step of every
            sequence flattened into rows -- and ``hidden`` is the final
            hidden state returned by the RNN.
        """
        batch_size = x.size(0)

        hidden = self.init_hidden(batch_size)

        out, hidden = self.rnn(x, hidden)

        # Flatten batch and time so the linear layer scores each step.
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, size):
        """Return a zero hidden state of shape (n_layers, size, hidden_dim).

        The tensor is allocated from one of the model's own parameters so
        that it follows the model's device and dtype; a plain
        ``torch.zeros`` would stay on the CPU in float32 and break
        ``forward`` after ``model.to('cuda')`` or ``model.double()``.
        """
        reference = next(self.parameters())
        return reference.new_zeros(self.n_layers, size, self.hidden_dim)

In [None]:
def train(input_seq=None, target_seq=None, output_size=2, hidden_dim=12,
          n_layers=1, n_epochs=100, learning_rate=0.01):
    """Fit a ``Model`` on one batch of sequences and return it.

    Each epoch runs a full forward/backward pass over ``input_seq`` with
    cross-entropy loss and an Adam update; the loss is printed every tenth
    epoch.

    Args:
        input_seq: float tensor laid out as (batch, sequence, features);
            its last dimension is used as the model's input size.
        target_seq: class indices, one per time step of every sequence
            (it is flattened with ``view(-1)`` to line up with the model's
            flattened output).
        output_size: number of classes the model scores.
        hidden_dim: width of the RNN hidden state.
        n_layers: number of stacked RNN layers.
        n_epochs: number of optimisation steps.
        learning_rate: Adam learning rate.

    Returns:
        The trained ``Model``.

    Raises:
        ValueError: if ``input_seq`` or ``target_seq`` is not supplied.
    """
    # NOTE(review): the output_size / hidden_dim / n_layers defaults are
    # placeholders -- the model sizes were previously an unfilled TODO.
    if input_seq is None or target_seq is None:
        raise ValueError("train() requires both input_seq and target_seq")

    device = get_device()
    # send tensors to the same device as the model
    input_seq = input_seq.to(device)
    target_seq = target_seq.to(device)

    model = Model(input_seq.size(-1), output_size, hidden_dim, n_layers)
    model.to(device)

    # Loss function & optimization
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(1, n_epochs + 1):
        optimizer.zero_grad()
        output, _ = model(input_seq)
        loss = criterion(output, target_seq.view(-1).long())
        loss.backward()  # Do backprop
        optimizer.step()  # Update weights

        if epoch % 10 == 0:
            print('Epoch: %d/%d.............' % (epoch, n_epochs), end=' ')
            print("Loss: {:.4f}".format(loss.item()))

    return model