# Bias Test Submission

In [None]:
import torch 
from torch.utils.data import dataset
import torch.nn as nn
import os

## Load the PyTorch Model

In [None]:
# Look for the checkpoint by file name among the entries of the current working directory.
weights_in_dir = 'saved_weights.pt' in os.listdir()
if weights_in_dir:
  print("saved_weights.pt is in the current working directory. You may proceed.")
else:
  print("Please add your model as saved_weights.pt to the current working directory before proceeding.")

saved_weights.pt is in the current working directory. You may proceed.


In [None]:
path= 'saved_weights.pt'
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on a
# machine without CUDA; the tensors are only read for their sizes/values below.
weights = torch.load(path, map_location='cpu')

## Define and instantiate the model
This will be presented to the submitter as a stub. They can fill it in as they see fit.

In [None]:
class classifier(nn.Module):
    """Embedding -> LSTM -> linear -> sigmoid text classifier.

    NOTE(review): ``fc`` is sized ``hidden_dim * 2`` and ``forward`` concatenates
    ``hidden[-2]`` with ``hidden[-1]``, so the architecture as written assumes
    ``bidirectional=True``. This is kept unchanged so existing checkpoints keep
    loading; confirm before using it with ``bidirectional=False``.
    """

    #define all the layers used in model
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        """Create the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            embedding_dim: size of each token embedding (LSTM input size).
            hidden_dim: LSTM hidden size per direction.
            output_dim: number of output units of the dense layer.
            n_layers: number of stacked LSTM layers.
            bidirectional: forwarded to ``nn.LSTM`` (see class note).
            dropout: dropout probability forwarded to ``nn.LSTM``.
        """
        #Constructor
        super().__init__()

        #embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        #lstm layer
        self.lstm = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers=n_layers,
                           bidirectional=bidirectional,
                           dropout=dropout,
                           batch_first=True)

        #dense layer (input is the forward and backward final states concatenated)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

        #activation function
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        """Run the classifier on a padded batch.

        Args:
            text: integer tensor of token indices, [batch size, sent len].
            text_lengths: true (unpadded) length of every row of ``text``,
                as a tensor or a list of ints, in any order.

        Returns:
            Sigmoid outputs of shape [batch size, output dim].
        """
        #text = [batch size, sent len]
        embedded = self.embedding(text)
        #embedded = [batch size, sent len, emb dim]

        #pack_padded_sequence requires a lengths tensor to live on the CPU
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()

        #packed sequence; enforce_sorted=False accepts batches whose lengths are
        #not in descending order and returns the hidden states in input order
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, text_lengths, batch_first=True, enforce_sorted=False)

        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        #batch_first only affects the input/output tensors, not the states:
        #hidden = [num layers * num directions, batch size, hid dim]
        #cell = [num layers * num directions, batch size, hid dim]

        #concat the final forward and backward hidden state of the last layer
        hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)

        #hidden = [batch size, hid dim * num directions]
        dense_outputs=self.fc(hidden)

        #Final activation function
        outputs=self.act(dense_outputs)

        return outputs

## Set hyperparameters
Again, this will be presented to the submitter as a stub.

In [None]:
def instantiate_model():
  """Build the classifier and load the saved weights into it.

  The vocabulary size is read from the first dimension of the checkpoint's
  "embedding.weight" tensor; the remaining hyperparameters are set here and
  must match the ones the checkpoint was trained with, otherwise
  load_state_dict raises.

  Returns:
    The classifier with the checkpoint parameters loaded, in eval mode
    (dropout disabled) so that predictions are deterministic.
  """
  size_of_vocab = weights["embedding.weight"].size(0)
  embedding_dim = 100
  num_hidden_nodes = 32
  num_output_nodes = 1
  num_layers = 2
  bidirection = True
  dropout = 0.2

  #instantiate the model
  model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                     bidirectional = bidirection, dropout = dropout)

  # Without this the model keeps its random initialisation and the saved
  # weights are only ever used to look up the vocabulary size.
  model.load_state_dict(weights)
  model.eval()
  return model

In [None]:
# Build the classifier once at notebook level; the prediction test cell below passes this `model` to predict().
model = instantiate_model()
# Show the layer structure so the hyperparameters can be checked by eye.
print(model)

classifier(
  (embedding): Embedding(13906, 100)
  (lstm): LSTM(100, 32, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
  (act): Sigmoid()
)


## Define a preprocessing function

In [None]:
# import any packages required for preprocessing 

from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')
import string
from django.core.validators import URLValidator
from django.core.exceptions import ValidationError

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
def preprocess(input_string,stop_words=None,punct_strip=None):
  """Clean a raw text string before it is tokenised for the model.

  The text is split on single spaces and each token is lower-cased. Stop words,
  @mentions and tokens accepted by Django's URLValidator are dropped; the
  remaining tokens have punctuation removed, and tokens left empty are discarded.

  Args:
    input_string: the raw text.
    stop_words: collection of lower-case tokens to drop. Defaults to the NLTK
      English stop-word list.
    punct_strip: translation table passed to str.translate. Defaults to a table
      that deletes every character in string.punctuation.

  Returns:
    The surviving tokens joined by single spaces.
  """
  def is_url(token):
    """Return True when URLValidator accepts the token, False otherwise."""
    val = URLValidator()
    try:
      val(token)
    except ValidationError:
      return False
    return True

  if stop_words is None:
    stop_words = set(stopwords.words('english'))
  if punct_strip is None:
    punct_strip = str.maketrans('', '', string.punctuation)

  final_input_string = []

  # tokenize on single spaces (kept as-is so the cleaning stays identical to
  # what the rest of the notebook expects)
  for token in input_string.split(" "):
    # lower case
    token = token.lower()

    # remove stop words, mentions, urls; this runs before punctuation is
    # stripped, so a stop word with punctuation attached is not filtered here
    if token and (token in stop_words or token[0] == '@' or is_url(token)):
      continue # do not append it to final_input_string

    # remove punctuation
    token = token.translate(punct_strip)

    # a token made only of punctuation is empty by now
    if token:
      final_input_string.append(token)

  return " ".join(final_input_string)


In [None]:
# Test preprocessor: the @mention and the URL are dropped, the remaining tokens are lower-cased and stripped of punctuation.
preprocess("hello @mr http://hello.com hello Hello!")

'hello hello hello'

## Define a prediction function

In [None]:

# spaCy pipeline; predict() below uses only its tokenizer.
import spacy
nlp = spacy.load('en')
# NOTE(review): `data` is not bound by any import visible in this notebook (the import cell
# brings in `dataset` from torch.utils.data, not `data`). Field / LabelField / TabularDataset
# look like torchtext's data API — confirm and import it before this cell, otherwise this
# line raises NameError on a fresh kernel.
TEXT = data.Field(tokenize='spacy',batch_first=True,include_lengths=True)
LABEL = data.LabelField(dtype = torch.float,batch_first=True)
# Column layout of the CSV: a text column followed by a label column.
fields = [('text',TEXT),('label', LABEL)]
# NOTE(review): absolute Google Drive path; this cell only runs where that file is mounted.
train_data=data.TabularDataset(path = '/content/drive/My Drive/PyTorchData/train.csv',format = 'csv',fields = fields,skip_header = True)
# Build the vocabulary from the training CSV; predict() maps tokens to indices through TEXT.vocab.stoi.
# Presumably this has to reproduce the vocabulary the saved weights were trained with — verify.
TEXT.build_vocab(train_data,min_freq=0,vectors = "glove.6B.100d")  
# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  

def predict(model, sentence):
    """Classify a single sentence into one of the three output classes.

    The sentence is cleaned with preprocess(), tokenised with the spaCy
    tokenizer, mapped to vocabulary indices and passed through the model as a
    batch of one. The forward pass runs in eval mode without gradient tracking;
    the model's previous train/eval mode is restored afterwards.

    Args:
        model: the classifier; called as model(token_tensor, length_tensor).
        sentence: raw input text.

    Returns:
        "Unintelligent" for a score below 0.3, "Moderately Intelligent" for a
        score below 0.6, otherwise "Intelligent".
    """
    sentence = preprocess(sentence)
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]  #tokenize the sentence
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]          #convert to integer sequence
    if not indexed:
        # Preprocessing can remove every token (e.g. only stop words, mentions
        # or URLs). A zero-length sequence cannot be packed for the LSTM, so
        # feed a single unknown token instead of crashing.
        indexed = [TEXT.vocab.stoi[TEXT.unk_token]]
    length_tensor = torch.LongTensor([len(indexed)])           #no. of words, kept on the CPU

    # Put the input on the device the model's parameters actually live on; the
    # notebook-level `device` may be CUDA while the model was never moved there.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')
    tensor = torch.LongTensor(indexed).unsqueeze(0).to(model_device)  #shape: [1, no. of words]

    # Inference only: disable dropout and gradient tracking, then restore the
    # caller's train/eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            prediction = model(tensor, length_tensor)[0].item()
    finally:
        model.train(was_training)

    ### Quantitative to Class Name
    if prediction < .3:
      return "Unintelligent"
    elif prediction < .6:
      return "Moderately Intelligent"
    else:
      return "Intelligent"

## Define your output classes in social impact order

In [None]:
# The three class names returned by predict(), listed in the same order as its ascending score thresholds.
social_impact_order = ["Unintelligent","Moderately Intelligent","Intelligent"]

## Test to see the prediction function returns an output class

In [None]:
# Smoke test: classify one short sample and check that the returned label is one of the declared classes.
print("Running model on an example short text.")
predicted_class = predict(model,"hello @hello https://hello.com what's up")
result = predicted_class in social_impact_order

print("Prediction class is listed in social_impact_order: " + str(result))

Running model on an example short text
Prediction class is listed in social_impact_order: True
