## Content
1. [Loading preprocessed data](#1.-Loading-preprocessed-data)  
2. [Solutions](#2.-Solutions)  
    2.1 [Random approach](#2.1-Random-approach)  
    2.2 [String kernels](#2.2-String-kernels)   
      2.2.1 [Spectrum kernel](#2.2.1-Spectrum-kernel)  
      2.2.2 [Presence kernel](#2.2.2-Presence-kernel)  
      2.2.3 [Intersection kernel](#2.2.3-Intersection-kernel)  
    2.3 [Sentence similarity](#2.3-Sentence-similarity)   
3. [Results](#3.-Results)  

## 1. Loading preprocessed data

In [1]:
import json
from collections import Counter
from pprint import pprint

# Directory prefix for the preprocessed SQuAD JSON files; file names are
# concatenated to it directly, so the trailing slash is required.
IN_PATH = '../data/squad/'


def load_data(filename):
    """Parse the JSON file at ``filename`` and return the decoded object."""
    with open(filename) as handle:
        return json.load(handle)

In [2]:
# Load both preprocessed SQuAD v1.1 files; `train` and `dev` are reused by
# the evaluation cells further down.
train = load_data(IN_PATH + 'train-v1.1-preprocessed.json')
dev = load_data(IN_PATH + 'dev-v1.1-preprocessed.json')

## 2. Solutions

**Let's import the evaluation module.**

In [3]:
import evaluation

**Let's define a generic function to run the different methods (solutions/approaches).**

In [4]:
def run_method(method, question_words, candidate_answers_words):
    """Dispatch to the solution selected by ``method`` and return its ranking.

    ``'random_solution'`` must match exactly; any name containing
    ``'kernel'`` goes to ``kernel_solution`` (with the name forwarded as the
    kernel type); any name containing ``'sentence_similarity'`` goes to
    ``sentence_similarity_solution``.  Unrecognised names yield ``None``.
    """
    if method == 'random_solution':
        ranking = random_solution(len(candidate_answers_words))
    elif 'kernel' in method:
        ranking = kernel_solution(question_words, candidate_answers_words, method)
    elif 'sentence_similarity' in method:
        ranking = sentence_similarity_solution(question_words, candidate_answers_words)
    else:
        ranking = None

    return ranking


def run(dataset, method='random_solution'):
    """Evaluate a ranking method on every question of a dataset.

    For each question, the sentences of its paragraph are ranked with
    ``run_method`` and the ranking is scored against the labels of the
    question's answers using the ``evaluation`` module.  A progress counter
    is printed every 10000 questions.

    Args:
        dataset: Parsed dataset.  ``dataset['data']`` is iterated as
            articles, each with ``'paragraphs'``.  A paragraph provides
            ``'qas'`` and ``'context_sentences_words'``; each question
            provides ``'question_words'`` and ``'answers'``, whose items
            carry an ``'answer_label'``.
        method: Method name forwarded to ``run_method``.

    Returns:
        dict with ``'Method'``, the per-question lists ``'Prec@1'`` and
        ``'AvgPrec'``, and the aggregates ``'MAP'``/``'StdAP'`` (mean/std of
        the average precisions) and ``'AvgPrec@1'``/``'StdPrec@1'`` (mean/std
        of precision@1).  The ``'Prec@5'`` and ``'Prec@10'`` keys are kept
        for compatibility but are not filled.
    """
    results = {'Method': method, 'Prec@1': [], 'Prec@5': [], 'Prec@10': [],
               'AvgPrec': [], 'MAP': 0}
    processed = 0

    for article in dataset['data']:
        for paragraph in article['paragraphs']:
            for qas in paragraph['qas']:
                # Gold labels: one sentence label per annotated answer.
                y = [answer['answer_label'] for answer in qas['answers']]

                # Rank the paragraph's sentences for this question.
                y_pred = run_method(method, qas['question_words'],
                                    paragraph['context_sentences_words'])

                results['Prec@1'].append(
                    evaluation.precision_at_k(y_pred, y, k=1))
                results['AvgPrec'].append(
                    evaluation.average_precision(y_pred, y))

                processed += 1
                if processed % 10000 == 0:
                    print('{}'.format(processed))

    # Aggregate over all questions (MAP = mean average precision).
    results['MAP'] = np.mean(results['AvgPrec'])
    results['StdAP'] = np.std(results['AvgPrec'])
    results['AvgPrec@1'] = np.mean(results['Prec@1'])
    results['StdPrec@1'] = np.std(results['Prec@1'])

    return results

### 2.1 Random approach

In [5]:
import numpy as np

def random_solution(n):
    """Return the labels ``0 .. n-1`` in random order as a plain list."""
    shuffled_labels = np.random.permutation(n)
    return shuffled_labels.tolist()

In [6]:
def write_results(results):
    """Print the method name, mean precision@1 and MAP (each with its std).

    ``results`` is the dictionary produced by ``run``; a blank separator is
    printed after the summary.
    """
    report_lines = [
        'Method: {}'.format(results['Method']),
        'AvgPrec@1: {} (std = {})'.format(results['AvgPrec@1'], results['StdPrec@1']),
        'MAP: {} (std = {})'.format(results['MAP'], results['StdAP']),
        '\n',
    ]
    for line in report_lines:
        print(line)

In [7]:
# Baseline: evaluate random rankings on the training data and print the summary.
train_results = run(train, method='random_solution')
write_results(train_results)

10000
20000
30000
40000
50000
60000
70000
80000
Method: random_solution
AvgPrec@1: 0.247263096611 (std = 0.431420974994)
MAP: 0.49216751562 (std = 0.312040888335)




### 2.2 String kernels

**I will try string kernels at the word level, even though they are normally used as a character-level method.**

**Auxiliary methods**

In [8]:
def num(word, sentence_words):
    """Count how many items of ``sentence_words`` are equal to ``word``."""
    return sum(1 for candidate in sentence_words if word == candidate)

### 2.2.1 Spectrum kernel

In [9]:
def spectrum_kernel_value(question_words, sentence_words):
    """Word-level spectrum kernel between a question and a sentence.

    The value is the sum, over every word occurring in both sequences, of
    (occurrences in the question) * (occurrences in the sentence).

    Args:
        question_words: Sequence of (hashable) question tokens.
        sentence_words: Sequence of (hashable) sentence tokens.

    Returns:
        int: The kernel value; 0 when no word is shared.
    """
    # Count each sequence once instead of rescanning both lists for every
    # shared word.
    question_counts = Counter(question_words)
    sentence_counts = Counter(sentence_words)

    return sum(count * sentence_counts[word]
               for word, count in question_counts.items()
               if word in sentence_counts)

### 2.2.2 Presence kernel

In [10]:
def presence_kernel_value(question_words, sentence_words):
    """Word-level presence kernel between a question and a sentence.

    Args:
        question_words: Sequence of (hashable) question tokens.
        sentence_words: Sequence of (hashable) sentence tokens.

    Returns:
        int: Number of distinct words that occur in both sequences,
        regardless of how often each one is repeated.
    """
    return len(set(question_words).intersection(sentence_words))

### 2.2.3 Intersection kernel

In [11]:
def intersection_kernel_value(question_words, sentence_words):
    """Word-level intersection kernel between a question and a sentence.

    The value is the sum, over every word occurring in both sequences, of
    the smaller of its two occurrence counts.

    Args:
        question_words: Sequence of (hashable) question tokens.
        sentence_words: Sequence of (hashable) sentence tokens.

    Returns:
        int: The kernel value; 0 when no word is shared.
    """
    # ``Counter & Counter`` keeps, for each shared key, the minimum of the
    # two counts, which is exactly the per-word term of this kernel.
    shared_counts = Counter(question_words) & Counter(sentence_words)
    return sum(shared_counts.values())

In [12]:
def kernel_solution(question_words, context_sentences, kernel_type='intersection_kernel'):
    """Rank the context sentences by their kernel value against the question.

    ``kernel_type`` selects ``'spectrum_kernel'``, ``'presence_kernel'`` or
    ``'intersection_kernel'``; with any other value every sentence scores 0.
    Returns the sentence indices ordered from highest to lowest score, ties
    keeping their original order.
    """
    if kernel_type == 'spectrum_kernel':
        kernel = spectrum_kernel_value
    elif kernel_type == 'presence_kernel':
        kernel = presence_kernel_value
    elif kernel_type == 'intersection_kernel':
        kernel = intersection_kernel_value
    else:
        kernel = None

    values = [0 if kernel is None else kernel(question_words, sentence)
              for sentence in context_sentences]

    return sorted(range(len(values)), key=values.__getitem__, reverse=True)

In [13]:
# Evaluate each word-level kernel on the dev data and print its summary.
methods = ['spectrum_kernel', 'presence_kernel', 'intersection_kernel']
for method in methods:
    dev_results = run(dev, method)
    write_results(dev_results)

10000
Method: spectrum_kernel
AvgPrec@1: 0.677010406812 (std = 0.46761877195)
MAP: 0.802783152628 (std = 0.282561751016)


10000
Method: presence_kernel
AvgPrec@1: 0.793093661306 (std = 0.405087775306)
MAP: 0.871033907576 (std = 0.244495759616)


10000
Method: intersection_kernel
AvgPrec@1: 0.790350047304 (std = 0.407058779577)
MAP: 0.869128338505 (std = 0.246021270821)




### 2.3 Sentence similarity

**The code for max sentence similarity is taken from https://nlpforhackers.io/wordnet-sentence-similarity/.  
The algorithm was proposed by Mihalcea et al. in the paper [Corpus-based and Knowledge-based Measures of Text Semantic Similarity](https://www.aaai.org/Papers/AAAI/2006/AAAI06-123.pdf).  
More information about word similarities can be found at http://www.nltk.org/howto/wordnet.html.**

In [14]:
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet as wn
 

def penn_to_wn(tag):
    """Map a Penn Treebank tag to a one-letter WordNet POS tag.

    Tags starting with N, V, J or R map to 'n', 'v', 'a' and 'r'
    respectively; every other tag yields None.
    """
    prefix_to_wordnet = (('N', 'n'), ('V', 'v'), ('J', 'a'), ('R', 'r'))

    for prefix, wordnet_tag in prefix_to_wordnet:
        if tag.startswith(prefix):
            return wordnet_tag

    return None
 

def tagged_to_synset(word, tag):
    """Return the first WordNet synset for a POS-tagged word, or None.

    Args:
        word: The token to look up.
        tag: Its Penn Treebank tag; converted with ``penn_to_wn``.

    Returns:
        The first entry of ``wn.synsets(word, wn_tag)``, or None when the
        tag has no WordNet counterpart or the lookup yields nothing.
    """
    wn_tag = penn_to_wn(tag)
    if wn_tag is None:
        return None

    # Check for an empty result explicitly.  The previous bare ``except``
    # around the indexing also swallowed KeyboardInterrupt and genuine
    # lookup failures (e.g. a missing corpus), silently turning them into
    # "no synset".
    synsets = wn.synsets(word, wn_tag)
    return synsets[0] if synsets else None

 
def sentence_similarity(sentence1, sentence2):
    """Compute the directed WordNet similarity from sentence1 to sentence2.

    Both sentences are tokenized and POS-tagged, and each token is mapped to
    a synset with ``tagged_to_synset`` (tokens without one are dropped).
    For every synset of ``sentence1`` the best Wu-Palmer similarity to any
    synset of ``sentence2`` is taken, and these best scores are averaged.

    Args:
        sentence1: Source sentence as a string.
        sentence2: Target sentence as a string.

    Returns:
        float: The average best similarity, or 0.0 when no similarity could
        be computed for any word.
    """
    # Tokenize and tag
    tagged1 = pos_tag(word_tokenize(sentence1))
    tagged2 = pos_tag(word_tokenize(sentence2))

    # Map tagged words to synsets, dropping words without one
    synsets1 = [ss for ss in (tagged_to_synset(*tagged_word)
                              for tagged_word in tagged1) if ss]
    synsets2 = [ss for ss in (tagged_to_synset(*tagged_word)
                              for tagged_word in tagged2) if ss]

    score, count = 0.0, 0

    for synset in synsets1:
        # wup_similarity can yield None when no similarity is defined for a
        # pair.  Drop those values before calling max(): comparing None with
        # a number raises TypeError on Python 3.
        similarities = [similarity
                        for similarity in (synset.wup_similarity(ss)
                                           for ss in synsets2)
                        if similarity is not None]

        # Only words with at least one computable similarity contribute.
        if similarities:
            score += max(similarities)
            count += 1

    # Average the values
    if count > 0:
        score /= count

    return score


def symmetric_sentence_similarity(sentence1, sentence2):
    """Average the WordNet sentence similarity over both directions."""
    forward = sentence_similarity(sentence1, sentence2)
    backward = sentence_similarity(sentence2, sentence1)
    return (forward + backward) / 2

In [15]:
def sentence_similarity_solution(question_words, candidate_answers_words):
    """Rank candidate sentences by symmetric WordNet similarity to the question.

    Token lists are joined with single spaces before being compared.
    Returns the candidate indices ordered from most to least similar, ties
    keeping their original order.
    """
    question_text = " ".join(question_words)

    similarities = [
        symmetric_sentence_similarity(question_text, " ".join(candidate_words))
        for candidate_words in candidate_answers_words
    ]

    return sorted(range(len(similarities)), key=similarities.__getitem__, reverse=True)

## 3. Results

**First results**

In [16]:
# All methods compared in this notebook, in the order they are reported.
methods = ['random_solution', 'spectrum_kernel', 'presence_kernel', 'intersection_kernel', 'sentence_similarity']


# Evaluate every method on the training data and print its summary.
print('---Results---train_set---')
for method in methods:
    train_results = run(train, method)
    write_results(train_results)


# The dev-set run below is disabled by wrapping it in a string literal; as the
# last expression of the cell, the string itself is echoed as the cell value.
'''
print('---Results---dev_set---')
for method in methods:
    dev_results = run(dev, method)
    write_results(dev_results)
'''

---Results---train_set---
10000
20000
30000
40000
50000
60000
70000
80000
Method: random_solution
AvgPrec@1: 0.243221954589 (std = 0.429028012366)
MAP: 0.489592709539 (std = 0.310652522653)


10000
20000
30000
40000
50000
60000
70000
80000
Method: spectrum_kernel
AvgPrec@1: 0.657781481524 (std = 0.474452320142)
MAP: 0.793677682596 (std = 0.294664462039)


10000
20000
30000
40000
50000
60000
70000
80000
Method: presence_kernel
AvgPrec@1: 0.762463041816 (std = 0.4255739086)
MAP: 0.856392399475 (std = 0.264111287787)


10000
20000
30000
40000
50000
60000
70000
80000
Method: intersection_kernel
AvgPrec@1: 0.759483555748 (std = 0.427397103753)
MAP: 0.85435303237 (std = 0.265727202849)


10000
20000
30000
40000
50000
60000
70000
80000
Method: sentence_similarity
AvgPrec@1: 0.707610817475 (std = 0.454860141656)
MAP: 0.819571496116 (std = 0.288932461455)




"\nprint('---Results---dev_set---')\nfor method in methods:\n    dev_results = run(dev, method)\n    write_results(dev_results)\n"