In [None]:
import numpy as np
import nltk
import math
import sys, os

In [None]:
def read_data(s1, s2, labels):
    """Load sentence pairs and their labels from three line-aligned text files.

    Every line of ``s1`` / ``s2`` is split on single spaces into a token
    list; every line of ``labels`` is kept as one label string.

    Args:
        s1: path of the file with the first sentence of each pair.
        s2: path of the file with the second sentence of each pair.
        labels: path of the file with one label per line.

    Returns:
        ``(sent1, sent2, labs)``: two lists of token lists and a list of
        label strings, in file order.
    """
    def _read_tokens(path):
        # ``with`` guarantees the handle is closed even if reading raises.
        with open(path) as handle:
            # Strip the line terminator and trailing padding spaces before
            # splitting, so the final token never keeps a stray "\n" when a
            # line has no space in front of its newline.
            return [line.rstrip("\r\n").rstrip(" ").split(" ") for line in handle]

    sent1 = _read_tokens(s1)
    sent2 = _read_tokens(s2)
    with open(labels) as handle:
        labs = [line.rstrip("\r\n") for line in handle]
    return sent1, sent2, labs

# Load the token lists and labels from the *.train files; the paths are
# relative, so they are resolved against the current working directory.
s1, s2, labels = read_data("s1.train", "s2.train", "labels.train")


In [None]:
# Is Data Balanced?
label_counts = {}
for label in labels:
    if not label in label_counts:
        label_counts[label] = 0
    label_counts[label] += 1
print "label_counts: ", label_counts

In [None]:
# Preprocess
# Number of entries in s1; used further down to walk s1 and s2 by index.
num_sent = len(s1)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats
import seaborn as sns
import torch
import nltk
nltk.download("punkt")
import torch
import pickle
import regrFuncs as rF
import testFuncs as tF
import random

# --- File locations (all relative to the working directory) ---
GLOVE_PATH = './Downloads/glove.840B.300d.txt'
MODEL_PATH = './Downloads/infersent.allnli.pickle'
REGR_MODEL_PATH = './models/'
EMBED_STORE = None
TEST_OUT_PATH = './regout/'
DATA_PATH = './Downloads/SNLI/true/'

# Output/model directories bundled under their constant names.
outpaths = dict(REGR_MODEL_PATH=REGR_MODEL_PATH, TEST_OUT_PATH=TEST_OUT_PATH)


# Class id <-> upper-case label name; label2id is the exact inverse of id2label.
id2label = {0: 'CONTRADICTION', 1: 'NEUTRAL', 2: 'ENTAILMENT'}
label2id = dict((label_name, label_id) for label_id, label_name in id2label.items())

# Single-argument print calls produce the same text under the Python 2
# print statement and the Python 3 print function.
print("MODEL_PATH= %s" % MODEL_PATH)
print("cwd= %s" % os.getcwd())

# SECURITY NOTE: torch.load and pickle.load below unpickle arbitrary objects
# and can execute code from the file; only load files from a trusted source.
# The map_location callback returns the storage unchanged, i.e. tensors are
# not moved to another device while loading.
model = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)
model.use_cuda = False
model.set_glove_path(GLOVE_PATH)
model.build_vocab_k_words(K=100000)

# One pickled regressor per (embedding name, classifier) combination, stored
# under REGR_MODEL_PATH in a file named by concatenating the two strings.
names = ['InferSent', 'BOW']
classifiers = [ 'LogReg']
all_regs = {}
for name in names:
    for classifier in classifiers:
        reg_key = name + classifier
        reg_path = '{0}{1}'.format(outpaths['REGR_MODEL_PATH'], reg_key)
        # ``with`` closes the file handle once the regressor is unpickled.
        with open(reg_path, 'rb') as reg_file:
            all_regs[reg_key] = pickle.load(reg_file)

def print_preds(sent_a, sent_b, verbose = True, names = names, classifiers = classifiers):
    """Predict a label for every (sent_a[i], sent_b[i]) pair with each stored regressor.

    For every ``name`` / ``classifier`` combination both sentence lists are
    embedded with ``rF.embed`` and passed to ``tF.predict`` together with
    ``all_regs[name + classifier]``.

    Args:
        sent_a: sequence of first sentences (presumably plain strings, as the
            callers in this notebook pass space-joined tokens).
        sent_b: sequence of second sentences, aligned with ``sent_a``.
        verbose: when true, print a banner per model and each pair with its
            predicted label and score.
        names: embedding names to evaluate (default bound at definition time).
        classifiers: classifier names to evaluate (default bound at
            definition time).

    Returns:
        dict mapping ``name + classifier`` to ``{'pred': ndarray of label
        names, 'conf': ndarray of conf[i][pred[i]] * 100}``.
    """
    vals = {}
    for name in names:
        for classifier in classifiers:
            key = name + classifier
            A, B = rF.embed(model, sent_a, 1, name), rF.embed(model, sent_b, 1, name)
            pred, conf = tF.predict(A, B, all_regs[key])
            # Every print below takes exactly one pre-formatted string. The
            # rest of this file uses the Python 2 print statement, under which
            # ``print(a, b)`` would print the repr of a tuple instead of the
            # space-separated values.
            if verbose:
                print('*'*20)
                print('%s %s' % (name, classifier))
                print('*'*20 + ' \n')
            pred_labels = []
            pred_confs = []
            for i in range(len(A)):
                label_name = id2label[pred[i]]
                # Score of the predicted class scaled by 100 (presumably a
                # probability turned into a percentage -- confirm in testFuncs).
                confidence = conf[i][pred[i]]*100
                if verbose:
                    print('A:  %s \t B:  %s' % (sent_a[i], sent_b[i]))
                    print('%s %s' % (label_name, confidence))
                    print('\n')
                pred_labels.append(label_name)
                pred_confs.append(confidence)
            vals[key] = {'pred': np.array(pred_labels), 'conf': np.array(pred_confs)}
            if verbose:
                print('\n\n')
    return vals

from sets import Set
def overlap_ratio(sent1, sent2):
    """Return the case-insensitive token overlap between two token lists.

    Counts every token occurrence in ``sent2`` that also appears in ``sent1``
    plus every token occurrence in ``sent1`` that also appears in ``sent2``,
    and divides by the combined number of tokens.

    Args:
        sent1: list of token strings.
        sent2: list of token strings.

    Returns:
        A float in [0, 1]; 0.0 when both lists are empty.
    """
    sent1 = [word.lower() for word in sent1]
    sent2 = [word.lower() for word in sent2]
    total = len(sent1) + len(sent2)
    if total == 0:
        # Nothing to compare; avoids a ZeroDivisionError.
        return 0.0
    # Built-in sets replace the deprecated ``sets.Set`` for membership tests.
    word1 = set(sent1)
    word2 = set(sent2)
    cnt = sum(1 for word in sent2 if word in word1)
    cnt += sum(1 for word in sent1 if word in word2)
    return 1.0 * cnt / total

# Pair every sentence index with its overlap ratio, then rank by descending
# ratio. The sort is stable, so ties keep their original index order.
overlaps = [(i, overlap_ratio(s1[i], s2[i])) for i in range(num_sent)]
overlaps.sort(key=lambda pair: -pair[1])

# Sentence indices only, highest overlap first.
overlap_indices = [pair[0] for pair in overlaps]


In [None]:
# why high overlap = relevance?
def general_stats(indices, k=5):
    """Print the gold-label distribution of ``indices`` and ``k`` random samples.

    Args:
        indices: list of positions into the global ``s1`` / ``s2`` / ``labels``.
        k: number of example pairs to print; each one is drawn independently
            with ``np.random.choice``, so repeats are possible.
    """
    counter = {"entailment": 0, "contradiction":0, "neutral":0}
    for ind in indices:
        # .get() lets a label outside the three expected ones be counted
        # instead of raising KeyError.
        counter[labels[ind]] = counter.get(labels[ind], 0) + 1
    print("Top %d:" % len(indices))

    if len(indices) == 0:
        # No ratios to compute and nothing to sample from.
        return

    for l in counter:
        c = counter[l]
        print("%s:%d (%.3lf)" % (l, c, 1.0 * c / len(indices)))

    for i in range(k):
        ind = np.random.choice(indices)
        print("    SAMPLE\n    s1=%s\n    s2=%s\n    label=%s" % (" ".join(s1[ind]), " ".join(s2[ind]), labels[ind]))
# Label statistics for the whole ranking, then for its top 1000 and top 10000.
for subset in (overlap_indices, overlap_indices[:1000], overlap_indices[:10000]):
    general_stats(subset)

In [None]:
# Contradiction Analysis
def contradiction_analysis(indices, predicts=None):
    """Compare gold contradictions with the InferSentLogReg predictions.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        predicts: output of ``print_preds`` for exactly these indices, in the
            same order. When ``None`` it is computed here.

    Returns:
        The ``predicts`` dict that was used, so callers can reuse it.
    """
    true_c, predict_c, match_c = 0, 0, 0

    if predicts is None:
        # Only build the joined sentences when a prediction run is needed.
        subset_s1 = [" ".join(s1[ind]) for ind in indices]
        subset_s2 = [" ".join(s2[ind]) for ind in indices]
        predicts = print_preds(subset_s1, subset_s2)

    preds = predicts["InferSentLogReg"]["pred"]
    for i, ind in enumerate(indices):
        is_true_c = labels[ind] == "contradiction"
        if is_true_c:
            true_c += 1
        if preds[i] == "CONTRADICTION":
            predict_c += 1
            if is_true_c:
                match_c += 1

    def _ratio(num, den):
        # An undefined ratio is reported as nan instead of raising
        # ZeroDivisionError (e.g. nothing was predicted as a contradiction).
        return 1.0 * num / den if den else float("nan")

    print("True contradictions: %d / %d (%.3lf)" % (true_c, len(indices), _ratio(true_c, len(indices))))
    print("Predicted contradictions: %d / %d (%.3lf)" % (predict_c, len(indices), _ratio(predict_c, len(indices))))
    print("How many true contradictions are predicted correctly? %d / %d ( %.3lf)" % (match_c, true_c, _ratio(match_c, true_c)))
    print("How many predicted contradictions are true? %d / %d (%.3lf)" % (match_c, predict_c, _ratio(match_c, predict_c)))
    return predicts

In [None]:
# Run the model on the 1000 highest-overlap pairs and keep the predictions:
# later cells pass `predicts` together with this same overlap_indices[:1000]
# slice, and the analysis functions index it by position within that slice.
predicts = contradiction_analysis(overlap_indices[:1000])

In [None]:
import random
import numpy as np
def sample_data_point(indices, label):
    """Return a formatted random example with gold label ``label``.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        label: gold label string the sampled pair must carry.

    Returns:
        A string of the form ``"[index]label\\ns1 tokens\\ns2 tokens\\n"``.

    Raises:
        ValueError: if no entry of ``indices`` has the requested label.
    """
    # Filter first and draw once. Re-drawing until a match turns up would
    # loop forever when the label never occurs among ``indices``.
    candidates = [ind for ind in indices if labels[ind] == label]
    if not candidates:
        raise ValueError("no data point labelled %r among the given indices" % (label,))
    ind = np.random.choice(candidates)
    return "[%d]%s\n%s\n%s\n" % (ind, labels[ind], " ".join(s1[ind]), " ".join(s2[ind]))

In [None]:
# Two random examples per gold label from the 1000 highest-overlap pairs.
for wanted_label in ("contradiction", "entailment"):
    for _repeat in range(2):
        print(sample_data_point(overlap_indices[:1000], wanted_label))
    if wanted_label == "contradiction":
        # Blank line between the two label groups, as in the original output.
        print


In [None]:
def sample_data_point_v2(indices, predicts, k=10):
    """Print random gold contradictions, split by whether the model agreed.

    First prints ``k`` gold-contradiction pairs that InferSentLogReg also
    predicted as CONTRADICTION, then ``k`` that it predicted as something
    else. Samples are drawn independently, so repeats are possible.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        predicts: ``print_preds`` output aligned with ``indices``.
        k: number of samples to print per group.
    """
    preds = predicts["InferSentLogReg"]["pred"]

    def _show_samples(tag, want_predicted_c):
        # Positions (into ``indices``) of gold contradictions whose prediction
        # does / does not equal CONTRADICTION, depending on the flag.
        positions = []
        for pos, ind in enumerate(indices):
            if labels[ind] != "contradiction":
                continue
            if (preds[pos] == "CONTRADICTION") == want_predicted_c:
                positions.append(pos)
        if not positions:
            # np.random.choice raises on an empty sequence; report it instead.
            print("%s: no matching pairs to sample" % tag)
            return
        for t in range(k):
            pos = np.random.choice(positions)
            ind = indices[pos]
            print("%s SAMPLE %d:" % (tag, t))
            print("    %s\n    %s\npredict=%s\n" % (" ".join(s1[ind]), " ".join(s2[ind]), preds[pos]))

    _show_samples("P(C_pred | C_true)", True)
    _show_samples("P(not C_pred | C_true)", False)





In [None]:
# `predicts` must have been produced for this same overlap_indices[:1000]
# slice, because sample_data_point_v2 indexes it by position in the slice.
sample_data_point_v2(overlap_indices[:1000], predicts)

In [None]:
# Negation Analysis
# Tokens treated as explicit negation markers.
negation_words_basic = ["no", "not", "didn't", "don't", "doesn't", "n't"]
# Extended list: the basic markers plus further negative or near-negative
# words. Each word appears once.
negation_words_advanced = negation_words_basic + ["neither", "none", "never", "nobody", "nothing", "nowhere", "little", "hardly", "few", "rarely"]
"""Check how many sentence pairs that contain a negation word are contradictions (and vice versa)."""
from sets import Set

def has_negation(sent1, sent2, negation_words):
    """Tell whether either sentence contains one of the negation words.

    Matching is done on whole tokens and ignores case, so a sentence-initial
    "No" or "Never" is detected as well.

    Args:
        sent1: list of token strings.
        sent2: list of token strings.
        negation_words: iterable of negation tokens to look for.

    Returns:
        True if at least one token of ``sent1`` or ``sent2`` is a negation
        word, otherwise False.
    """
    # Built-in set replaces the deprecated ``sets.Set``.
    tokens = set(word.lower() for word in sent1)
    tokens.update(word.lower() for word in sent2)
    return not tokens.isdisjoint(word.lower() for word in negation_words)
    
def negation_words_analysis(indices, negation_words, label="contradiction"):
    """Print how gold label ``label`` and the presence of negation co-occur.

    Pairs whose two sentences are identical are skipped. Two lines are
    printed: the share of ``label`` pairs that contain a negation word, and
    the share of negation pairs whose gold label is ``label``.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        negation_words: tokens handed to ``has_negation``.
        label: gold label to analyse.
    """
    label_total = 0   # pairs with the requested gold label
    negated_total = 0  # pairs containing a negation word
    both = 0          # pairs satisfying both conditions
    # One pass collects all three counts; has_negation runs once per pair.
    for ind in indices:
        if " ".join(s1[ind]) == " ".join(s2[ind]):
            continue
        is_label = labels[ind] == label
        negated = has_negation(s1[ind], s2[ind], negation_words)
        if is_label:
            label_total += 1
        if negated:
            negated_total += 1
        if is_label and negated:
            both += 1

    def _ratio(num, den):
        # nan marks an undefined ratio instead of raising ZeroDivisionError.
        return 1.0 * num / den if den else float("nan")

    # negation / label
    print("negation/%s: %d/%d (%.3lf) %s pairs have negations" % (label, both, label_total, _ratio(both, label_total), label))
    # label / negation
    print("%s/negation: %d/%d (%.3lf) negation pairs are %s" % (label, both, negated_total, _ratio(both, negated_total), label))

# Negation statistics for both gold labels: first on the 10000 highest-overlap
# pairs, then on every pair.
print("high overlap/relevancy top 10000")
for pair_label in ("contradiction", "entailment"):
    negation_words_analysis(overlap_indices[:10000], negation_words_advanced, label=pair_label)

print("overall")
for pair_label in ("contradiction", "entailment"):
    negation_words_analysis(overlap_indices, negation_words_advanced, label=pair_label)


In [None]:
def negation_words_analysis_infersent(indices, negation_words, label="CONTRADICTION", predicts=None):
    """Print how the predicted label ``label`` and negation co-occur.

    Same report as ``negation_words_analysis`` but based on the
    InferSentLogReg prediction instead of the gold label. Pairs whose two
    sentences are identical are skipped, and every pair that contains a
    negation word is printed with its predicted and gold label.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        negation_words: tokens handed to ``has_negation``.
        label: predicted label name (upper case) to analyse.
        predicts: ``print_preds`` output aligned with ``indices``; computed
            here when ``None``.

    Returns:
        The ``predicts`` dict that was used, so callers can reuse it.
    """
    if predicts is None:
        subset_s1 = [" ".join(s1[ind]) for ind in indices]
        subset_s2 = [" ".join(s2[ind]) for ind in indices]
        predicts = print_preds(subset_s1, subset_s2)
    preds = predicts["InferSentLogReg"]["pred"]

    def _ratio(num, den):
        # nan marks an undefined ratio instead of raising ZeroDivisionError.
        return 1.0 * num / den if den else float("nan")

    # negation / predicted label
    # Counters start at 0 so the printed denominators are the true counts.
    total = 0
    counter = 0
    for i, ind in enumerate(indices):
        if " ".join(s1[ind]) == " ".join(s2[ind]):
            continue
        if preds[i] == label:
            total += 1
            if has_negation(s1[ind], s2[ind], negation_words):
                counter += 1
    print("negation/%s: %d/%d (%.3lf) %s pairs have negations" % (label, counter, total, _ratio(counter, total), label))

    # predicted label / negation
    total, counter = 0, 0
    for i, ind in enumerate(indices):
        if " ".join(s1[ind]) == " ".join(s2[ind]):
            continue

        if has_negation(s1[ind], s2[ind], negation_words):
            print("HAS NEGATION !!!!!!!!!!!!!!!!!!!!!!!!! \n%s\n%s\npredict=%s, true=%s\n" % (s1[ind], s2[ind], preds[i], labels[ind]))
            total += 1
            if preds[i] == label:
                counter += 1
    print("%s/negation: %d/%d (%.3lf) negation pairs are %s" % (label, counter, total, _ratio(counter, total), label))

    return predicts


In [None]:
print("high overlap/relevancy top 1000")

# The first call receives no predictions, so it runs the model and returns
# them; the second call reuses that result for the ENTAILMENT report.
predicts = negation_words_analysis_infersent(overlap_indices[:1000], negation_words_basic)
_ = negation_words_analysis_infersent(
    overlap_indices[:1000], negation_words_basic, label="ENTAILMENT", predicts=predicts)


In [None]:
# Re-print both reports from the stored predictions (no new model run).
for predicted_label in ("CONTRADICTION", "ENTAILMENT"):
    _ = negation_words_analysis_infersent(
        overlap_indices[:1000], negation_words_basic, label=predicted_label, predicts=predicts)


In [None]:
"""How many contradictory sentence pairs contain antonyms? Also print out the importance of those antonyms."""
import nltk
from nltk.corpus import wordnet
from sets import Set
from tqdm import tqdm 
def get_antonym_set(word):
    """Collect WordNet antonym names for ``word``.

    Walks every lemma of every synset returned by ``wordnet.synsets(word)``
    and, for each lemma that has antonyms, records the name of the first one.

    Returns:
        A ``Set`` of antonym name strings (empty when none are found).
    """
    found = Set([])
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            opposites = lemma.antonyms()
            if opposites:
                # Only the first listed antonym of the lemma is kept.
                found.add(opposites[0].name())
    return found
def antonym_analysis(indices, label="contradiction", imp_sample=10):
    """Print how often gold-``label`` pairs contain an antonym pair.

    A pair "has an antonym" when some token of ``s1[ind]`` has a WordNet
    antonym (see ``get_antonym_set``) that occurs as a token of ``s2[ind]``.
    Every such hit in a ``label`` pair is printed, followed by three summary
    lines.

    Args:
        indices: positions into the global ``s1`` / ``s2`` / ``labels``.
        label: gold label to analyse.
        imp_sample: currently unused; kept so existing calls stay valid. It
            belonged to a disabled importance-visualisation step.
    """
    sum_antonym = 0   # antonym-bearing s1 tokens over all ``label`` pairs
    counter = 0       # ``label`` pairs with at least one antonym hit
    total = 0         # ``label`` pairs
    total_ant = 0     # pairs (any label) with at least one antonym hit
    for ind in tqdm(indices):
        is_target = labels[ind] == label
        if is_target:
            total += 1
        # Built once per pair instead of once per s1 token.
        s2_words = Set(s2[ind])
        cur_antonym = 0

        for word in s1[ind]:
            shared = s2_words & get_antonym_set(word)
            if len(shared) != 0:
                cur_antonym += 1

                if is_target:
                    print("ind=%d\ns1:%s\ns2:%s\nAntonym: %s, %s" % (ind, " ".join(s1[ind]), " ".join(s2[ind]), word, shared))
        if is_target:
            counter += int(cur_antonym > 0)
            sum_antonym += cur_antonym

        total_ant += int(cur_antonym > 0)

    def _ratio(num, den):
        # nan marks an undefined ratio instead of raising ZeroDivisionError.
        return 1.0 * num / den if den else float("nan")

    print("avg antonym # for given type sentence: %.5lf" % (_ratio(sum_antonym, total)))
    print("antonym/%s: percantage of given type sentence to have antonyms: %d/%d (%.5lf)" % (label, counter, total, _ratio(counter, total)))
    print("%s/antonym: percentage of antonyms pairs sentences to be %s: %d/%d(%.5lf)" % (label, label, counter, total_ant, _ratio(counter, total_ant)))

In [None]:
# Antonym statistics for gold "contradiction" pairs (the default label)
# among the 1000 highest-overlap pairs.
antonym_analysis(overlap_indices[:1000])
# Entailment counterpart, currently disabled:
#antonym_analysis(overlap_indices[:1000], label="entailment")

In [None]:
# TODO
# Antonym statistics over every pair, once per gold label.
for gold_label in ("contradiction", "entailment"):
    antonym_analysis(overlap_indices, label=gold_label)

In [None]:
def antonym_analysis_infersent(indices, predicts, label="CONTRADICTION", imp_sample=10):
    """Print how often pairs predicted as ``label`` contain an antonym pair.

    Same report as ``antonym_analysis`` but grouped by the InferSentLogReg
    prediction instead of the gold label.

    Args:
        indices: positions into the global ``s1`` / ``s2``.
        predicts: ``print_preds`` output aligned with ``indices``.
        label: predicted label name (upper case) to analyse.
        imp_sample: currently unused; kept so existing calls stay valid. It
            belonged to a disabled importance-visualisation step.
    """
    preds = predicts["InferSentLogReg"]["pred"]
    sum_antonym = 0   # antonym-bearing s1 tokens over all ``label`` pairs
    counter = 0       # ``label`` pairs with at least one antonym hit
    total = 0         # pairs predicted as ``label``
    total_ant = 0     # pairs (any prediction) with at least one antonym hit
    # tqdm wraps the list itself (not the enumerate object) so it can read
    # the length and show a real progress bar.
    for i, ind in enumerate(tqdm(indices)):
        is_target = preds[i] == label
        if is_target:
            total += 1
        # Built once per pair instead of once per s1 token.
        s2_words = Set(s2[ind])
        cur_antonym = 0

        for word in s1[ind]:
            shared = s2_words & get_antonym_set(word)
            if len(shared) != 0:
                cur_antonym += 1

                if is_target:
                    print("ind=%d\ns1:%s\ns2:%s\nAntonym: %s, %s" % (ind, " ".join(s1[ind]), " ".join(s2[ind]), word, shared))
        if is_target:
            counter += int(cur_antonym > 0)
            sum_antonym += cur_antonym

        total_ant += int(cur_antonym > 0)

    def _ratio(num, den):
        # nan marks an undefined ratio instead of raising ZeroDivisionError.
        return 1.0 * num / den if den else float("nan")

    print("avg antonym # for given type sentence: %.5lf" % (_ratio(sum_antonym, total)))
    print("antonym/%s: percantage of given type sentence to have antonyms: %d/%d (%.5lf)" % (label, counter, total, _ratio(counter, total)))
    print("%s/antonym: percentage of antonyms pairs sentences to be %s: %d/%d(%.5lf)" % (label, label, counter, total_ant, _ratio(counter, total_ant)))

In [None]:
# Antonym statistics grouped by predicted label, reusing the stored predictions.
for predicted_label in ("CONTRADICTION", "ENTAILMENT"):
    antonym_analysis_infersent(overlap_indices[:1000], predicts, label=predicted_label)

In [None]:
# Dump the whole prediction dict (label and score arrays for every model).
print(predicts)