In [1]:
import os
# Move the working directory one level up so the relative paths and the `src`
# imports used by later cells resolve. Presumably the notebook is stored in a
# sub-folder of the project root — TODO confirm.
# NOTE: this is not idempotent; re-running the cell without restarting the
# kernel moves the working directory up one more level.
os.chdir('..')

# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
import torch

from src.main import setup_torch, get_corpus
from src.utils import get_latest_model_file
from src.winograd_schema_challenge import analyse_single_wsc, generate_full_sentences, find_missing_wsc_words_in_corpus_vocab, winograd_test

# from src.html_wsc_parser import join_content, clean_tags_for_schema_and_snippet, get_schema_and_snippet_texts, generate_df

# from src.wsc_parser import generate_df_from_json

In [3]:
import json
import pandas as pd

def generate_df_from_json(path='./data/processed/english_wsc.json'):
    """Build a DataFrame of (correct, incorrect) Winograd sentence pairs.

    The JSON file must contain a flat list of records in which every two
    consecutive entries form one pair. Each record is read through its
    ``'substitution'`` key (the sentence text) and its ``'correctness'`` key
    (truthy when that sentence is the correct one of the pair). Only the
    first record's ``'correctness'`` flag decides the ordering of a pair.

    Args:
        path: Location of the JSON file. Defaults to the path this notebook
            has always read, resolved against the current working directory.

    Returns:
        pandas.DataFrame with the columns ``correct_sentence`` and
        ``incorrect_sentence``, one row per pair, in file order.

    Raises:
        ValueError: If the file holds an odd number of records, i.e. the
            last record has no partner.
    """
    with open(path, 'r', encoding='utf-8') as fp:
        wsc_json = json.load(fp)

    # Fail early with a clear message instead of an IndexError in the loop.
    if len(wsc_json) % 2 != 0:
        raise ValueError(
            'Expected an even number of records (consecutive pairs) in {!r}, got {}'.format(
                path, len(wsc_json)))

    rows = []
    # Walk the records two at a time: (0, 1), (2, 3), ...
    for first, second in zip(wsc_json[0::2], wsc_json[1::2]):
        if first['correctness']:
            rows.append([first['substitution'], second['substitution']])
        else:
            rows.append([second['substitution'], first['substitution']])

    return pd.DataFrame(rows, columns=['correct_sentence', 'incorrect_sentence'])

In [4]:
# Load the sentence pairs into a DataFrame with the columns
# `correct_sentence` and `incorrect_sentence`; later cells pass this `df`
# to the vocabulary check and to the Winograd test.
df = generate_df_from_json()

In [5]:
setup_torch()
# Prefer the GPU, but fall back to the CPU so the notebook can still run on a
# machine without CUDA. On CUDA machines this selects the same device as before.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
corpus = get_corpus()
# Vocabulary size, passed to the evaluation helpers in later cells.
ntokens = len(corpus.dictionary)

# TODO remove these two lines
# assert ntokens == 602755
# assert corpus.valid.size()[0] == 11606861

# Sanity check: the largest value in each split must be below the dictionary size.
assert corpus.train.max() < ntokens
assert corpus.valid.max() < ntokens
assert corpus.test.max() < ntokens

In [14]:
# Pick the model file to evaluate. The selection rule lives in
# src.utils.get_latest_model_file; presumably it is the most recently saved
# model — TODO confirm.
model_file_name = get_latest_model_file()

In [15]:
# Display the words reported by find_missing_wsc_words_in_corpus_vocab for
# the loaded sentence pairs — presumably the WSC tokens that are absent from
# the corpus dictionary (verify against the helper's implementation).
find_missing_wsc_words_in_corpus_vocab(df, corpus)

['gestured',
 'dowdy',
 'Ollie',
 'snobs',
 'butter',
 'Xenophanes',
 'conveys',
 'axe',
 'strangely',
 'drawer',
 'regretted',
 'jogging',
 'fooling',
 'handy',
 'backpack',
 'pot',
 'auditorium',
 'Sally',
 'Look',
 'bookcase',
 'ca',
 "''",
 'cried',
 'Kamchatka',
 'pronounce',
 'Kamtchatka',
 'dangled',
 'teller',
 'bullying',
 'quicker',
 'admires',
 'compassionate',
 'tellers',
 'half-empty',
 'spoon',
 'subway',
 'juggler',
 'sweater',
 'bassinet',
 'ached',
 'Lucy',
 'Archaeologists',
 'gameboy',
 'trainer',
 'rag',
 'closes',
 'keyhole',
 'passionately',
 'candy',
 'skeptical',
 'cleaned',
 'demonstrators',
 'bunk',
 'tasty',
 'Luckily',
 'flute',
 'Janie',
 'stir',
 'zoomed',
 'Everyone',
 'bathroom',
 '4:30',
 'gotten',
 'smiled',
 'groceries',
 'vindicated',
 'shepherds',
 'leftovers',
 'indiscreet',
 'frantically',
 'crutches',
 'chickens',
 'great-grandfather',
 'con',
 'barking',
 'chatting',
 'refrigerator',
 'slept',
 '``',
 'Terpsichore',
 'Lily',
 'envies',
 'sidewal

In [23]:
# Run the Winograd test over every sentence pair with the selected model.
# `df` is rebound to the frame returned by winograd_test (later cells read
# its `test_result` column), so re-running this cell feeds the previous
# result frame back in as input.
df, accuracy = winograd_test(df, corpus, model_file_name, ntokens, device)
# The message is in Portuguese: "Accuracy: {} for a test run with {} examples".
print('Acurácia: {} para teste realizado com {} exemplos'.format(accuracy, len(df)))

Acurácia: 0.6959706959706959 para teste realizado com 273 exemplos


In [25]:
# NOTE(review): this repeats the previous cell's call verbatim. The saved
# output of this run shows per-sentence "Most probable:" lines, which
# presumably come from a different version of winograd_test picked up by
# autoreload — confirm whether both cells are still needed.
df, accuracy = winograd_test(df, corpus, model_file_name, ntokens, device)
# The message is in Portuguese: "Accuracy: {} for a test run with {} examples".
print('Acurácia: {} para teste realizado com {} exemplos'.format(accuracy, len(df)))

Most probable:  The city <UNK> refused the <UNK> a permit because The <UNK> feared violence .
Most probable:  The city <UNK> refused the <UNK> a permit because The <UNK> advocated violence .
Most probable:  The trophy does <UNK> fit into the brown <UNK> because the <UNK> is too large .
Most probable:  The trophy does <UNK> fit into the brown <UNK> because the <UNK> is too small .
Most probable:  Joan made sure to <UNK> Susan for all the help Susan had <UNK> .
Most probable:  Joan made sure to <UNK> Susan for all the help Susan had given .
Most probable:  Paul tried to call George on the phone , but George was <UNK> successful .
Most probable:  Paul tried to call George on the phone , but George was <UNK> available .
Most probable:  The lawyer asked the witness a question , but the witness was reluctant to repeat it .
Most probable:  The lawyer asked the witness a question , but the witness was reluctant to answer it .
Most probable:  The delivery truck <UNK> by the school bus because t

In [17]:
# Number of pairs whose `test_result` is False — presumably the pairs the
# model got wrong (TODO confirm the column's meaning in winograd_test).
len(df[~df.test_result])

83

In [18]:
# Number of pairs whose `test_result` is True — presumably the pairs the
# model got right (TODO confirm the column's meaning in winograd_test).
len(df[df.test_result])

190

In [19]:
# Total number of evaluated pairs; the two counts above should add up to this.
len(df)

273

In [72]:
# Probe the model with one hand-written Portuguese sentence pair and print a
# verdict based on the truthiness of analyse_single_wsc's return value.
# Presumably the first sentence is treated as the correct one — verify the
# expected argument order against analyse_single_wsc.
print(
    'Right choice :D'
    if analyse_single_wsc(
        model_file_name,
        corpus,
        ntokens,
        device,
        'A medalha não cabe na maleta porque a maleta é pequena .',
        'A medalha não cabe na maleta porque a medalha é pequena .',
    )
    else 'Wrong :('
)

Wrong :(


In [31]:
# NOTE(review): ad-hoc ratio with hard-coded numbers. The denominator 283
# does not match the 273 examples reported by the test cells — confirm where
# these figures come from or replace them with values computed from `df`.
177/283

0.6254416961130742

In [20]:
# Repeat the Winograd test with `partial=True`. The meaning of that flag is
# defined inside winograd_test and is not visible here — presumably a
# partial-sentence scoring variant (TODO confirm).
# As in the earlier test cells, `df` is overwritten with the returned frame.
df, accuracy = winograd_test(df, corpus, model_file_name, ntokens, device, partial=True)
# The message is in Portuguese: "Accuracy: {} for a test run with {} examples".
print('Acurácia: {} para teste realizado com {} exemplos'.format(accuracy, len(df)))

Acurácia: 0.5201465201465202 para teste realizado com 273 exemplos
