In [1]:
# Importing libraries
import json
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import os
from configuration import Configuration
from configuration import CONSTANTS as C
# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nltk.translate.bleu_score import sentence_bleu
from rich.table import Column, Table
from rich import box
from rich.console import Console
from tensorboardX import SummaryWriter
import time
from torch import cuda

# Raise pandas' display limits so frames of up to 500 rows / 500 columns are
# rendered without truncation.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

In [9]:
# Load the RACE distractor test split into a DataFrame.
# The file holds one JSON object per line, so every non-blank line is decoded
# on its own. This avoids stitching the lines into a single array by string
# replacement, which fails when the file lacks a trailing newline or contains
# a blank line. The encoding is pinned to UTF-8 so the result does not depend
# on the platform default.
records_path = os.path.join(C.DATA_DIR, "distractor/race_test_original.json")
with open(records_path, 'r', encoding='utf-8') as content_file:
    parsed_rows = [json.loads(line) for line in content_file if line.strip()]
records = pd.DataFrame(parsed_rows)

# Collapse the text fields into single space-separated strings
# (assumes each cell holds a list of string tokens — TODO confirm against the
# raw file). Every new column is derived from its own original column only.
text_columns = ('question', 'distractor', 'article', 'answer_text')
records = records.assign(
    **{column: records[column].str.join(' ') for column in text_columns}
)

In [10]:
# Dimensions of the loaded frame as (rows, columns).
records.shape

(6884, 13)

In [11]:
# Count of distinct question strings; compare with the row count, since a
# question can appear on several rows.
records.question.nunique()

5779

In [5]:
# NOTE(review): same expression as the preceding cell, but the saved output
# differs and carries an earlier execution count (In [5]) — presumably it was
# run while `records` held a different file. Confirm which split it refers to,
# or drop the duplicate so the notebook survives Restart & Run All.
records.question.nunique()

41505

In [19]:
# Frequency table (first 499 entries) of distractors whose lower-cased text
# contains "true".
mentions_true = records.distractor.str.lower().str.contains('true')
records.loc[mentions_true, 'distractor'].value_counts().reset_index().head(499)

Unnamed: 0,index,distractor
0,"To test whether the saying "" money is the root...",1
1,The True Fact of Pearl Harbor .,1
2,A true friend will stand by you whatever happe...,1
3,have true love,1
4,To teach us how to make true friends .,1
5,Hold a true friend with both your hands .,1
6,Something blue reminds the bride to be true to...,1
7,The true happiness in the writer 's present li...,1
8,True friendship between them .,1
9,What a true friend is like .,1


In [20]:
# Frequency table of distractor strings, most common first, capped at 499 rows.
distractor_counts = records['distractor'].value_counts().reset_index()
distractor_counts.head(499)

Unnamed: 0,index,distractor
0,a forest fire created a black cloud over the city,2
1,The movie has made the Japanese angry .,2
2,laugh all the time,2
3,She wanted to remind her father of the stories...,2
4,baby boys are much more active,2
5,green light,2
6,will take fewer risks in pursuing happiness,2
7,Adventures of children,2
8,It rented more bikes to tourists .,2
9,Women are fonder of driving than men .,2


In [None]:
# Catch-all option strings; they show up among the most frequent answer_text
# values. Presumably these are answers to filter out — confirm where `remove`
# is consumed.
remove = [
    'All of the above .',
    'all of the above',
    'Both A and B',
    'All the above .',
]

In [24]:
# Same frequency table as a raw array of [answer_text, count] pairs, so long
# strings are shown in full instead of being elided by the frame renderer.
answer_counts = records['answer_text'].value_counts().reset_index()
answer_counts.head(499).values

array([['All of the above .', 11],
       ['all of the above', 9],
       ['Wild time for children', 6],
       ['The movie will cause a lot of arguments .', 6],
       ['More research should be made before the technology comes into wide use .',
        6],
       ['wild time is beneficial for children', 6],
       ["The author 's memoir has proved to be very popular and successful .",
        6],
       ['Both A and B', 6],
       ['All the above .', 6],
       ['It saw its bike sales on the rise .', 6],
       ['She hoped those things would bring happiness to her father .',
        6],
       ['What makes the Ice Hotel special', 6],
       ["Joan 's friends visit her more often than she can accept", 5],
       ['the government ignored the smog', 5],
       ['The author was inspired to help others by the Mexican family .',
        5],
       ['find out early about an earthquake', 4],
       ['how photography was developed', 4],
       ['smile and even laugh often', 4],
       ['how an

In [21]:
# Frequency table of correct-answer strings, most common first, capped at 499 rows.
answer_counts = records['answer_text'].value_counts().reset_index()
answer_counts.head(499)

Unnamed: 0,index,answer_text
0,All of the above .,11
1,all of the above,9
2,Wild time for children,6
3,The movie will cause a lot of arguments .,6
4,More research should be made before the techno...,6
5,wild time is beneficial for children,6
6,The author 's memoir has proved to be very pop...,6
7,Both A and B,6
8,All the above .,6
9,It saw its bike sales on the rise .,6


In [11]:
# NOTE(review): identical to the preceding cell, yet the saved counts are much
# larger — presumably this output was produced while `records` held a
# different (larger) file. Confirm the source split or remove the duplicate.
records.answer_text.value_counts().reset_index().iloc[:499]

Unnamed: 0,index,answer_text
0,All of the above .,57
1,all of the above,40
2,both A and B,29
3,both B and C,27
4,"A , B and C",22
5,All of the above,18
6,parents,15
7,both A and C,15
8,All the above .,14
9,Both A and B,12


In [5]:
# Bare expression so the notebook renders the frame. The saved output shows
# only the first and last rows of a frame far longer than display.max_rows.
# NOTE(review): the execution count (In [5]) predates the load cell above, so
# this output presumably reflects a different file — confirm.
records

Unnamed: 0,article,sent,question,distractor,answer_text,id
0,Last week I talked with some of my students ab...,"[[Last, week, I, talked, with, some, of, my, s...","According to the passage , the author believes...",it 's right for graduates to ask for others to...,media are to blame for misleading young people...,"{'file_id': 19088, 'question_id': 0, 'distract..."
1,Last week I talked with some of my students ab...,"[[Last, week, I, talked, with, some, of, my, s...",Which ' s the best title for the passage ? .,Young Graduates ' Opinion About Cosmetic Surgery,Young Graduates Look to Surgery for Better Jobs,"{'file_id': 19088, 'question_id': 1, 'distract..."
2,"YUZHOU , HENAN -An accident in a central China...","[[YUZHOU, ,, HENAN, -An, accident, in, a, cent...",What could be the best title for this passage ?,Death Toll Rises in an Accident in China,A Coal Mine Accident in Central China,"{'file_id': 15596, 'question_id': 0, 'distract..."
3,"YUZHOU , HENAN -An accident in a central China...","[[YUZHOU, ,, HENAN, -An, accident, in, a, cent...",What could be the best title for this passage ?,An Accident in Central China,A Coal Mine Accident in Central China,"{'file_id': 15596, 'question_id': 0, 'distract..."
4,"YUZHOU , HENAN -An accident in a central China...","[[YUZHOU, ,, HENAN, -An, accident, in, a, cent...",What could be the best title for this passage ?,Coal Mine Accidents in China,A Coal Mine Accident in Central China,"{'file_id': 15596, 'question_id': 0, 'distract..."
...,...,...,...,...,...,...
96496,People usually talk about two groups...,"[[People, , usually, , talk, , about, , tw...",The best title for this passage shou...,Warm colours and cool colours,People and colours,"{'file_id': 867, 'question_id': 3, 'distractor..."
96497,People usually talk about two groups...,"[[People, , usually, , talk, , about, , tw...",The best title for this passage shou...,Sociable people like warm colours,People and colours,"{'file_id': 867, 'question_id': 3, 'distractor..."
96498,People usually talk about two groups...,"[[People, , usually, , talk, , about, , tw...",The best title for this passage shou...,Places and colours,People and colours,"{'file_id': 867, 'question_id': 3, 'distractor..."
96499,"One day in the eighth grade , I was taking a M...","[[One, day, in, the, eighth, grade, ,, I, was,...","From the passage , we know that the writer",got a good grade at last,did n't cheat at last,"{'file_id': 3886, 'question_id': 0, 'distracto..."
