<a href="https://colab.research.google.com/github/ankitk75/DL-Lab/blob/main/question%20generation%20model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook Example of LMQG
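This Colab notebook demonstrates [`lmqg`](https://github.com/asahi417/lm-question-generation#lmqg-language-model-for-question-generation-), a library for question generation with language models. It walks through question–answer pair generation in three modes (end-to-end, multitask, and pipeline), followed by standalone question generation and answer extraction.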

In [5]:
!pip install lmqg



In [6]:
from pprint import pprint
from lmqg import TransformersQG

## Question and Answer Generation (End-to-End)

In [7]:
# Load the English checkpoint used for the end-to-end
# question-and-answer generation example.
model = TransformersQG(
    model='lmqg/t5-base-squad-qag',
    language='en',
)



In [8]:
# Source paragraph from which question-answer pairs are generated
# (adjacent string literals are concatenated into a single paragraph).
context = (
    "William Turner was an English painter who specialised in watercolour landscapes. "
    "He is often known as William Turner of Oxford or just Turner of Oxford to distinguish him from his contemporary, J. M. W. Turner. "
    "Many of Turner's paintings depicted the countryside around Oxford. "
    "One of his best known pictures is a view of the city of Oxford from Hinksey Hill."
)

# Run the model; the result is a list of (question, answer) tuples.
question_answer = model.generate_qa(context)

# Pretty-print the generated pairs.
pprint(question_answer)

100%|██████████| 1/1 [00:00<00:00, 497.31it/s]


[('Who was an English painter who specialised in watercolour landscapes?',
  'William Turner'),
 ('What is William Turner often known as?',
  'William Turner of Oxford or just Turner of Oxford'),
 ("What did many of Turner's paintings depict?",
  'the countryside around Oxford'),
 ("What is one of Turner's best known pictures?",
  'a view of the city of Oxford from Hinksey Hill')]


## Question and Answer Generation (Multitask)


In [9]:
# Load the English checkpoint used for the multitask
# question-and-answer generation example.
model = TransformersQG(
    model='lmqg/t5-base-squad-qg-ae',
    language='en',
)

tokenizer_config.json:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/20.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [41]:
# Source paragraph from which question-answer pairs are generated
# (adjacent string literals are concatenated into a single paragraph).
# NOTE(review): the passage looks like it has lost its numerals ("on March",
# "COVID- cases", "over students"); it is kept verbatim here -- confirm
# whether that is intentional.
context = (
    "But on March, offline classes were canceled due to the rising number of COVID- cases on campus. "
    "By the first week of April, over students of MIT were infected by the coronavirus due to poor social distancing and lax safety standards in the college. "
    "In the last week of May, MIT released a revised schedule for the upcoming months. "
    "This was a very chaotic, tough schedule. "
    "This caused outrage amongst the students, who were already not pleased with the college due to unpopular decisions taken by them over the course of the pandemic. "
    "The protests by the students of MIT were initially ignored by the administration of MIT and MAHE. "
    "But, on June, MIT cancelled the end-semester examinations and opted to prorate the marks based on previous semester performances."
)

# Run the model; the result is a list of (question, answer) tuples.
question_answer = model.generate_qa(context)

# Pretty-print the generated pairs.
pprint(question_answer)

100%|██████████| 7/7 [00:00<00:00, 454.93it/s]
100%|██████████| 7/7 [00:00<00:00, 648.07it/s]


[('Why were offline classes canceled?', 'COVID-'),
 ('Why were students infected with the coronavirus?', 'poor social distancing'),
 ('When did MIT release a revised schedule for the upcoming months?',
  'last week'),
 ('What type of schedule did MIT release in May?', 'very chaotic, tough'),
 ('What did the revised schedule cause amongst the students?', 'outrage'),
 ('Who ignored the protests of the students of MIT?', 'administration'),
 ('What did MIT do after the end of June?',
  'prorate the marks based on previous semester performances.')]


## Question and Answer Generation (Pipeline)

In [11]:
# Load the two checkpoints used for the pipeline example: `model` is the
# checkpoint this notebook also uses for question generation, and `model_ae`
# is the one it uses for answer extraction.
# `language` is passed explicitly so this initialisation matches every other
# TransformersQG(...) call in the notebook instead of relying on the default.
model = TransformersQG(
    language='en',
    model='lmqg/t5-base-squad-qg',
    model_ae='lmqg/t5-base-squad-ae',
)

tokenizer_config.json:   0%|          | 0.00/2.44k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/20.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.54k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/20.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [None]:
# Source paragraph from which question-answer pairs are generated
# (adjacent string literals are concatenated into a single paragraph).
context = (
    "William Turner was an English painter who specialised in watercolour landscapes. "
    "He is often known as William Turner of Oxford or just Turner of Oxford to distinguish him from his contemporary, J. M. W. Turner. "
    "Many of Turner's paintings depicted the countryside around Oxford. "
    "One of his best known pictures is a view of the city of Oxford from Hinksey Hill."
)

# Run the model; the result is a list of (question, answer) tuples.
question_answer = model.generate_qa(context)

# Pretty-print the generated pairs.
pprint(question_answer)

100%|██████████| 4/4 [00:00<00:00, 370.21it/s]
100%|██████████| 4/4 [00:00<00:00, 80.88it/s]


[('Who was an English painter who specialised in watercolour landscapes?',
  'William Turner'),
 ('What is another name for William Turner?', 'William Turner of Oxford'),
 ("What did many of William Turner's paintings depict around Oxford?",
  'the countryside'),
 ('From what hill is a view of the city of Oxford taken?', 'Hinksey Hill.')]


## Question Generation

In [None]:
# Load the English checkpoint used for the question generation example.
model = TransformersQG(
    model='lmqg/t5-base-squad-qg',
    language='en',
)

In [None]:
# Paragraphs to ask about: the same sentence is used twice, once per answer.
context = ["William Turner was an English painter who specialised in watercolour landscapes"] * 2

# Target answers, one per paragraph (both lists have the same length).
answer = ["William Turner", "English"]

# Generate one question for each (paragraph, answer) pair and pretty-print them.
question = model.generate_q(list_context=context, list_answer=answer)
pprint(question)

100%|██████████| 2/2 [00:00<00:00, 674.43it/s]


['Who was an English painter who specialised in watercolour landscapes?',
 'What nationality was William Turner?']


## Answer Extraction

In [None]:
# Load the English checkpoint used for the answer extraction example.
model = TransformersQG(
    model='lmqg/t5-base-squad-ae',
    language='en',
)

In [None]:
# Extract answer candidates from a single sentence and pretty-print the result.
answer = model.generate_a(
    "William Turner was an English painter who specialised in watercolour landscapes"
)
pprint(answer)

100%|██████████| 1/1 [00:00<00:00, 570.73it/s]


['William Turner']
