# Question Generation

1. Part of Speech Tagger
2. Text Chunking
3. Yes/No Question Generation

# Part Of Speech Tagger

Given a list of words, how can we label each word with its part of speech (noun, verb, adjective, ...)?

In [1]:
import nltk

#### Installation (In command line):

    pip install nltk
    python -m nltk.downloader averaged_perceptron_tagger
    python -m nltk.downloader tagsets

In [34]:
# Pre-tokenized sample text: three sentences, with punctuation kept as separate tokens.
word_list = [
    # Sentence 1
    'Los', 'Angeles', 'has', 'a', 'Gyarados', '(', 'ギャラドス', ',',
    'Gyaradosu', ',', 'or', ')', 'is', 'a', 'Pokémon', 'species', 'in',
    'Nintendo', 'and', 'Game', 'Freak', "'s", 'Pokémon', 'franchise', '.',
    # Sentence 2
    'Created', 'by', 'Ken', 'Sugimori', ',', 'Gyarados', 'first', 'appeared',
    'in', 'the', 'video', 'games', 'Pokémon', 'Red', 'and', 'Pokemon', 'Green',
    'and', 'subsequent', 'sequels', ',', 'later', 'appearing', 'in', 'various',
    'merchandise', ',', 'spinoff', 'titles', 'and', 'animated', 'and',
    'printed', 'adaptations', 'of', 'the', 'franchise', '.',
    # Sentence 3
    'Gyarados', 'is', 'voiced', 'by', 'Unshō', 'Ishizuka', 'in', 'both',
    'Japanese', 'and', 'English', 'media', '.',
]

In [35]:
def pos_tagger(docs):
    """Tag each token in ``docs`` with its part of speech.

    Thin wrapper around ``nltk.pos_tag``; the tagger data must already be
    installed (see the installation notes above).

    Args:
        docs: A list of word tokens (already split, punctuation included).

    Returns:
        Whatever ``nltk.pos_tag`` returns for ``docs`` -- in this notebook a
        list of ``(token, tag)`` tuples.
    """
    tagged_tokens = nltk.pos_tag(docs)
    return tagged_tokens

In [36]:
# Tag every token of the sample text; the result is a list of (token, tag) pairs.
tags = pos_tagger(word_list)
print(tags)

[('Los', 'NNP'), ('Angeles', 'NNP'), ('has', 'VBZ'), ('a', 'DT'), ('Gyarados', 'NNP'), ('(', '('), ('ギャラドス', 'NNP'), (',', ','), ('Gyaradosu', 'NNP'), (',', ','), ('or', 'CC'), (')', ')'), ('is', 'VBZ'), ('a', 'DT'), ('Pokémon', 'JJ'), ('species', 'NNS'), ('in', 'IN'), ('Nintendo', 'NNP'), ('and', 'CC'), ('Game', 'NNP'), ('Freak', 'NNP'), ("'s", 'POS'), ('Pokémon', 'NNP'), ('franchise', 'NN'), ('.', '.'), ('Created', 'VBN'), ('by', 'IN'), ('Ken', 'NNP'), ('Sugimori', 'NNP'), (',', ','), ('Gyarados', 'NNP'), ('first', 'RB'), ('appeared', 'VBD'), ('in', 'IN'), ('the', 'DT'), ('video', 'NN'), ('games', 'NNS'), ('Pokémon', 'NNP'), ('Red', 'NNP'), ('and', 'CC'), ('Pokemon', 'NNP'), ('Green', 'NNP'), ('and', 'CC'), ('subsequent', 'JJ'), ('sequels', 'NNS'), (',', ','), ('later', 'RB'), ('appearing', 'VBG'), ('in', 'IN'), ('various', 'JJ'), ('merchandise', 'NN'), (',', ','), ('spinoff', 'NN'), ('titles', 'NNS'), ('and', 'CC'), ('animated', 'VBN'), ('and', 'CC'), ('printed', 'VBN'), ('adaptatio

### Tags

Full list of Penn Treebank tags: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html

In [37]:
# Print the description and example words for a single tag (here "JJ");
# change the argument to look up a different tag.
nltk.help.upenn_tagset("JJ")

JJ: adjective or numeral, ordinal
    third ill-mannered pre-war regrettable oiled calamitous first separable
    ectoplasmic battery-powered participatory fourth still-to-be-named
    multilingual multi-disciplinary ...


# Chunking

Given POS-tagged words, we want to group related words such as "South" and "Africa" into a single chunk (here, a named entity) so that they carry more contextual meaning.

#### Installation (In command line):

    python -m nltk.downloader maxent_ne_chunker
    python -m nltk.downloader words

In [38]:
# Alternative to the command-line install: uncomment to download from inside the notebook.
#nltk.download('maxent_ne_chunker')
#nltk.download('words')

In [41]:
# Group the tagged tokens into named-entity chunks (e.g. GPE, PERSON, ORGANIZATION);
# the result is a Tree whose root is labelled 'S'.
chunk = nltk.ne_chunk(tags)
# Print the repr so the nested Tree(...) structure is visible.
print(repr(chunk))

Tree('S', [Tree('GPE', [('Los', 'NNP'), ('Angeles', 'NNP')]), ('has', 'VBZ'), ('a', 'DT'), Tree('GPE', [('Gyarados', 'NNP')]), ('(', '('), Tree('ORGANIZATION', [('ギャラドス', 'NNP')]), (',', ','), Tree('PERSON', [('Gyaradosu', 'NNP')]), (',', ','), ('or', 'CC'), (')', ')'), ('is', 'VBZ'), ('a', 'DT'), ('Pokémon', 'JJ'), ('species', 'NNS'), ('in', 'IN'), Tree('GPE', [('Nintendo', 'NNP')]), ('and', 'CC'), Tree('PERSON', [('Game', 'NNP'), ('Freak', 'NNP')]), ("'s", 'POS'), Tree('ORGANIZATION', [('Pokémon', 'NNP')]), ('franchise', 'NN'), ('.', '.'), ('Created', 'VBN'), ('by', 'IN'), Tree('PERSON', [('Ken', 'NNP'), ('Sugimori', 'NNP')]), (',', ','), Tree('PERSON', [('Gyarados', 'NNP')]), ('first', 'RB'), ('appeared', 'VBD'), ('in', 'IN'), ('the', 'DT'), ('video', 'NN'), ('games', 'NNS'), Tree('PERSON', [('Pokémon', 'NNP'), ('Red', 'NNP')]), ('and', 'CC'), Tree('PERSON', [('Pokemon', 'NNP'), ('Green', 'NNP')]), ('and', 'CC'), ('subsequent', 'JJ'), ('sequels', 'NNS'), (',', ','), ('later', 'RB'),

In [45]:
from nltk import Tree
import json

def tree2dict(tree):
    """Recursively convert a ``Tree`` into nested dicts and lists.

    Each ``Tree`` becomes a single-key dict ``{label: [children]}``. Children
    that are themselves ``Tree`` instances are converted the same way; any
    other child (here the ``(token, tag)`` leaves) is kept unchanged.

    Args:
        tree: The ``Tree`` to convert.

    Returns:
        A dict mapping the tree's label to the list of its converted children.
    """
    children = []
    for node in tree:
        if isinstance(node, Tree):
            children.append(tree2dict(node))
        else:
            children.append(node)
    return {tree.label(): children}

# Convert the chunk tree to plain dicts/lists and write it to data.json.
with open('data.json', 'w+') as out_file:
    chunk_as_dict = tree2dict(chunk)
    json.dump(chunk_as_dict, out_file)

# Yes/No Question Generation

Given the tagged and chunked words, generate yes/no questions about the text.

In [None]:
# TODO: implement yes/no question generation from the tagged and chunked words.