# Question Generation

Purpose: Given an article output a list of sentences
1. Parse Article into sentences
2. From each sentence, generate Stanford dependency parse tree
3. From each parse tree, use rule based method to generate question from sentence.
4. Refine the sentences using language models.

### Article -> Sentences

In [3]:
import nltk

In [6]:
with open('data/set1/a1.txt', 'r', encoding="utf-8") as f:
    content = f.read()

In [7]:
sentences = nltk.sent_tokenize(content)
print(sentences)

['Clint Dempsey\n\n\nClinton Drew "Clint" Dempsey /ˈdɛmpsi/ (born March 9, 1983) is an American professional soccer player who plays for Seattle Sounders FC in Major League Soccer and has served as the captain of the United States national team.', 'He has also played for New England Revolution, Fulham and Tottenham Hotspur.', "Growing up in Nacogdoches, Texas, Dempsey played for one of the top youth soccer clubs in the state, the Dallas Texans, before playing for Furman University's men's soccer team.", 'In 2004, Dempsey was drafted by Major League Soccer club New England Revolution, where he quickly integrated himself into the starting lineup.', 'Hindered initially by a jaw injury, he would eventually score 25 goals in 71 appearances with the Revolution.', "Between 2007 and 2012, Dempsey played for Premier League team Fulham and is the club's highest Premier League goalscorer of all time.", 'Dempsey became the first American player to score a hat-trick in the English Premier League, i

### Sentences -> Parse Trees

In [8]:
from nltk.parse.corenlp import CoreNLPServer
from nltk.parse.corenlp import CoreNLPParser
from nltk.parse.corenlp import CoreNLPDependencyParser
import os

In [14]:
STANFORD = "stanford-corenlp-full-2018-10-05"

# Create the server
server = CoreNLPServer(
   os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
   os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),    
)
server.start()

CoreNLPServerError: Could not connect to the server.

Download Stanford Parser: https://nlp.stanford.edu/software/lex-parser.shtml#Download Version 3.9.2

In [10]:
from nltk.tree import Tree
parser = CoreNLPParser()

In [11]:
def list_to_string(word_list):
    return ' '.join(word_list)

def tree_to_string(parsed_tree):
#     if isinstance(parsed_tree, str):
#         return parsed_tree
#     words = []
#     for subtree in parsed_tree:
#         words.append(tree_to_string(subtree))
    return list_to_string(parsed_tree.leaves())

def binary_question_from_tree(parsed_tree):
    sentence = parsed_tree[0]
    assert(sentence.label() == 'S')
    np = sentence[0]
    vp = sentence[1]
    assert(np.label() == 'NP')
    assert(vp.label() == 'VP')
    if vp[0].label() == 'VBZ':
        return list_to_string([vp[0][0].capitalize(), tree_to_string(np), tree_to_string(vp[1])]) + '?'
    return vp[0]

In [12]:
#Sentence Structure Tree
class SST():
    def __init__(self, label, children):
        self.label = label
        self.children = children

#Sentence Structure Leaf
class SSL():
    def __init__(self, label):
        self.label = label
        
simple_predicate = SST('ROOT', [SST('S', [SSL('NP'), SSL('VP'), SSL('.')])])

def satisfies_structure(parsed_tree, structure):
    if isinstance(structure, SSL):
        return parsed_tree.label() == structure.label
    else:
        if parsed_tree.label() != structure.label or len(parsed_tree) != len(structure.children): return False
        for i in range(len(parsed_tree)):
            if satisfies_structure(parsed_tree[i], structure.children[i]) == False:
                return False
        return True

In [13]:
parse_list = []
for sentence in sentences:
    if len(sentence) < 180:
        parse = next(parser.raw_parse(sentence))
        if satisfies_structure(parse, simple_predicate):
            print("=========================== Sentence ======================")
            print(parse)
#             print(parse.label())
            print(sentence) 
            print(binary_question_from_tree(parse))
            parse_list.append(parse)
            

    
    
parse.draw()

(ROOT
  (S
    (NP (PRP He))
    (VP
      (VBZ has)
      (ADVP (RB also))
      (VP
        (VBN played)
        (PP
          (IN for)
          (NP
            (NP (NNP New) (NNP England) (NNP Revolution))
            (, ,)
            (NP (NNP Fulham))
            (CC and)
            (NP (NNP Tottenham) (NNP Hotspur))))))
    (. .)))
He has also played for New England Revolution, Fulham and Tottenham Hotspur.
Has He also?
(ROOT
  (S
    (NP (PRP He))
    (VP
      (VP
        (VBZ has)
        (VP (VBN earned) (PP (IN over) (NP (CD 100) (NNS caps)))))
      (CC and)
      (VP (VBD scored) (NP (CD 48) (JJ international) (NNS goals)))
      (, ,)
      (S
        (VP
          (VBG making)
          (S
            (NP (PRP him))
            (NP
              (NP
                (NP (DT the) (NN nation) (POS 's))
                (JJ sixth-most)
                (JJ capped)
                (NN player))
              (CC and)
              (NP
                (NP (JJ second) (JJ top) (

In [None]:
server.stop()