# Episode Generation for DeepQA using NLP

In [38]:
from pymongo import MongoClient
import pandas as pd
import nltk

# --- MongoDB source ---------------------------------------------------------
NATMED_CN = "mongodb://localhost:27017"  # connection string handed to MongoClient
NATMED_DB = "natmed"                     # database name
NATMED_COL = "foods"                     # collection name

# --- Output / sizing --------------------------------------------------------
# NOTE(review): neither of these two is read by the code visible in this
# notebook section; presumably used by later cells -- confirm.
DEEPQA_DIR = "../Dumps/natmed_dqa"
DIALOGUE_SIZE = 15

# Episode type labels. Narrative creates one bucket per label and files each
# Episode under its own label, so these strings act as dictionary keys.
# NOTE(review): "BASIC_COFERENCE" and "PATH_FIDING" look like misspellings of
# COREFERENCE / PATH_FINDING; they are kept verbatim because changing a key
# would break any code or data that already uses the current spelling.
EPISODE_TYPES = [
    "SINGLE_FACT",
    "TWO_FACT",
    "THREE_FACT",
    "TWO_ARGS",
    "THREE_ARGS",
    "YES_NO",
    "LISTS",
    "NEGATION",
    "INDEFINITE",
    "BASIC_COFERENCE",
    "CONJUNCTION",
    "COMPOUND",
    "BASIC_DEDUCTION",
    "BASIC_INDUCTION",
    "PATH_FIDING",
    "AGENT_MOTIVATION",
]

In [2]:
# Open the MongoDB connection configured by NATMED_CN.
client = MongoClient(NATMED_CN)
# Despite its name, `db` is the *collection* handle (NATMED_DB -> NATMED_COL);
# the query helpers in this notebook call `db.aggregate(...)` on it.
db = client.get_database(NATMED_DB).get_collection(NATMED_COL)

In [62]:
class Episode(object):
    """A numbered sequence of fact and question lines sharing one episode type.

    Facts and questions draw their line numbers from the same counter, which
    starts at 1. ``str(episode)`` renders one line per entry with the fields
    separated by tabs.
    """

    def __init__(self, _type):
        self.type = _type   # episode type label, used by Narrative as a key
        self.lines = []     # tuples whose first element is the line number
        self.counter = 0    # number of the most recently added line

    def _append(self, *fields):
        """Store ``fields`` under the next line number and return that number."""
        self.counter += 1
        self.lines.append((self.counter,) + fields)
        return self.counter

    def fact(self, fact):
        """Add a statement line; the returned number can be cited by questions."""
        return self._append(fact)

    def dialoge(self, question, answer, fact):
        """Add a question line with its answer and the supporting fact number.

        ``fact`` is stored as given; callers in this file pass the number
        returned by an earlier ``fact()`` call.
        """
        return self._append(question, answer, fact)

    def __str__(self):
        return "\n".join(
            "\t".join(str(field) for field in entry) for entry in self.lines
        )

class Narrative(object):
    """A named collection of episodes grouped by episode type.

    ``episodes`` maps every label in ``EPISODE_TYPES`` to a list of the
    episodes added under that label, in insertion order.
    """

    def __init__(self, name):
        self.name = name
        # One (initially empty) bucket per known episode type.
        self.episodes = {t: [] for t in EPISODE_TYPES}

    def episode(self, episode):
        """Add ``episode`` to the bucket matching its ``type`` attribute.

        Raises:
            KeyError: if ``episode.type`` has no bucket in ``self.episodes``.
        """
        self.episodes[episode.type].append(episode)

    def dump(self):
        """Print the narrative name, then every non-empty bucket's episodes."""
        print("Narrative", self.name)
        for kind, bucket in self.episodes.items():
            if bucket:
                print("Episode", kind)
                # Plain loop: printing is a side effect, so there is no
                # reason to build a throwaway list of None values.
                for ep in bucket:
                    print(ep)

In [49]:
# Smoke test for Narrative/Episode: one YES_NO episode with a single fact and
# a question that refers back to it.
nr = Narrative("teste")

ep = Episode("YES_NO")

# fact() returns the line number of the new fact; pass it on so the question
# line records which fact supports its answer.
fid = ep.fact("Teste is cool!")
ep.dialoge("Is Teste cool?", "yes", fid)

# File the episode under its type in the narrative.
nr.episode(ep)

# Print the narrative name followed by every non-empty episode bucket.
nr.dump()

Narrative teste
Episode YES_NO
1 Teste is cool!
2 Is Teste cool? yes 2


In [58]:
def family_names(limit):
    """Run an aggregation projecting ``name`` and ``familyName``, capped at ``limit``.

    The pipeline is executed on the module-level ``db`` handle and the result
    of ``db.aggregate`` is returned unchanged; callers in this notebook
    iterate over it and read each item like a dict.
    """
    keep_fields = {"$project": {"name": 1, "familyName": 1}}
    cap = {"$limit": limit}
    return db.aggregate([keep_fields, cap])

def family_name_narrative(limit=15):
    """Build the "family_name" narrative from documents that have a family name.

    For every document returned by ``family_names(limit)`` whose
    ``familyName`` is truthy, a fact line and a question line citing that fact
    are appended to a single ``SINGLE_FACT`` episode. Documents without a
    family name are skipped.

    Args:
        limit: Number of documents requested from ``family_names``.
            Defaults to 15.

    Returns:
        A ``Narrative`` named "family_name" containing that one episode.
    """
    nr = Narrative("family_name")
    single = Episode("SINGLE_FACT")

    for doc in family_names(limit):
        raw_family = doc.get('familyName')
        if not raw_family:
            continue

        # Replace "/" separators with spaces for the sentence text.
        f_name = raw_family.replace("/", " ")
        name = doc['name']

        fid = single.fact("{} is the family name of {}.".format(f_name, name))
        single.dialoge("What is the family name of {}?".format(name), f_name, fid)

    nr.episode(single)

    return nr

In [63]:
# Build the family-name narrative from the database and print it; this
# rebinds the notebook-level name `nr`.
nr = family_name_narrative()
nr.dump()

Narrative family_name
Episode SINGLE_FACT
1	Polemoniaceae is the family name of Abscess Root.
2	What is the family name of Abscess Root?	Polemoniaceae	1
3	Menispermaceae is the family name of Abuta.
4	What is the family name of Abuta?	Menispermaceae	3
5	Fabaceae Leguminosae is the family name of Acacia.
6	What is the family name of Acacia?	Fabaceae Leguminosae	5
7	Fabaceae is the family name of Acacia rigidula.
8	What is the family name of Acacia rigidula?	Fabaceae	7
9	Arecaceae Palmae is the family name of Acai.
10	What is the family name of Acai?	Arecaceae Palmae	9
11	Malpighiaceae is the family name of Acerola.
12	What is the family name of Acerola?	Malpighiaceae	11
13	Sapindaceae is the family name of Ackee.
14	What is the family name of Ackee?	Sapindaceae	13
15	Ranunculaceae is the family name of Aconite.
16	What is the family name of Aconite?	Ranunculaceae	15
17	Cyperaceae is the family name of Adrue.
18	What is the family name of Adrue?	Cyperaceae	17
19	Hypoxidaceae or Liliaceae

In [65]:
# Keep the raw aggregation result for the first 10 documents in `q`
# (whatever db.aggregate returns) for ad-hoc inspection.
q = family_names(10)