In [None]:
import wikipedia
from collections import defaultdict
import json
import codecs
import os

In [None]:
class Wikisearch(object):
    """Run Wikipedia searches and dump the matching pages to a folder.

    Each call to :meth:`search` records the query and the set of
    ``(pageid, url)`` pairs it resolved to; :meth:`save` then writes one
    text file per page plus a ``queries.json`` index into ``out_folder``.

    Attributes:
        folder: Directory that :meth:`save` writes into.
        results: Maximum number of search hits requested per query.
        queries: Query strings, in the order they were searched; the list
            index is the query id used as key in ``mapping``.
        mapping: Query id -> set of ``(pageid, url)`` tuples.
        skipped: ``(query id, title)`` pairs that could not be resolved to
            a single page and were therefore left out of ``mapping``.
    """

    def __init__(self, out_folder, results=20):
        """Store the output folder and the per-query result limit."""
        self.folder = out_folder
        self.results = results
        self.queries = []
        self.mapping = defaultdict(set)
        self.skipped = []

    def _fetch_page(self, qi, title):
        """Return the page for ``title``, or None (and record a skip) if
        the title is missing or ambiguous."""
        try:
            return wikipedia.page(title)
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            self.skipped.append((qi, title))
            return None

    def _resolve(self, qi, title):
        """Return the pages ``title`` resolves to for query ``qi``.

        An ambiguous title is expanded into its disambiguation options,
        one level deep only; options that are themselves ambiguous or
        missing are skipped instead of aborting the whole search.
        """
        try:
            return [wikipedia.page(title)]
        except wikipedia.exceptions.DisambiguationError as e:
            candidates = [self._fetch_page(qi, option) for option in e.options]
            return [page for page in candidates if page is not None]
        except wikipedia.exceptions.PageError:
            self.skipped.append((qi, title))
            return []

    def search(self, query):
        """Search Wikipedia for ``query`` and remember the resulting pages.

        The query is appended to ``self.queries`` and every page it
        resolves to is added to ``self.mapping`` under the query's index.
        Titles that cannot be resolved are listed in ``self.skipped``.
        """
        qi = len(self.queries)
        self.queries.append(query)
        for title in wikipedia.search(query, results=self.results):
            for page in self._resolve(qi, title):
                self.mapping[qi].add((page.pageid, page.url))

    def save(self, content=True):
        """Write all collected pages and the query index to ``self.folder``.

        For every page found by :meth:`search`, a ``<pageid>.txt`` file is
        written holding the full page content, or only the page summary if
        ``content`` is set to False. A ``queries.json`` file maps each query
        index to its query text, page ids and page urls. The folder is
        created if it does not exist yet.
        """
        if not os.path.isdir(self.folder):
            os.makedirs(self.folder)

        outdict = {}
        written = set()  # a page hit by several queries is downloaded once
        for qi, qt in enumerate(self.queries):
            pages = list(self.mapping[qi])
            outdict[qi] = {'query': qt,
                           'page_ids': [pid for pid, _ in pages],
                           'page_urls': [url for _, url in pages]}
            for page_id, _ in pages:
                if page_id in written:
                    continue
                written.add(page_id)
                page = wikipedia.page(pageid=page_id)
                text = page.content if content else page.summary
                # format() instead of "+" so a non-string page id cannot
                # raise a TypeError while building the file name
                path = os.path.join(self.folder, '{0}.txt'.format(page_id))
                with codecs.open(path, 'wb', encoding='utf-8') as tout:
                    tout.write(text)

        index_path = os.path.join(self.folder, 'queries.json')
        with codecs.open(index_path, 'wb', encoding='utf-8') as jout:
            json.dump(outdict, jout)
                
                