Refactored the code with suggested changes
ayuhsya committed Dec 8, 2016
1 parent d6a7fff commit 1629a9c
Showing 4 changed files with 108 additions and 127 deletions.
108 changes: 108 additions & 0 deletions sample/core.py
@@ -0,0 +1,108 @@
import requests
from pprint import pprint
import re


class WikiPage:
    def __init__(self, *pageid):
        self.base_url = "https://en.wikipedia.org/w/api.php"

        self.payload = {
            'action': 'query',
            'pageids': '|'.join(str(ID) for ID in pageid),
            'format': 'json',
        }

    # Method to fetch categories
    def get_categories(self, get_hidden=False):
        if not self.payload['pageids']:
            return {}

        prop = 'categories'
        self.payload['clshow'] = 'hidden' if get_hidden else '!hidden'
        self.payload['prop'] = prop

        res = requests.get(self.base_url, params=self.payload).json()
        cat_list = _strip_JSON(res, prop, 'Category')

        # Follow the API's continuation token until all categories are fetched
        while 'continue' in res:
            self.payload['clcontinue'] = res['continue']['clcontinue']
            res = requests.get(self.base_url, params=self.payload).json()
            _append_results(cat_list, res, prop, 'Category')

        self.payload['clcontinue'] = None
        self.payload['prop'] = None
        return cat_list


    # Method to fetch images
    def get_images(self, imlimit='max', imdir='ascending'):
        if not self.payload['pageids']:
            return {}

        prop = 'images'
        self.payload['prop'] = prop
        self.payload['imlimit'] = imlimit
        self.payload['imdir'] = imdir

        res = requests.get(self.base_url, params=self.payload).json()
        img_list = _strip_JSON(res, prop, 'File')

        # Follow the API's continuation token until all images are fetched
        while 'continue' in res:
            self.payload['imcontinue'] = res['continue']['imcontinue']
            res = requests.get(self.base_url, params=self.payload).json()
            _append_results(img_list, res, prop, 'File')

        self.payload['imcontinue'] = None
        self.payload['prop'] = None
        return img_list

    # Method to fetch pages that link to the given page(s)
    def get_linkshere(self, lhprop="pageid|title|redirect", lhlimit="max"):
        if not self.payload['pageids']:
            return {}

        prop = 'linkshere'
        self.payload['prop'] = prop
        self.payload['lhprop'] = lhprop
        self.payload['lhlimit'] = lhlimit

        res = requests.get(self.base_url, params=self.payload).json()
        lh_list = _strip_JSON(res, prop, '_nothing-to-strip_')

        # Follow the API's continuation token until all linking pages are fetched
        while 'continue' in res:
            self.payload['lhcontinue'] = res['continue']['lhcontinue']
            res = requests.get(self.base_url, params=self.payload).json()
            _append_results(lh_list, res, prop, '_nothing-to-strip_')

        self.payload['lhcontinue'] = None
        self.payload['prop'] = None
        return lh_list


def _strip_prop(text, prop):
    # Remove the leading namespace prefix (e.g. "Category:" or "File:") from a title
    return re.sub(prop + ':', "", text, count=1)

def _strip_JSON(res, prop, strip_chars):
    # Map page id -> list of titles for the requested property, prefix stripped
    ret = {}
    for page_id, page_entry in res['query']['pages'].items():
        if prop not in page_entry:
            continue
        ret[page_id] = [_strip_prop(content['title'], strip_chars) for content in page_entry[prop]]
    return ret

def _append_results(currlist, newlist, prop, strip_chars):
    # Merge a continuation page of results into the accumulated per-page lists
    ret = _strip_JSON(newlist, prop, strip_chars)
    for key in ret:
        if key not in currlist:
            currlist[key] = []
        currlist[key] += ret[key]

if __name__ == "__main__":
    wk = WikiPage('843158', '20715044')
    # pprint(wk.get_categories())
    # pprint(wk.get_images())
    # pprint(wk.get_linkshere())
45 changes: 0 additions & 45 deletions wiki_cat.py

This file was deleted.

44 changes: 0 additions & 44 deletions wiki_images.py

This file was deleted.

38 changes: 0 additions & 38 deletions wiki_linkshere.py

This file was deleted.
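
For context on what the new helpers in sample/core.py expect, here is a minimal sketch of the response shape returned by the MediaWiki query API and what _strip_JSON reduces it to; the page id, titles, and continuation token below are illustrative placeholders, not values taken from this commit.

# Illustrative only: a trimmed MediaWiki 'prop=categories' response (ids and titles are made up).
sample_response = {
    'query': {
        'pages': {
            '843158': {
                'pageid': 843158,
                'title': 'Example page',
                'categories': [
                    {'ns': 14, 'title': 'Category:Example topic'},
                    {'ns': 14, 'title': 'Category:Another topic'},
                ],
            },
        },
    },
    # Present only while more results remain; get_categories() feeds
    # 'clcontinue' back into the next request until it disappears.
    'continue': {'clcontinue': '843158|Some_category', 'continue': '||'},
}

# _strip_JSON(sample_response, 'categories', 'Category') would return:
# {'843158': ['Example topic', 'Another topic']}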
