Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add suggestions route and wiki api functionality #18

Merged
merged 7 commits into from
May 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 123 additions & 1 deletion servlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
except:
cld2 = None

RECAPTCHA_VERIFICATION_URL = 'https://www.google.com/recaptcha/api/siteverify'

try:
import chardet
except:
Expand Down Expand Up @@ -732,6 +734,98 @@ def get(self):
self.send_error(400, explanation='Accept-Language missing from request headers')


class SuggestionHandler(BaseHandler):
    """Handle ``/suggest``: verify a ReCAPTCHA, then append a suggestion to a wiki page.

    The class attributes below are shared by every request. They are meant
    to be assigned once by the startup code when wiki credentials are
    configured and stay ``None`` otherwise.
    """

    wiki_session = None  # session used for the ReCAPTCHA check and the wiki edits
    wiki_edit_token = None  # edit token handed to addSuggestion; refreshed on failure
    SUGGEST_URL = None  # wiki page the suggestions are appended to
    recaptcha_secret = None  # secret sent to RECAPTCHA_VERIFICATION_URL

    @gen.coroutine
    def get(self):
        """Reject GET requests; suggestions have to be sent with POST."""
        self.send_error(405, explanation='GET request not supported')

    def _sendSuccess(self):
        """Send the success payload shared by the first attempt and the retry."""
        self.sendResponse({
            'responseData': {
                'status': 'Success'
            },
            'responseDetails': None,
            'responseStatus': 200
        })

    @gen.coroutine
    def post(self):
        """Validate the request, verify the ReCAPTCHA and store the suggestion.

        Reads the arguments ``context``, ``word``, ``newWord``, ``langpair``
        and ``g-recaptcha-response``. Responds with the success payload when
        the wiki edit succeeds and with a 400 error otherwise.
        """
        context = self.get_argument('context', None)
        word = self.get_argument('word', None)
        newWord = self.get_argument('newWord', None)
        langpair = self.get_argument('langpair', None)
        recap = self.get_argument('g-recaptcha-response', None)

        if not newWord:
            self.send_error(400, explanation='A suggestion is required')
            return

        if not recap:
            self.send_error(400, explanation='The ReCAPTCHA is required')
            return

        # newWord and recap were already checked above
        if not all([context, word, langpair]):
            self.send_error(400, explanation='All arguments were not provided')
            return

        # wikiAddText unpacks langpair.split('|') into exactly two values, so
        # anything else would surface as an uncaught ValueError further down.
        if langpair.count('|') != 1:
            self.send_error(400, explanation='langpair must be of the form src|dst')
            return

        logging.info("Suggestion (%s): Context is %s \n Word: %s ; New Word: %s " % (langpair, context, word, newWord))
        logging.info('Now verifying ReCAPTCHA.')

        if not self.recaptcha_secret:
            logging.error('No ReCAPTCHA secret provided!')
            self.send_error(400, explanation='Server not configured correctly for suggestions')
            return

        # The session is only created when wiki credentials were supplied at
        # startup; without it both the verification and the edit would fail
        # with an AttributeError on None.
        if self.wiki_session is None:
            logging.error('No wiki session available!')
            self.send_error(400, explanation='Server not configured correctly for suggestions')
            return

        # for nginx or when behind a proxy
        x_real_ip = self.request.headers.get("X-Real-IP")
        user_ip = x_real_ip or self.request.remote_ip
        payload = {
            'secret': self.recaptcha_secret,
            'response': recap,
            'remoteip': user_ip
        }
        recapRequest = self.wiki_session.post(RECAPTCHA_VERIFICATION_URL,
                                              data=payload)
        # .get(): a reply without a 'success' field counts as a failed check
        if recapRequest.json().get('success'):
            logging.info('ReCAPTCHA verified, adding data to wiki')
        else:
            logging.info('ReCAPTCHA verification failed, stopping')
            self.send_error(400, explanation='ReCAPTCHA verification failed')
            return

        # Imported here so the handler does not rely on the startup code
        # having placed these names in module scope.
        from util import addSuggestion
        from wiki_util import wikiGetToken

        data = {
            'context': context, 'langpair': langpair,
            'word': word, 'newWord': newWord
        }
        result = addSuggestion(self.wiki_session,
                               self.SUGGEST_URL, self.wiki_edit_token,
                               data)

        if not result:
            logging.info('Page update failed, trying to get new edit token')
            # Store the new token on the class, not on this instance: a
            # handler instance lives for a single request, so an instance
            # attribute would be thrown away and every later request would
            # start again with the stale token.
            SuggestionHandler.wiki_edit_token = wikiGetToken(
                SuggestionHandler.wiki_session, 'edit', 'info|revisions')
            logging.info('Obtained new edit token. Trying page update again.')
            result = addSuggestion(self.wiki_session,
                                   self.SUGGEST_URL, self.wiki_edit_token,
                                   data)

        if result:
            self._sendSuccess()
        else:
            self.send_error(400, explanation='Page update failed')


class PipeDebugHandler(BaseHandler):

@gen.coroutine
Expand Down Expand Up @@ -831,6 +925,11 @@ def sanity_check():
parser.add_argument('-v', '--verbosity', help='logging verbosity', type=int, default=0)
parser.add_argument('-V', '--version', help='show APY version', action='version', version="%(prog)s version " + __version__)
parser.add_argument('-S', '--scalemt-logs', help='generates ScaleMT-like logs; use with --log-path; disables', action='store_true')
parser.add_argument('-wp', '--wiki-password', help="Apertium Wiki account password for SuggestionHandler", default=None)
parser.add_argument('-wu', '--wiki-username', help="Apertium Wiki account username for SuggestionHandler", default=None)
parser.add_argument('-wd', '--wiki-url', help="Apertium Wiki page to send data to for SuggestionHandler", default='User:Svineet')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Kira-D: As you suggested, we should probably take this out. Adding it to the user page is safer.

# TODO: change the default value of the --wiki-url argument defined directly above
parser.add_argument('-rs', '--recaptcha-secret', help="ReCAPTCHA secret for suggestion validation", default=None)
parser.add_argument('-M', '--unknown-memory-limit', help="keeps unknown words in memory until a limit is reached (default = 1000)", type=int, default=1000)
parser.add_argument('-T', '--stat-period-max-age', help="How many seconds back to keep track request timing stats (default = 3600)", type=int, default=3600)
args = parser.parse_args()
Expand Down Expand Up @@ -882,9 +981,32 @@ def sanity_check():
(r'/calcCoverage', CoverageHandler),
(r'/identifyLang', IdentifyLangHandler),
(r'/getLocale', GetLocaleHandler),
(r'/pipedebug', PipeDebugHandler)
(r'/pipedebug', PipeDebugHandler),
(r'/suggest', SuggestionHandler)
])

# Configure SuggestionHandler only when username, password and page are all given.
if args.wiki_username and args.wiki_password and args.wiki_url:
    logging.info('Logging into Apertium Wiki with username %s' % args.wiki_username)

    # requests is optional: without it the suggestion feature is left unconfigured.
    try:
        import requests
    except ImportError:
        requestsImported = False
        logging.error('requests module is required for SuggestionHandler')
    else:
        requestsImported = True

    if requestsImported:
        from wiki_util import wikiLogin, wikiGetToken

        SuggestionHandler.SUGGEST_URL = args.wiki_url
        SuggestionHandler.recaptcha_secret = args.recaptcha_secret

        # One session is shared by all requests; log it in, then fetch the
        # edit token that page updates need.
        session = requests.Session()
        SuggestionHandler.wiki_session = session
        SuggestionHandler.auth_token = wikiLogin(
            session, args.wiki_username, args.wiki_password)
        SuggestionHandler.wiki_edit_token = wikiGetToken(
            session, 'edit', 'info|revisions')

global http_server
if args.ssl_cert and args.ssl_key:
http_server = tornado.httpserver.HTTPServer(application, ssl_options={
Expand Down
28 changes: 28 additions & 0 deletions tools/apertium-recaptcha-test.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Recaptcha test</title>

    <script src='https://www.google.com/recaptcha/api.js'></script>
</head>
<body>
    <!-- Manual test page for the /suggest route of a locally running server.
         The form must use POST: the route answers GET with a 405 error. -->
    <form action="http://localhost:2737/suggest" method="post">
        <span>Language pair</span>
        <input type="text" name="langpair" placeholder='eng|spa'>
        <br>
        <span>Context</span>
        <input type="text" name='context' placeholder='context'>
        <br>
        <span>word</span>
        <input type="text" name='word' placeholder='word'>
        <br>
        <span>newword</span>
        <input type="text" name='newWord' placeholder='newWord'>


        <!-- The widget adds the g-recaptcha-response field that the server verifies. -->
        <div class="g-recaptcha" data-sitekey="6LedqOgSAAAAAKEbYmAXZTTahn8H08JuO0b2BmE3"></div>
        <input type="submit" value="Submit">
    </form>
</body>
</html>
19 changes: 19 additions & 0 deletions util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from subprocess import Popen, PIPE
from datetime import datetime

from wiki_util import wikiGetPage, wikiEditPage, wikiAddText

iso639Codes = {"abk":"ab","aar":"aa","afr":"af","aka":"ak","sqi":"sq","amh":"am","ara":"ar","arg":"an","hye":"hy","asm":"as","ava":"av","ave":"ae","aym":"ay","aze":"az","bam":"bm","bak":"ba","eus":"eu","bel":"be","ben":"bn","bih":"bh","bis":"bi","bos":"bs","bre":"br","bul":"bg","mya":"my","cat":"ca","cha":"ch","che":"ce","nya":"ny","zho":"zh","chv":"cv","cor":"kw","cos":"co","cre":"cr","hrv":"hr","ces":"cs","dan":"da","div":"dv","nld":"nl","dzo":"dz","eng":"en","epo":"eo","est":"et","ewe":"ee","fao":"fo","fij":"fj","fin":"fi","fra":"fr","ful":"ff","glg":"gl","kat":"ka","deu":"de","ell":"el","grn":"gn","guj":"gu","hat":"ht","hau":"ha","heb":"he","her":"hz","hin":"hi","hmo":"ho","hun":"hu","ina":"ia","ind":"id","ile":"ie","gle":"ga","ibo":"ig","ipk":"ik","ido":"io","isl":"is","ita":"it","iku":"iu","jpn":"ja","jav":"jv","kal":"kl","kan":"kn","kau":"kr","kas":"ks","kaz":"kk","khm":"km","kik":"ki","kin":"rw","kir":"ky","kom":"kv","kon":"kg","kor":"ko","kur":"ku","kua":"kj","lat":"la","ltz":"lb","lug":"lg","lim":"li","lin":"ln","lao":"lo","lit":"lt","lub":"lu","lav":"lv","glv":"gv","mkd":"mk","mlg":"mg","msa":"ms","mal":"ml","mlt":"mt","mri":"mi","mar":"mr","mah":"mh","mon":"mn","nau":"na","nav":"nv","nob":"nb","nde":"nd","nep":"ne","ndo":"ng","nno":"nn","nor":"no","iii":"ii","nbl":"nr","oci":"oc","oji":"oj","chu":"cu","orm":"om","ori":"or","oss":"os","pan":"pa","pli":"pi","fas":"fa","pol":"pl","pus":"ps","por":"pt","que":"qu","roh":"rm","run":"rn","ron":"ro","rus":"ru","san":"sa","srd":"sc","snd":"sd","sme":"se","smo":"sm","sag":"sg","srp":"sr","gla":"gd","sna":"sn","sin":"si","slk":"sk","slv":"sl","som":"so","sot":"st","azb":"az","spa":"es","sun":"su","swa":"sw","ssw":"ss","swe":"sv","tam":"ta","tel":"te","tgk":"tg","tha":"th","tir":"ti","bod":"bo","tuk":"tk","tgl":"tl","tsn":"tn","ton":"to","tur":"tr","tso":"ts","tat":"tt","twi":"tw","tah":"ty","uig":"ug","ukr":"uk","urd":"ur","uzb":"uz","ven":"ve","vie":"vi","vol":"vo","wln":"wa","cym":"cy","wol":"wo","fry":"fy","xho"
:"xh","yid":"yi","yor":"yo","zha":"za","zul":"zu", "hbs":"sh", "arg":"an", "pes":"fa"}
'''
Bootstrapped from https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes using
Expand Down Expand Up @@ -194,3 +196,20 @@ def __init__(self, handler):
self.key = handler.get_argument('key', default = 'null')
self.ip = handler.request.headers.get('X-Real-IP', handler.request.remote_ip)
self.referer = handler.request.headers.get('Referer', 'null')


def addSuggestion(s, SUGGEST_URL, editToken, data):
    """Append one suggestion to a wiki page and report whether the edit succeeded.

    Args:
        s: session object passed through to the wiki helper functions.
        SUGGEST_URL: title of the wiki page to update.
        editToken: edit token passed to wikiEditPage.
        data: suggestion fields passed to wikiAddText.

    Returns:
        True when the edit response reports ``'Success'``, False otherwise.
    """
    content = wikiGetPage(s, SUGGEST_URL)
    content = wikiAddText(content, data)
    editResult = wikiEditPage(s, SUGGEST_URL, content, editToken)

    try:
        outcome = editResult['edit']['result']
    except KeyError:
        # The response has no edit result at all. Fall through to the error
        # log below so the response is recorded, not silently dropped.
        outcome = None

    if outcome == 'Success':
        logging.info('Update of page %s' % (SUGGEST_URL))
        return True

    logging.error('Update of page %s failed: %s' % (SUGGEST_URL,
                                                     editResult))
    return False
84 changes: 84 additions & 0 deletions wiki_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3
import logging
import json

WIKI_API_URL = 'http://wiki.apertium.org/w/api.php'


# Apertium Wiki utility functions
def wikiLogin(s, loginName, password):
    """Log the session in to the wiki with a two-step token handshake.

    The first request obtains a login token; the second repeats the
    credentials together with that token.

    Args:
        s: session object providing ``post``; it carries the login state.
        loginName: wiki account name.
        password: wiki account password.

    Returns:
        The login token when the wiki reports ``'Success'``; ``None`` when
        the login is rejected or any error occurs (both cases are logged).
    """
    try:
        payload = {'action': 'login', 'format': 'json', 'lgname': loginName, 'lgpassword': password}
        # Credentials go in the POST body (data=), as wikiEditPage already
        # does, so the password does not become part of the request URL.
        authResult = s.post(WIKI_API_URL, data=payload)
        authToken = json.loads(authResult.text)['login']['token']
        logging.debug('Auth token: {}'.format(authToken))

        payload = {'action': 'login', 'format': 'json', 'lgname': loginName, 'lgpassword': password, 'lgtoken': authToken}
        authResult = s.post(WIKI_API_URL, data=payload)
        loginResult = json.loads(authResult.text)['login']['result']
        if loginResult != 'Success':
            logging.critical('Failed to login as {}: {}'.format(loginName, loginResult))
            return None

        logging.info('Login as {} succeeded'.format(loginName))
        return authToken
    except Exception as e:
        logging.critical('Failed to login: {}'.format(e))
        return None


def wikiGetPage(s, pageTitle):
    """Return the content of the latest revision of a wiki page.

    Args:
        s: session object providing ``get``.
        pageTitle: title of the page to fetch.

    Returns:
        The revision content, or ``None`` when the wiki marks the page as
        missing.
    """
    query = {
        'action': 'query',
        'format': 'json',
        'titles': pageTitle,
        'prop': 'revisions',
        'rvprop': 'content'
    }
    response = s.get(WIKI_API_URL, params=query)
    pages = json.loads(response.text)['query']['pages']

    # Only the first page entry of the reply is used.
    page = list(pages.values())[0]
    if 'missing' in page:
        return None
    return page['revisions'][0]['*']


def wikiEditPage(s, pageTitle, pageContents, editToken):
    """Replace the text of a wiki page and return the decoded API response.

    Args:
        s: session object providing ``post``.
        pageTitle: title of the page to edit.
        pageContents: new text of the page.
        editToken: edit token sent with the request.

    Returns:
        The JSON response of the edit request, decoded with ``json.loads``.
    """
    form = {
        'action': 'edit',
        'format': 'json',
        'title': pageTitle,
        'text': pageContents,
        'bot': 'True',
        'contentmodel': 'wikitext',
        'token': editToken
    }
    response = s.post(WIKI_API_URL, data=form)
    return json.loads(response.text)


def wikiGetToken(s, tokenType, props):
    """Request a token of the given type from the wiki.

    Args:
        s: session object providing ``get``.
        tokenType: kind of token to request, e.g. ``'edit'``.
        props: value of the ``prop`` query parameter.

    Returns:
        The token, or ``None`` when it could not be obtained (the error is
        logged).
    """
    try:
        payload = {
            'action': 'query',
            'format': 'json',
            'prop': props,
            'intoken': tokenType,
            'titles': 'Main Page'
        }
        tokenResult = s.get(WIKI_API_URL, params=payload)
        pages = json.loads(tokenResult.text)['query']['pages']
        # A single title is queried, so take the first page entry (as
        # wikiGetPage does) without assuming its page id is '1'.
        page = list(pages.values())[0]
        token = page['%stoken' % tokenType]
        logging.debug('%s token: %s' % (tokenType, token))
        return token
    except Exception as e:
        logging.error('Failed to obtain %s token: %s' % (tokenType, e))
        return None


def wikiAddText(content, data):
    """Return the page text with one ``suggest`` list entry appended.

    Args:
        content: current page text; any falsy value is treated as ``''``.
        data: mapping with the keys ``langpair`` (``'src|dst'``),
            ``context``, ``word`` and ``newWord``.

    Returns:
        The page text followed by a new line holding the suggestion entry.
    """
    existing = content if content else ''

    # langpair has to split into exactly two parts, source and target.
    src, dst = data['langpair'].split('|')
    fields = (src, dst, data['context'], data['word'], data['newWord'])

    return existing + '\n* {{suggest|%s|%s|%s|%s|%s}}' % fields