import sys, os, re, argparse, logging, time, signal, tempfile, zipfile, string, random
from subprocess import Popen, PIPE
from multiprocessing import Pool, TimeoutError
from functools import wraps

RECAPTCHA_VERIFICATION_URL = 'https://www.google.com/recaptcha/api/siteverify'

# The bypass token is accepted in place of a real ReCAPTCHA response, so it is
# drawn from the operating system's entropy source rather than the default,
# predictable Mersenne Twister generator.
_secure_random = random.SystemRandom()
bypassToken = ''.join(_secure_random.choice(string.ascii_letters + string.digits) for _ in range(24))


class SuggestionHandler(BaseHandler):
    """Accept a word suggestion via POST and append it to a wiki page.

    The request must carry ``context``, ``word``, ``newWord``, ``langpair``
    and ``g-recaptcha-response``.  The ReCAPTCHA response is verified against
    RECAPTCHA_VERIFICATION_URL (or matched against ``bypassToken``) before
    the suggestion is handed to ``util.addSuggestion``.
    """

    # Shared configuration; the application start-up code assigns these on the
    # class when wiki credentials are supplied.  They stay None otherwise.
    wiki_session = None
    wiki_edit_token = None
    SUGGEST_URL = None
    recaptcha_secret = None

    @gen.coroutine
    def get(self):
        """Reject GET: suggestions can only be submitted with POST."""
        self.send_error(405, explanation='GET request not supported')

    def _recaptcha_verified(self, recap):
        """Return True when the verification service accepts ``recap``."""
        # for nginx or when behind a proxy
        user_ip = self.request.headers.get("X-Real-IP") or self.request.remote_ip
        payload = {
            'secret': self.recaptcha_secret,
            'response': recap,
            'remoteip': user_ip
        }
        # NOTE(review): wiki_session is a synchronous HTTP session, so this
        # call (and the wiki calls in post) blocks the server while it waits.
        reply = self.wiki_session.post(RECAPTCHA_VERIFICATION_URL, data=payload)
        try:
            return bool(reply.json().get('success'))
        except (ValueError, AttributeError):
            # Body was not JSON, or not a JSON object: treat as "not verified"
            # instead of failing the request with an unhandled exception.
            return False

    @gen.coroutine
    def post(self):
        """Validate the request, verify the ReCAPTCHA and store the suggestion.

        Answers 400 with an explanation when an argument is missing or
        malformed, when the server lacks its ReCAPTCHA secret or wiki
        session, when verification fails, or when the wiki page cannot be
        updated even after refreshing the edit token.
        """
        context = self.get_argument('context', None)
        word = self.get_argument('word', None)
        newWord = self.get_argument('newWord', None)
        langpair = self.get_argument('langpair', None)
        recap = self.get_argument('g-recaptcha-response', None)

        if not newWord:
            self.send_error(400, explanation='A suggestion is required')
            return

        if not recap:
            self.send_error(400, explanation='The ReCAPTCHA is required')
            return

        if not all([context, word, langpair]):
            self.send_error(400, explanation='All arguments were not provided')
            return

        # The wiki entry is built by splitting langpair on '|' into exactly
        # two parts; reject anything else here rather than failing later.
        if langpair.count('|') != 1:
            self.send_error(400, explanation='langpair must have the form src|dst')
            return

        logging.info("Suggestion (%s): Context is %s \n Word: %s ; New Word: %s ",
                     langpair, context, word, newWord)
        logging.info('Now verifying ReCAPTCHA.')

        if not self.recaptcha_secret:
            logging.error('No ReCAPTCHA secret provided!')
            self.send_error(400, explanation='Server not configured correctly for suggestions')
            return

        if self.wiki_session is None:
            logging.error('No wiki session available!')
            self.send_error(400, explanation='Server not configured correctly for suggestions')
            return

        if recap == bypassToken:
            logging.info('Adding data to wiki with bypass token')
        elif self._recaptcha_verified(recap):
            logging.info('ReCAPTCHA verified, adding data to wiki')
        else:
            logging.info('ReCAPTCHA verification failed, stopping')
            self.send_error(400, explanation='ReCAPTCHA verification failed')
            return

        # Imported here, like addSuggestion, so the retry path below does not
        # depend on a name that is only imported inside the start-up code.
        from util import addSuggestion
        from wiki_util import wikiGetToken
        data = {
            'context': context, 'langpair': langpair,
            'word': word, 'newWord': newWord
        }
        updated = addSuggestion(self.wiki_session,
                                self.SUGGEST_URL, self.wiki_edit_token,
                                data)

        if not updated:
            logging.info('Page update failed, trying to get new edit token')
            newToken = wikiGetToken(self.wiki_session, 'edit', 'info|revisions')
            if newToken is not None:
                # Store on the class: a handler instance only lives for one
                # request, so an instance attribute would be thrown away and
                # every later request would start with the stale token again.
                SuggestionHandler.wiki_edit_token = newToken
                logging.info('Obtained new edit token. Trying page update again.')
                updated = addSuggestion(self.wiki_session,
                                        self.SUGGEST_URL, newToken,
                                        data)

        if updated:
            self.sendResponse({
                'responseData': {
                    'status': 'Success'
                },
                'responseDetails': None,
                'responseStatus': 200
            })
        else:
            self.send_error(400, explanation='Page update failed')
+ (r'/pipedebug', PipeDebugHandler), + (r'/suggest', SuggestionHandler) ]) + if args.bypass_token: + logging.info('reCaptcha bypass for testing:%s' % bypassToken) + + if all([args.wiki_username, args.wiki_password]): + logging.info('Logging into Apertium Wiki with username %s' % args.wiki_username) + + try: + import requests + except ImportError: + logging.error('requests module is required for SuggestionHandler') + + if requests: + from wiki_util import wikiLogin, wikiGetToken + SuggestionHandler.SUGGEST_URL = 'User:' + args.wiki_username + SuggestionHandler.recaptcha_secret = args.recaptcha_secret + SuggestionHandler.wiki_session = requests.Session() + SuggestionHandler.auth_token = wikiLogin( + SuggestionHandler.wiki_session, + args.wiki_username, + args.wiki_password) + SuggestionHandler.wiki_edit_token = wikiGetToken( + SuggestionHandler.wiki_session, 'edit', 'info|revisions') + global http_server if args.ssl_cert and args.ssl_key: http_server = tornado.httpserver.HTTPServer(application, ssl_options={ diff --git a/tools/apertium-recaptcha-test.html b/tools/apertium-recaptcha-test.html new file mode 100644 index 000000000..fdc11c2f8 --- /dev/null +++ b/tools/apertium-recaptcha-test.html @@ -0,0 +1,28 @@ + + + + + Recaptcha test + + + + +
+ Language pair + +
+ Context + +
+ word + +
+ newword + + + +
+ +
import logging
import json

# NOTE: in the patch, addSuggestion lives in util.py and the wiki* helpers in
# wiki_util.py (util.py imports wikiGetPage, wikiEditPage and wikiAddText by
# name).  They are kept in one namespace here.

iso639Codes = {"abk":"ab","aar":"aa","afr":"af","aka":"ak","sqi":"sq","amh":"am","ara":"ar","arg":"an","hye":"hy","asm":"as","ava":"av","ave":"ae","aym":"ay","aze":"az","bam":"bm","bak":"ba","eus":"eu","bel":"be","ben":"bn","bih":"bh","bis":"bi","bos":"bs","bre":"br","bul":"bg","mya":"my","cat":"ca","cha":"ch","che":"ce","nya":"ny","zho":"zh","chv":"cv","cor":"kw","cos":"co","cre":"cr","hrv":"hr","ces":"cs","dan":"da","div":"dv","nld":"nl","dzo":"dz","eng":"en","epo":"eo","est":"et","ewe":"ee","fao":"fo","fij":"fj","fin":"fi","fra":"fr","ful":"ff","glg":"gl","kat":"ka","deu":"de","ell":"el","grn":"gn","guj":"gu","hat":"ht","hau":"ha","heb":"he","her":"hz","hin":"hi","hmo":"ho","hun":"hu","ina":"ia","ind":"id","ile":"ie","gle":"ga","ibo":"ig","ipk":"ik","ido":"io","isl":"is","ita":"it","iku":"iu","jpn":"ja","jav":"jv","kal":"kl","kan":"kn","kau":"kr","kas":"ks","kaz":"kk","khm":"km","kik":"ki","kin":"rw","kir":"ky","kom":"kv","kon":"kg","kor":"ko","kur":"ku","kua":"kj","lat":"la","ltz":"lb","lug":"lg","lim":"li","lin":"ln","lao":"lo","lit":"lt","lub":"lu","lav":"lv","glv":"gv","mkd":"mk","mlg":"mg","msa":"ms","mal":"ml","mlt":"mt","mri":"mi","mar":"mr","mah":"mh","mon":"mn","nau":"na","nav":"nv","nob":"nb","nde":"nd","nep":"ne","ndo":"ng","nno":"nn","nor":"no","iii":"ii","nbl":"nr","oci":"oc","oji":"oj","chu":"cu","orm":"om","ori":"or","oss":"os","pan":"pa","pli":"pi","fas":"fa","pol":"pl","pus":"ps","por":"pt","que":"qu","roh":"rm","run":"rn","ron":"ro","rus":"ru","san":"sa","srd":"sc","snd":"sd","sme":"se","smo":"sm","sag":"sg","srp":"sr","gla":"gd","sna":"sn","sin":"si","slk":"sk","slv":"sl","som":"so","sot":"st","azb":"az","spa":"es","sun":"su","swa":"sw","ssw":"ss","swe":"sv","tam":"ta","tel":"te","tgk":"tg","tha":"th","tir":"ti","bod":"bo","tuk":"tk","tgl":"tl","tsn":"tn","ton":"to","tur":"tr","tso":"ts","tat":"tt","twi":"tw","tah":"ty","uig":"ug","ukr":"uk","urd":"ur","uzb":"uz","ven":"ve","vie":"vi","vol":"vo","wln":"wa","cym":"cy","wol":"wo","fry":"fy","xho":"xh","yid":"yi","yor":"yo","zha":"za","zul":"zu", "hbs":"sh", "arg":"an", "pes":"fa"}


def addSuggestion(s, SUGGEST_URL, editToken, data):
    """Append one suggestion to the wiki page SUGGEST_URL.

    Fetches the current page text, appends an entry built from ``data``
    (keys ``langpair``, ``word``, ``newWord``, ``context``) and writes the
    page back using ``editToken``.

    Returns True when the edit API reports 'Success', False otherwise.  Every
    failure is logged together with the API reply.
    """
    content = wikiGetPage(s, SUGGEST_URL)
    content = wikiAddText(content, data)
    editResult = wikiEditPage(s, SUGGEST_URL, content, editToken)

    # An error reply has no 'edit' key at all; treat that like any other
    # failure so that it is logged instead of silently returning False.
    edit = editResult.get('edit') if isinstance(editResult, dict) else None
    outcome = edit.get('result') if isinstance(edit, dict) else None
    if outcome == 'Success':
        logging.info('Update of page %s', SUGGEST_URL)
        return True

    logging.error('Update of page %s failed: %s', SUGGEST_URL, editResult)
    return False


# NOTE(review): plain http, so the login password travels unencrypted.
# Confirm that the wiki serves https before switching the scheme.
WIKI_API_URL = 'http://wiki.apertium.org/w/api.php'


# Apertium Wiki utility functions
def wikiLogin(s, loginName, password):
    """Log the session ``s`` into the wiki using the two-step token login.

    The first request obtains a login token, the second repeats the
    credentials together with that token.

    Returns the login token on success and None on any failure (which is
    logged at critical level).
    """
    credentials = {
        'action': 'login',
        'format': 'json',
        'lgname': loginName,
        'lgpassword': password
    }
    try:
        # Credentials go in the POST body (data=), not in the URL query
        # string, so the password does not end up in URLs and access logs.
        authResult = s.post(WIKI_API_URL, data=credentials)
        authToken = json.loads(authResult.text)['login']['token']
        logging.debug('Auth token: {}'.format(authToken))

        authResult = s.post(WIKI_API_URL, data=dict(credentials, lgtoken=authToken))
        outcome = json.loads(authResult.text)['login']['result']
    except Exception as e:
        # Best effort by design: a failed login is reported, not raised.
        logging.critical('Failed to login: {}'.format(e))
        return None

    if outcome != 'Success':
        logging.critical('Failed to login as {}: {}'.format(loginName, outcome))
        return None

    logging.info('Login as {} succeeded'.format(loginName))
    return authToken


def wikiGetPage(s, pageTitle):
    """Return the wikitext of ``pageTitle``, or None if the page is missing.

    Errors (unexpected reply shape, invalid JSON) are deliberately not
    swallowed: the caller treats None as "empty page" and would otherwise
    overwrite existing content after a failed read.
    """
    payload = {
        'action': 'query',
        'format': 'json',
        'titles': pageTitle,
        'prop': 'revisions',
        'rvprop': 'content'
    }
    viewResult = s.get(WIKI_API_URL, params=payload)
    pages = json.loads(viewResult.text)['query']['pages']

    # A single title was requested, so the first entry is the page.
    page = list(pages.values())[0]
    if 'missing' in page:
        return None
    return page['revisions'][0]['*']


def wikiEditPage(s, pageTitle, pageContents, editToken):
    """Replace the text of ``pageTitle`` and return the decoded API reply."""
    payload = {
        'action': 'edit',
        'format': 'json',
        'title': pageTitle,
        'text': pageContents,
        'bot': 'True',
        'contentmodel': 'wikitext',
        'token': editToken
    }
    editResult = s.post(WIKI_API_URL, data=payload)
    return json.loads(editResult.text)


def wikiGetToken(s, tokenType, props):
    """Fetch a token of ``tokenType`` (e.g. 'edit') for the session ``s``.

    Returns the token, or None when it could not be obtained (the failure is
    logged).
    """
    try:
        payload = {
            'action': 'query',
            'format': 'json',
            'prop': props,
            'intoken': tokenType,
            'titles': 'Main Page'
        }
        tokenResult = s.get(WIKI_API_URL, params=payload)
        pages = json.loads(tokenResult.text)['query']['pages']
        # The reply is keyed by page id; take the single returned page
        # instead of assuming that 'Main Page' has id 1.
        token = list(pages.values())[0]['%stoken' % tokenType]
        logging.debug('%s token: %s' % (tokenType, token))
        return token
    except Exception as e:
        logging.error('Failed to obtain %s token: %s' % (tokenType, e))
        return None


# Characters that would end or split a template argument, and line breaks
# that would end the list item, mapped to harmless replacements.
_TEMPLATE_ESCAPES = (
    ('{', '&#123;'),
    ('}', '&#125;'),
    ('|', '&#124;'),
    ('=', '&#61;'),
    ('\r', ' '),
    ('\n', ' '),
)


def _escapeTemplateArg(value):
    """Make ``value`` safe to use as one positional template argument."""
    for raw, safe in _TEMPLATE_ESCAPES:
        value = value.replace(raw, safe)
    return value


def wikiAddText(content, data):
    """Return ``content`` with one ``{{suggest|...}}`` list item appended.

    ``data`` supplies ``langpair`` (of the form ``src|dst``), ``word``,
    ``newWord`` and ``context``.  A falsy ``content`` is treated as an empty
    page.  The values come from the request, so they are escaped before being
    placed inside the template.

    Raises ValueError when ``langpair`` is not exactly ``src|dst``.
    """
    if not content:
        content = ''

    pair = data['langpair'].split('|')
    if len(pair) != 2:
        raise ValueError('langpair must have the form src|dst, got %r' % (data['langpair'],))

    fields = pair + [data['word'], data['newWord'], data['context']]
    content += '\n* {{suggest|%s|%s|%s|%s|%s}}' % tuple(
        _escapeTemplateArg(field) for field in fields)

    return content