From 2d1305ad16e9982055b0f755c6e0db06e8ed92ef Mon Sep 17 00:00:00 2001 From: emijrp Date: Fri, 29 Jul 2016 13:46:47 +0200 Subject: [PATCH] assessing vandalism, test edits and blanking --- avbot.py | 107 ++++++++-- avbotanalysis.py | 26 +-- avbotcomb.py | 67 +------ avbotglobals.py | 171 ++++------------ avbotload.py | 141 +------------ avbotml.py | 501 +++++++++++++++++++++++++++++++++++++++++++++++ stream.sample | 38 ++++ 7 files changed, 688 insertions(+), 363 deletions(-) create mode 100644 avbotml.py create mode 100644 stream.sample diff --git a/avbot.py b/avbot.py index 30c66c3..3733e01 100644 --- a/avbot.py +++ b/avbot.py @@ -382,6 +382,8 @@ def isUserNewbie(self, user): self.users[user] = {'groups': userprops['groups'], 'whitelisted': False, 'editcount': userprops['editcount']} return self.isUserNewbie(user) + + def analyseChange(self, change): change['timestamp_utc'] = datetime.datetime.fromtimestamp(change['timestamp']).strftime('%Y-%m-%d %H:%M:%S') if change['user'] == self.wikiBotName: # Ignore own edits @@ -506,24 +508,101 @@ def rcIRC(self): ircbuffer += data.decode('utf-8') except socket.error: print >>sys.stderr, 'Socket error!' - - def analyseEdit(self, change): + + def getDiff(self, change): + """ Devuelve el diff de dos revisiones """ + """ Return a diff of two revisions """ + query = pywikibot.data.api.Request(parameters={'action': 'compare', 'fromrev': change['revision']['old'], 'torev': change['revision']['new']}, site=self.site) data = query.submit() + diff = {'added': [], 'deleted': []} if 'compare' in data and '*' in data['compare']: - added = [] - m = re.findall(r'(?im)]*?>([^<>]*?)', data['compare']['*']) - for i in m: - added.append(i) - m = re.findall(r'(?im)
([^<>]*?)
', data['compare']['*']) - for i in m: - added.append(i) - added_plain = '\n'.join(added) - - for filterr in self.filters: - m = re.findall(filterr['compiled'], added_plain) + diff['added'] += re.findall(r'(?im)]*?>([^<>]*?)', data['compare']['*']) + diff['added'] += re.findall(r'(?im)
([^<>]*?)
', data['compare']['*']) + diff['deleted'] += re.findall(r'(?im)]*?>([^<>]*?)', data['compare']['*']) + diff['deleted'] += re.findall(r'(?im)
([^<>]*?)
', data['compare']['*']) + return diff + + def getScore(self, diff): + """ Calcula la puntuación para un diff al pasarle los filtros """ + """ Calculate score for diff using filters """ + + score = { + 'test': {'added_score': 0, 'deleted_score': 0}, + 'vandalism': {'added_score': 0, 'deleted_score': 0}, + 'global': {'score': 0, 'group': 'unknown'}, + } + for ifilter in self.filters: + for iadded in diff['added']: + m = re.findall(ifilter['compiled'], iadded) for i in m: - print("!!!Encontrado %s (%s score)" % (filterr['regexp'], filterr['score'])) + print("!!!Añadido %s (%s score)" % (ifilter['regexp'], ifilter['score'])) + score[ifilter['group']]['added_score'] += ifilter['score'] + for ideleted in diff['deleted']: + m = re.findall(ifilter['compiled'], ideleted) + for i in m: + print("!!!Eliminado %s (%s score)" % (ifilter['regexp'], ifilter['score'] * -1)) + score[ifilter['group']]['deleted_score'] += ifilter['score'] * -1 + + score['global']['score'] = (score['test']['added_score'] + score['vandalism']['added_score']) + \ + (score['test']['deleted_score'] + score['vandalism']['deleted_score']) + score['global']['group'] = score['test']['added_score'] <= score['vandalism']['added_score'] and 'test' or 'vandalism' + return score + + def revertEdit(self, change, alledits=False): + """ Revierte una edición de un usuario o todas sus ediciones """ + """ Revert one or all edits by a user """ + + print("---> Reverting %s edit(s) by %s" % (change['revision']['new'], change['user'])) + pass + + def isEditBlanking(self, change): + """ Evalúa si una edición es un blanqueo """ + """ Assess whether an edit is a blanking """ + + lenOld = change['length']['old'] + lenNew = change['length']['new'] + if lenNew < lenOld and \ + not re.search(r'(?im)(redirect|redirección)', '\n'.join(change['diff']['added'])): + percent = (lenOld-lenNew)/(lenOld/100.0) + if (lenOld>=500 and lenOld<1000 and percent>=90) or \ + (lenOld>=1000 and lenOld<2500 and percent>=85) or \ + (lenOld>=2500 and lenOld<5000 and percent>=75) or \ + (lenOld>=5000 and lenOld<10000 and percent>=72.5) or \ + (lenOld>=10000 and lenOld<20000 and percent>=70) or \ + (lenOld>=20000 and percent>=65): + return True + return False + + def isEditVandalism(self, change, score): + vandalismthreshold = -4 + vandalismdensity = 150 + if score['global']['group'] == 'vandalism': + if score['global']['score'] <= vandalismthreshold: + return True + elif score['global']['score'] < 0 and + return False + + def analyseEdit(self, change): + """ Analiza una edición """ + """ Analyse one edit """ + + diff = self.getDiff(change) + change['diff'] = diff + score = self.getScore(diff) + print("Score: %s" % (score)) + + # calcular score general o por tipos mejor? + if self.isEditTest(change, score): + self.revertEdit(change) + self.sendMessage(change, message='test') + elif self.isEditBlanking(change): + self.revertEdit(change) + self.sendMessage(change, message='blanking') + elif self.isEditVandalism(change, score): + self.revertEdit(change) + self.sendMessage(change, message='vandalism') + """ for m in match: diff --git a/avbotanalysis.py b/avbotanalysis.py index 274467e..6a70e66 100644 --- a/avbotanalysis.py +++ b/avbotanalysis.py @@ -356,31 +356,7 @@ def mustBeReverted(editData, cleandata, userClass): editData['details']=u"" #no olvidar return revertAllEditsByUser(editData, userClass, regexplist) #Revert - #Blanking edit? - lenOld=editData['lenOld'] - lenNew=editData['lenNew'] - if lenNew=500 and lenOld<1000 and percent>=90) or \ - (lenOld>=1000 and lenOld<2500 and percent>=85) or \ - (lenOld>=2500 and lenOld<5000 and percent>=75) or \ - (lenOld>=5000 and lenOld<10000 and percent>=72.5) or \ - (lenOld>=10000 and lenOld<20000 and percent>=70) or \ - (lenOld>=20000 and percent>=65): - editData['type']='bl' - editData['score']=-(editData['lenNew']+1) #la puntuacion de los blanqueos es la nueva longitud + 1, negada, para evitar el -0 - editData['details']=u'' - - return revertAllEditsByUser(editData, userClass, regexplist) #Revert - """ - if editData['lenOld']>=1000 and editData['lenNew']<=500 and editData['lenNew']')[1].split('')[0] - m=re.compile(ur'').finditer(data) - wikipedianm=u'' + + data = avbotglobals.preferences['site'].getUrl("/w/index.php?title=Special:RecentChanges") + data = data.split('')[0] + m = re.compile(ur'').finditer(data) for i in m: - number=int(i.group(1)) - name=i.group(2) - if number==namespace: - wikipedianm+=name - return wikipedianm + nmid = int(i.group('nmid')) + nmname = i.group('nmname') + if namespaceid == nmid: + return nmname + return '' def resumeTranslator(editData): """ Traductor de resúmenes de edición primitivo """ @@ -261,25 +261,6 @@ def getParameters(): wikipedia.output(u"Not all obligatory parameters were found. Please, check (*) parameters.") sys.exit() -def getTime(): - """ Coge la hora del sistema """ - """ Get system time """ - return time.strftime('%H:%M:%S') - -def encodeLine(line): - """ Codifica una cadena en UTF-8 a poder ser """ - """ Encode string into UTF-8 """ - - try: - line2=unicode(line,'utf-8') - except UnicodeError: - try: - line2=unicode(line,'iso8859-1') - except UnicodeError: - print u'Unknown codification' - return '' - return line2 - def getUserClass(editData): """ Averigua el tipo de usuario del que se trata """ """ Check user class """ @@ -343,31 +324,3 @@ def checkBlockInEnglishWikipedia(editData): return comment, isProxy -def checkForUpdates(): - fullpath = "/"+"/".join(os.path.abspath( __file__ ).split("/")[:-1])+"/" - svn='http://avbot.googlecode.com/svn/trunk/' - f=urllib.urlopen(svn) - html=f.read() - m=re.compile(ur">(?P[^<]+?\.py)").finditer(html) - for i in m: - filename=i.group("filename") - wikipedia.output(u"Checking file %s..." % filename) - g=open(fullpath+filename, 'r') - h=urllib.urlopen(svn+filename) - if g.read()!=h.read(): - wikipedia.output(u"%s has changed!!!" % filename) - g.close() - return True - else: - wikipedia.output(u"OK!") - g.close() - f.close() - return False - -def existenceFile(): - while True: - if not os.path.isfile(avbotglobals.existFile): - existFile=open(avbotglobals.existFile, 'w') - existFile.write(str("hi")) - existFile.close() - time.sleep(60) # debe ser menor que el time del cron / 2 diff --git a/avbotglobals.py b/avbotglobals.py index b002acc..23d2b02 100644 --- a/avbotglobals.py +++ b/avbotglobals.py @@ -27,97 +27,16 @@ import time import os -""" pywikipediabot modules """ -import wikipedia - -""" AVBOT modules """ -import avbotcomb - -""" Default bot preferences """ -global preferences -preferences = { - 'botNick': u'Bot', #Bot name - 'ownerNick': u'Owner', #Owner nick - 'language': u'es', #Default language is Spanish - 'family': u'wikipedia', #Default project family is Wikipedia - 'site': 0, #Empty var - 'network': u'irc.wikimedia.org', #IRC network where is the IRC channel with recent changes - 'channel': 0, #RSS channel for recent changes in Wikipedia - 'nickname': 0, #Bot nick in channel, with random numbers to avoid nick collisions - 'port': 6667, #Port number - 'logsDirectory': 'botlogs', #Directory reverts logs, not ending in / - 'newbie': 25, #Who is a newbie user? How many edits? - 'statsDelay': 60, #How man seconds between showing stats in screen - 'colors': { - 'steward': 'lightblue', - 'sysop': 'lightblue', - 'bureaucrat': 'lightblue', - 'checkuser': 'lightblue', - 'bot': 'lightpurple', - 'reg': 'lightgreen', - 'anon': 'lightyellow', - }, - 'context': ur'[ \@\º\ª\·\#\~\$\<\>\/\(\)\'\-\_\:\;\,\.\r\n\?\!\¡\¿\"\=\[\]\|\{\}\+\&]', - 'msg': {}, - 'testmode': False, - 'nosave': False, - 'force': False, - 'trial': False, - 'editsFilename': 'edits.txt', - 'historyLength': 10, # history length to recover -} - -""" Header message """ -header = u"AVBOT Copyright (C) 2008-2010 Emilio José Rodríguez Posada\n" -header += u"This program comes with ABSOLUTELY NO WARRANTY.\n" -header += u"This is free software, and you are welcome to redistribute it\n" -header += u"under certain conditions. See license.\n\n" -header += u"############################################################################\n" -header += u"# Name: AVBOT (Anti-Vandalism BOT) #\n" -header += u"# Version: 1.2 #\n" -header += u"# Tasks: To revert vandalism, blanking and test edits #\n" -header += u"# To report vandalism waves attacks to admins #\n" -header += u"# To improve new articles (magic interwikis) #\n" -header += u"# To mark for deletion rubbish articles #\n" -header += u"############################################################################\n\n" -header += u"Available parameters (* obligatory): -lang, -family, -newbie, -botnick*, -statsdelay, -network, -channel, -ownernick*, -nosave, -force\n" -header += u"Example: python avbot.py -botnick:MyBot -ownernick:MyUser\n" -wikipedia.output(header) avbotcomb.getParameters() -if avbotcomb.checkForUpdates(): #no llega al directorio actual (cron lo ejecuta con la absoluta) - wikipedia.output(u"***New code available*** Please, update your copy of AVBOT from http://avbot.googlecode.com/svn/trunk/") - sys.exit() - -preferences['site'] = wikipedia.Site(preferences['language'], preferences['family']) -if not preferences['nosave']: - testEdit = wikipedia.Page(preferences['site'], 'User:%s/Sandbox' % preferences['botNick']) - testEdit.put(u'136', u'BOT - Arrancando robot', botflag=False, maxTries=3) #same text always, avoid avbotcron edit panic - testEdit = wikipedia.Page(wikipedia.Site(u'en', u'wikipedia'), 'User:%s/Sandbox' % preferences['botNick']) - testEdit.put(u'Test edit', u'BOT - Arrancando robot', botflag=False, maxTries=3) #same text always, avoid avbotcron edit panic - -if not preferences['channel']: - preferences['channel'] = '#%s.%s' % (preferences['language'], preferences['family']) -if not preferences['nickname']: - preferences['nickname'] = '%s%s' % (preferences['botNick'], str(random.randint(1000, 9999))) - -preferences['editsFilename']='%s-%s-edits.txt' % (preferences['language'], preferences['family']) - -preferences['goodandevil']=u'Lista del bien y del mal.css' -preferences['exclusions']=u'Exclusiones.css' -preferences['messages']=u'Mensajes.css' -if preferences['site'].lang=='en': - preferences['goodandevil']=u'Good and evil list.css' - preferences['exclusions']=u'Exclusions.css' - preferences['messages']=u'Messages.css' -elif preferences['site'].lang=='pt': - preferences['goodandevil']=u'Expressões.css' - preferences['exclusions']=u'Exclusões.css' - preferences['messages']=u'Mensagens.css' - -global namespaces -namespaces={} + + +if not preferences['dryrun']: + testEdit = pywikibot.Page(preferences['site'], 'User:%s/AVBOT' % (preferences['botNick'])) + testEdit.text = 'Starting AVBOT' + testEdit.save('BOT - Starting AVBOT', botflag=False, maxTries=3) # Same text always, avoid avbotcron edit panic + namespaces[2] = avbotcomb.namespaceTranslator(2) namespaces[3] = avbotcomb.namespaceTranslator(3) @@ -130,10 +49,7 @@ global statsTimersDic statsTimersDic={'speed':0, 2: time.time(), 12: time.time(), 24: time.time(), 'tvel': time.time()} -global existFile -existFile = '%s-%s-%s-exist.txt' % (preferences['language'], preferences['family'], preferences['botNick']) -global pidFile -pidFile = '%s-%s-%s-mypid.txt' % (preferences['language'], preferences['family'], preferences['botNick']) + global userData userData={} @@ -149,48 +65,47 @@ global parserRegexps parserRegexps={ - 'cleandiff-diff-context': re.compile(ur'diff-context'), - 'cleandiff-diff-addedline': re.compile(ur'diff-addedline'), - 'cleandiff-diff-addedline-div': re.compile(ur'
'), - 'cleandiff-diff-deletedline': re.compile(ur'diff-deletedline'), - 'cleandiff-diffchange': re.compile(ur'(||)(?P[^<]*?)'), - 'watch-1': re.compile(ur'\/'), - 'goodandevil': re.compile(ur'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['goodandevil'])), - 'exclusions': re.compile(ur'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['exclusions'])), - 'messages': re.compile(ur'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['messages'])), - 'anti-birthday-es': re.compile(ur'(?m)^\d{1,2} de (enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)$'), - 'loaduseredits-editcount': re.compile(ur'editcount'), - 'loaduseredits-editcount-d': re.compile(ur' editcount="(\d+)"'), - 'isrubbish-tl-red': re.compile(ur'(?i)\{\{|redirect'), - 'isrubbish-link': re.compile(ur'\[\['), - 'blanqueos': re.compile(ur'(?i)(redirect|redirección|desamb|\{\{ *(db\-|copyvio|destruir|plagio|robotdestruir|wikificar))'), #fix add more cases for en: and pt: mainly db-copyvio - 'block': re.compile(ur'(?i)\[\[Especial:Log/block\]\] +block +\* +(?P.*?) +\* +bloqueó a +\"Usuario\:(?P.*?)\" +.*?durante un plazo de \"(?P.*?)\"'), + 'cleandiff-diff-context': re.compile(r'diff-context'), + 'cleandiff-diff-addedline': re.compile(r'diff-addedline'), + 'cleandiff-diff-addedline-div': re.compile(r'
'), + 'cleandiff-diff-deletedline': re.compile(r'diff-deletedline'), + 'cleandiff-diffchange': re.compile(r'(||)(?P[^<]*?)'), + 'watch-1': re.compile(r'\/'), + 'goodandevil': re.compile(r'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['goodandevil'])), + 'exclusions': re.compile(r'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['exclusions'])), + 'messages': re.compile(r'%s:%s/%s' % (namespaces[2], preferences['ownerNick'], preferences['messages'])), + 'anti-birthday-es': re.compile(r'(?m)^\d{1,2} de (enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)$'), + + 'isrubbish-tl-red': re.compile(r'(?i)\{\{|redirect'), + 'isrubbish-link': re.compile(r'\[\['), + 'blanqueos': re.compile(r'(?i)(redirect|redirección|desamb|\{\{ *(db\-|copyvio|destruir|plagio|robotdestruir|wikificar))'), #fix add more cases for en: and pt: mainly db-copyvio + 'block': re.compile(r'(?i)\[\[Especial:Log/block\]\] +block +\* +(?P.*?) +\* +bloqueó a +\"Usuario\:(?P.*?)\" +.*?durante un plazo de \"(?P.*?)\"'), #[[Especial:Log/delete]] delete * Snakeyes * borró "Discusión:Gastronomía en Estados Unidos": borrado rápido usando [[w:es:User:Axxgreazz/Monobook-Suite|monobook-suite]] el contenido era: «{{delete|Vandalismo}} {{fuenteprimaria|6|mayo}} Copia y pega el siguiente código en la página de discusión del creador del artículo: == Ediciones con investigac - #'borrado': re.compile(ur'(?i)\[\[...(?P.*?)..\]\].*?delete.*?\*.....(?P.*?)...\*'), - 'borrado': re.compile(ur'(?i)\[\[Especial:Log/delete\]\] +delete +\* +(?P.*?) +\* +borró +«(?P.*?)»\:'), - 'categories': re.compile(ur'(?i)\[\[ *(Category|Categoría) *\: *[^\]\n\r]+? *\]\]'), - 'footerallowed': re.compile(ur"(?i)(\[\[|\=\=|\:\/\/|\{\{|\'\'|\:|\, |\.(com|org|edu|gov|net|info|tv))"), #http://en.wikipedia.org/w/index.php?title=Sukhoi_Superjet_100&diff=353978236&oldid=353978214 - 'conflictivos': re.compile(ur'(?i)( Cfd | AfD |(\{\{ *(AfDM|ad|advert|spam|cleanup|copy ?to|db\-|isrev|inuse|Underconstruction|copyvio|copypaste|autotrad|maltrad|mal traducido|anuncio|promocional|publicidad|sin ?relevancia|SRA|irrelevante|wikci|al? (wikcionario|wikicitas|wikinoticias|wikiquote|wikisource))))'), #promocional etc suelen ser blanqueados o mejorados por IPs para que quiten el cartel, evitamos revertir http://en.wikipedia.org/wiki/Wikipedia:Template_messages/Maintenance #fix pasar a una subpágina /Skip - 'destruir': re.compile(ur'(?i)( Cfd | AfD |(\{\{ *(destruir|db\-|spam|ad)))'), + #'borrado': re.compile(r'(?i)\[\[...(?P.*?)..\]\].*?delete.*?\*.....(?P.*?)...\*'), + 'borrado': re.compile(r'(?i)\[\[Especial:Log/delete\]\] +delete +\* +(?P.*?) +\* +borró +«(?P.*?)»\:'), + 'categories': re.compile(r'(?i)\[\[ *(Category|Categoría) *\: *[^\]\n\r]+? *\]\]'), + 'footerallowed': re.compile(r"(?i)(\[\[|\=\=|\:\/\/|\{\{|\'\'|\:|\, |\.(com|org|edu|gov|net|info|tv))"), #http://en.wikipedia.org/w/index.php?title=Sukhoi_Superjet_100&diff=353978236&oldid=353978214 + 'conflictivos': re.compile(r'(?i)( Cfd | AfD |(\{\{ *(AfDM|ad|advert|spam|cleanup|copy ?to|db\-|isrev|inuse|Underconstruction|copyvio|copypaste|autotrad|maltrad|mal traducido|anuncio|promocional|publicidad|sin ?relevancia|SRA|irrelevante|wikci|al? (wikcionario|wikicitas|wikinoticias|wikiquote|wikisource))))'), #promocional etc suelen ser blanqueados o mejorados por IPs para que quiten el cartel, evitamos revertir http://en.wikipedia.org/wiki/Wikipedia:Template_messages/Maintenance #fix pasar a una subpágina /Skip + 'destruir': re.compile(r'(?i)( Cfd | AfD |(\{\{ *(destruir|db\-|spam|ad)))'), #diffstylebegin y end va relacionado - 'diffstylebegin': re.compile(ur'(||)'), - 'diffstyleend': re.compile(ur'(||)(?P[^<]*?)'), - 'interwikis': re.compile(ur'(?i)\[\[ *[a-z]{2} *\: *[^\]\|\n\r]+? *\]\]'), - 'ip': re.compile(ur'(?im)^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])$'), - 'firmas1': re.compile(ur'
([^<]*?)
'), + 'diffstylebegin': re.compile(r'(||)'), + 'diffstyleend': re.compile(r'(||)(?P[^<]*?)'), + 'interwikis': re.compile(r'(?i)\[\[ *[a-z]{2} *\: *[^\]\|\n\r]+? *\]\]'), + 'ip': re.compile(r'(?im)^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-6])$'), + 'firmas1': re.compile(r'
([^<]*?)
'), #sin title - #'edit': re.compile(ur'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?title\=.*?diff\=(?P\d+)\&oldid\=(?P\d+) +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), - 'edit': re.compile(ur'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?diff\=(?P\d+)\&oldid\=(?P\d+)(\&rcid=\d+)? +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), - #'newpage': re.compile(ur'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?title\=.*?\&rcid\=\d+ +\* (?P.*?) +\*' % preferences['language']), - 'newpage': re.compile(ur'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?oldid\=(?P\d+)(\&rcid=\d+)? +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), - 'nuevousuario': re.compile(ur'(?i)\[\[Especial:Log/newusers\]\] +create +\* +(?P.*?) +\* +Usuario nuevo'), - 'protegida': re.compile(ur'(?i)\[\[Especial:Log/protect\]\] +protect +\* +(?P.*?) +\* +protegió +\[\[(?P.*?)\]\] +\[edit\=(?Psysop|autoconfirmed)\][^\[]*?\[move\=(?Psysop|autoconfirmed)\]'), + #'edit': re.compile(r'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?title\=.*?diff\=(?P\d+)\&oldid\=(?P\d+) +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), + 'edit': re.compile(r'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?diff\=(?P\d+)\&oldid\=(?P\d+)(\&rcid=\d+)? +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), + #'newpage': re.compile(r'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?title\=.*?\&rcid\=\d+ +\* (?P.*?) +\*' % preferences['language']), + 'newpage': re.compile(r'(?i)\[\[(?P.*?)\]\] +(?P.*?) +http\://%s\.wikipedia\.org/w/index\.php\?oldid\=(?P\d+)(\&rcid=\d+)? +\* +(?P.*?) +\* +\(.*?\) +(?P.*)' % preferences['language']), + 'nuevousuario': re.compile(r'(?i)\[\[Especial:Log/newusers\]\] +create +\* +(?P.*?) +\* +Usuario nuevo'), + 'protegida': re.compile(r'(?i)\[\[Especial:Log/protect\]\] +protect +\* +(?P.*?) +\* +protegió +\[\[(?P.*?)\]\] +\[edit\=(?Psysop|autoconfirmed)\][^\[]*?\[move\=(?Psysop|autoconfirmed)\]'), #protegidacreacion [[Especial:Log/protect]] protect * Snakeyes * protegió [[Tucupido cincuentero]] [create=sysop] (indefinido): Artículo ensayista reincidente - 'desprotegida': re.compile(ur'(?i)\[\[.*?Especial\:Log/protect.*?\]\].*?unprotect'), - 'spam': re.compile(ur'(?im)
[^<]*?(http://[a-z0-9\.\-\=\?\_\/]+)[^<]*?
'), + 'desprotegida': re.compile(r'(?i)\[\[.*?Especial\:Log/protect.*?\]\].*?unprotect'), + 'spam': re.compile(r'(?im)
[^<]*?(http://[a-z0-9\.\-\=\?\_\/]+)[^<]*?
'), #[[Especial:Log/move]] move_redir * Manuel González Olaechea y Franco * [[Anexo:Presidente del Perú]] ha sido trasladado a [[Anexo:Presidentes del Perú]] sobre una redirección. #[[Especial:Log/move]] move * Dhidalgo * [[Macizo Etíope]] ha sido trasladado a [[Macizo etíope]] - 'traslado': re.compile(ur'(?i)\[\[Especial:Log/move\]\] +move +\* +(?P.*?) +\* +\[\[(?P.*?)\]\] +ha sido trasladado a +\[\[(?P.*?)\]\]'), + 'traslado': re.compile(r'(?i)\[\[Especial:Log/move\]\] +move +\* +(?P.*?) +\* +\[\[(?P.*?)\]\] +ha sido trasladado a +\[\[(?P.*?)\]\]'), } #Check logs directory diff --git a/avbotload.py b/avbotload.py index 0f14ce4..f969cac 100644 --- a/avbotload.py +++ b/avbotload.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -# AVBOT - Anti-Vandalism BOT for MediaWiki projects -# Copyright (C) 2008-2010 Emilio José Rodríguez Posada +# AVBOT - Anti-vandalism bot for MediaWiki wikis +# Copyright (C) 2008-2016 emijrp # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or @@ -42,92 +42,6 @@ def changedRegexpsList(dic1, dic2): return True return False -def loadEdits(): - """ Carga fichero con número de ediciones """ - """ Load user edits file """ - newbie=avbotglobals.preferences['newbie'] - ediciones={} - filename=avbotglobals.preferences['editsFilename'] - try: - f=open(filename, "r") - except: - f=open(filename, "w") - f.write('') - f.close() - f=open(filename, "r") - l=ur"" - l=f.readline() - while l: - l=unicode(l, "utf-8") - if len(l)>=4: #dos ; y un caracter de nick y un numero de ediciones - #print l - tmp=l.split(";") - usuario=tmp[0] - numero=tmp[1] - if numero=='None': - numero=0 - if numero<1: #nos curamos en salud, por el bug de usuarios con acentos ej: Zósimo, Botellín (aunque a boteellin no deberia ni revisarlo por ser bot) - numero=newbie+1 - ediciones[usuario]=numero - l=f.readline() - f.close() - - wikipedia.output(u"Loaded info for %d users from \"%s\"" % (len(ediciones.items()), filename)) - - avbotglobals.userData['edits']=ediciones - -def loadUsers(group): - """ Captura lista de usuarios de Wikipedia según el tipo deseado """ - """ Fetch user list by class """ - - """vieja forma de hacerlo (un poco fea) users=[] - data=avbotglobals.preferences['site'].getUrl("/w/index.php?title=Special:Listusers&limit=5000&group=%s" % group) - data=data.split('') - data=data[1].split('')[0] - namespace=avbotcomb.namespaceTranslator(2) - m=re.compile(ur" title=\"%s:(.*?)\">" % namespace).finditer(data) - for i in m: - users.append(i.group(1)) - wikipedia.output(u"Loaded info for %d %ss from [[Special:Listusers]]" % (len(users), group)) - avbotglobals.userData[group]=users""" - users=[] - aufrom="!" - while aufrom: - query=wikipedia.query.GetData({'action':'query', 'list':'allusers', 'augroup':group, 'aulimit':'500', 'aufrom':aufrom},site=avbotglobals.preferences['site'],useAPI=True) - for allusers in query['query']['allusers']: - users.append(allusers['name']) - if query.has_key('query-continue'): - aufrom=query.has_key('query-continue') - else: - aufrom="" - wikipedia.output(u"Loaded info for %d %ss from [[Special:Listusers]]" % (len(users), group)) - avbotglobals.userData[group]=users - -def loadSysops(): - """ Carga lista de sysops """ - """ Load sysops list """ - loadUsers('sysop') - -def loadBots(): - """ Carga lista de bots """ - """ Load bots list """ - loadUsers('bot') - -def loadBureaucrats(): - """ Carga lista de bureaucrats """ - """ Load bureaucrats list """ - loadUsers('bureaucrat') - -def loadCheckusers(): - """ Carga lista de checkusers """ - """ Load checkusers list """ - loadUsers('checkuser') - -def loadStewards(): - """ Carga lista de stewards """ - """ Load stewards list """ - loadUsers('steward') - def loadMessages(): """ Carga preferencias sobre mensajes """ """ Load messages preferences """ @@ -247,55 +161,4 @@ def reloadRegexpList(author, diff): p.put(u'* {{subst:CURRENTDAY}} de {{subst:CURRENTMONTHNAME}} de {{subst:CURRENTYEAR}}, {{subst:CURRENTTIME}} (UTC): {{u|%s}} ha editado la página pero hay las mismas %d expresiones regulares válidas ([http://%s.wikipedia.org/w/index.php?title=User:%s/Lista_del_bien_y_del_mal.css&diff=%s&oldid=prev ver diff]).\n%s' % (author, len(avbotglobals.vandalRegexps), avbotglobals.preferences['language'], ownerNick, diff, p.get()), u'BOT - La lista no ha cambiado. Total [%d]' % len(avbotglobals.vandalRegexps), botflag=False, maxTries=1)""" return -def loadUserEdits(author): - """ Carga número de ediciones de un usuario en concreto """ - """ Load user edits number """ - #fix pasar esta función a query completa? - author_=re.sub(' ', '_', author) - try: - rawdata=avbotglobals.preferences['site'].getUrl("/w/api.php?action=query&list=users&ususers=%s&usprop=editcount&format=xml" % urllib.quote(author_)) - if re.search(avbotglobals.parserRegexps['loaduseredits-editcount'], rawdata): - m=avbotglobals.parserRegexps['loaduseredits-editcount-d'].finditer(rawdata) - for i in m: - editsnum=int(i.group(1)) - if editsnum<1: - return avbotglobals.preferences['newbie']+1 - else: - return editsnum - else: - return avbotglobals.preferences['newbie']+1 - except: - return avbotglobals.preferences['newbie']+1 -def loadExclusions(): - """ Carga lista de páginas excluidas """ - """ Load excluded pages list """ - #p=wikipedia.Page(avbotglobals.preferences['site'], u'User:%s/Exclusiones.css' % avbotglobals.preferences['ownerNick']) - p="" - if avbotglobals.preferences['site'].lang=='es': - p=wikipedia.Page(avbotglobals.preferences['site'], u'%s:Emijrp/%s' % (avbotglobals.namespaces[2], avbotglobals.preferences['exclusions'])) - else: - p=wikipedia.Page(avbotglobals.preferences['site'], u'%s:%s/%s' % (avbotglobals.namespaces[2], avbotglobals.preferences['ownerNick'], avbotglobals.preferences['exclusions'])) - raw='' - if p.exists(): - if not p.isRedirectPage() and not p.isDisambig(): - raw=p.get() - else: - wikipedia.output('A preferences page is needed in [[%s]]' % p.title()) - wikipedia.output('Introduce an excluded page per line. Without [[]]') - wikipedia.output('You can skip this with -force parameter') - if not avbotglobals.preferences['force']: - sys.exit() - - for l in raw.splitlines(): - l=l.strip() - if len(l)>=1: - if l[0]=='#' or l[0]=='<': - continue - l=re.sub(ur"(?im)^([^\#]*?)\#[^\n\r]*?$", ur"\1", l)#Clean inline comments - t=l.split(';;') - exclusion=t[0] - if not avbotglobals.excludedPages.has_key(exclusion): - avbotglobals.excludedPages[exclusion]=re.compile(ur"(?m)^%s$" % exclusion) - - wikipedia.output(u"Loaded %d page excluded pages..." % (len(avbotglobals.excludedPages.items()))) diff --git a/avbotml.py b/avbotml.py new file mode 100644 index 0000000..42d2441 --- /dev/null +++ b/avbotml.py @@ -0,0 +1,501 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +#todo +#ircbot https://fisheye.toolserver.org/browse/~raw,r=720/Bryan/TsLogBot/TsLogBot.py +#capacidad para leer CR de irc o de api + +import datetime +import os +import random +import re +import time +import thread +import threading +import urllib +import socket +import sys + +#modules from pywikipediabot +import query +import wikipedia + +#whitelisted users +wlgroups = [ + 'abusefilter', + 'bot', + 'bureaucrat', + 'checkuser', + 'founder', + 'import', + 'oversight', + 'researcher', + 'reviewer', + 'rollbacker', + 'steward', + 'sysop', + ] +users = {} #dic with users sorted by group +groups = {} +colours = { + 'anon': 'lightyellow', + 'bot': 'lightpurple', + 'sysop': 'lightblue', + '': 'lightgreen', + } +""" +colourcodes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c', 'd', 'e', 'f'] +colournames = ['black', 'blue', 'green', 'aqua', 'red', 'purple', 'yellow', 'white', 'grey', 'light blue', 'light green', 'light aqua', 'light red', 'light purple', 'light yellow', 'bright white'] +""" + +preferences = { + 'language': 'en', + 'family': 'wikipedia', + 'botname': 'AVBOT', + 'newbie': 25, + 'rcAPI': False, + 'rcIRC': True, + 'server': 'irc.wikimedia.org', + 'channel': '#en.wikipedia', + 'ircname': 'AVBOT%d' % (random.randint(10000,99999)), + 'userinfofile': 'userinfo.txt', + 'test': True, + 'testwiki': False, + 'testfile': True, + 'testfilename': 'avbotreverts-testing.txt', +} + +preferences['site'] = wikipedia.Site(preferences['language'], preferences['family']) +preferences['testwikipage'] = wikipedia.Page(preferences['site'], u'User:%s/Test' % (preferences['botname'])) + +regexps = [ + ur'(?i)\bf+u+c+k+\b', + ur'(?i)\b(h+a+){2,}\b', + ur'(?i)\bg+a+y+\b', + ur'(?i)\bf+a+g+s*\b', + ur'(?i)\ba+s+s+\b', + ur'(?i)\bb+i+t+c+h+(e+s+)?\b', + ] +cregexps = [] +ipregexp = re.compile(ur'\d+(\.\d+){3}') + +for regexp in regexps: + cregexps.append(re.compile(regexp)) + +def loadUsersFromGroup(group): + global users + + users[group] = {} + aufrom = '!' + while aufrom: + params = { + 'action': 'query', + 'list': 'allusers', + 'augroup': group, + 'aulimit': '500', + 'aufrom': aufrom, + } + data = query.GetData(params, site = preferences['site']) + if not 'error' in data.keys(): + for item in data['query']['allusers']: + user = item['name'] + users[group][user] = {'editcount': getUserEditcount(user), 'groups': getUserGroups(user)} + print user, users[group][user] + + if 'query-continue' in data.keys(): + aufrom = data['query-continue']['allusers']['aufrom'] + else: + aufrom = '' + +def getUserInfo(user): + editcount = 0 + if not isIP(user): + if users.has_key(user): + editcount = users[user]['editcount'] + + if editcount > preferences['newbie']: + if not random.randint(0, 20): #avoid update no newbies users too much + return editcount + + params = { + 'action': 'query', + 'list': 'users', + 'ususers': user, + 'usprop': 'editcount|groups', + } + data = query.GetData(params, site=preferences['site']) + if not 'error' in data.keys(): + editcount = 0 + if 'editcount' in query.GetData(params)['query']['users'][0].keys(): + editcount = int(query.GetData(params)['query']['users'][0]['editcount']) + groups = [] + if 'groups' in query.GetData(params)['query']['users'][0].keys(): + groups = query.GetData(params)['query']['users'][0]['groups'] + users[user] = {'editcount': editcount, 'groups': groups, } + + saveUserInfo() + +def getUserEditcount(user): + if isIP(user): + return 0 + + if users.has_key(user): + return users[user]['editcount'] + else: + getUserInfo(user) + return users[user]['editcount'] + +def getUserGroups(user): + if isIP(user): + return [] + + if users.has_key(user): + return users[user]['groups'] + else: + getUserInfo(user) + return users[user]['groups'] + +def saveUserInfo(): + f = open(preferences['userinfofile'], 'w') + + for user, props in users.items(): + #print props + line = u'%s\t%d\t%s\n' % (user, props['editcount'], ','.join(props['groups'])) + f.write(line.encode('utf-8')) + + f.close() + +def loadUserInfo(): + global users + + if not os.path.exists(preferences['userinfofile']): + #creating empty file + saveUserInfo() + + f = open(preferences['userinfofile'], 'r') + for line in f: + line = unicode(line, 'utf-8') + line = line[:-1] + if line: + user, editcount, groups = line.split('\t') + users[user] = {'editcount': int(editcount), 'groups': groups.split(',')} + f.close() + +def loadGroups(): + #Info about groups: http://www.mediawiki.org/wiki/Manual:User_rights + global groups + + groups = [] + params = { + 'action': 'query', + 'meta': 'siteinfo', + 'siprop': 'usergroups', + } + data = query.GetData(params, site=preferences['site']) + if not 'error' in data.keys(): + for item in query.GetData(params)['query']['usergroups']: + groups.append(item['name']) + +def loadData(): + #users + loadGroups() + print 'Loaded %d groups: %s' % (len(groups), ', '.join(groups)) + print 'Loaded %d whitelisted groups: %s' % (len(wlgroups), ', '.join(wlgroups)) + + loadUserInfo() + print 'Loaded userinfo for %d users' % (len(users.keys())) + + #other interesting data... + +def editIsBlanking(edit_props): + lenNew = len(edit_props['newText']) + lenOld = len(edit_props['oldText']) + + if lenNew < lenOld and \ + not re.search(ur"(?i)# *REDIRECT", edit_props['newText']): + #Avoid articles converted into #REDIRECT [[...]] and other legitimate blankings + percent = (lenOld-lenNew)/(lenOld/100.0) + if (lenOld>=500 and lenOld<1000 and percent>=90) or \ + (lenOld>=1000 and lenOld<2500 and percent>=85) or \ + (lenOld>=2500 and lenOld<5000 and percent>=75) or \ + (lenOld>=5000 and lenOld<10000 and percent>=72.5) or \ + (lenOld>=10000 and lenOld<20000 and percent>=70) or \ + (lenOld>=20000 and percent>=65): + return True + + return False + +def editIsTest(edit_props): + + + return False + +def editIsVandalism(edit_props): + for regexp in cregexps: + if re.search(regexp, edit_props['newText']) and \ + not re.search(regexp, edit_props['oldText']): + return True + + return False + +def editIsVanish(edit_props): + return False + +def userwarning(): + #enviar mensajes según el orden que ya tengan los de la discusión + pass + +def reverted(): + return False + +def revert(edit_props, motive=""): + #revertir usando rollback y sino hay (detectar mirando los grupos del bot) buscar la ultima edicion de un no-vandalo (bajarme las ultimas 10? ediciones del historial?) y hacer .put + #print "Detected edit to revert: %s" % motive + + #revertind code + #revert all edits by this user + stableoldid = '' + stableuser = '' + + #print edit_props['history'] + for revision in edit_props['history']: + if revision[2] != edit_props['user']: + stableoldid = revision[0] + stableuser = revision[2] + break #nos quedamos con la más reciente que sea válida + + print '--->', edit_props['title'], stableoldid, edit_props['oldid'], '<----' + if stableoldid and str(stableoldid) == str(edit_props['oldid']): + if preferences['testwiki']: + output = u'\n* %s [[%s]] [{{SERVER}}/w/index.php?diff=next&oldid=%s]' % (edit_props['timestamp'], edit_props['title'], edit_props['diff']) + #preferences['testwikipage'].put(output, u'BOT - Adding one more: [[%s]]' % (edit_props['title'])) + elif preferences['testfile']: + output = u'\n* %s [[%s]] [{{SERVER}}/w/index.php?diff=next&oldid=%s]' % (edit_props['timestamp'], edit_props['title'], edit_props['diff']) + f=open(preferences['testfilename'], 'a') + f.write(output.encode('utf-8')) + f.close() + else: + pass + #edit_props['page'].put(edit_props['oldText'], u'BOT - Reverting to %s version by [[User:%s|%s]]' % (stableoldid, stableuser, stableuser)) + + #end code + + if reverted(): #a lo mejor lo ha revertido otro bot u otra persona + pass + #userwarning() + else: + print "Somebody was faster than us reverting. Reverting not needed" + +def editWar(edit_props): + #comprueba si esa edición ya existe previamente en el historial, por lo que el usuario está insistiendo en que permanezca + #primero con la longitud, y si hay semejanzas, entonces se compara con el texto completo + return False + +def analize(edit_props): + if editWar(edit_props): + #http://es.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Francia&rvprop=size&rvend=2010-07-25T14:54:54Z + print "Saltamos para evitar una guerra de ediciones" + return + elif mustBeAnalysed(edit_props): + #preparing data + #get last edits in history + t1=time.time() + #todo http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Aa&rvlimit=2&rvprop=ids|timestamp|user|comment|content + #comparar ids con diff y oldid para ver si han revertido ya + #si coinciden, capturar texts + + params = { + 'action': 'query', + 'prop': 'revisions', + 'titles': edit_props['title'], + 'rvlimit': '10', + 'rvprop': 'ids|timestamp|user|comment|content', + } + data = query.GetData(params, site = preferences['site']) + if not 'error' in data.keys(): + rnew = data['query']['pages'][data['query']['pages'].keys()[0]]['revisions'][0] + rold = data['query']['pages'][data['query']['pages'].keys()[0]]['revisions'][1] + + edit_props['oldText'] = '' + edit_props['newText'] = '' + + if str(rnew['revid']) == str(edit_props['diff']) and str(rold['revid']) == str(edit_props['oldid']): + edit_props['oldText'] = rold['*'] + edit_props['newText'] = rnew['*'] + + print list(set(set(edit_props['newText'].split())-set(edit_props['oldText'].split())))[:10] + + line = u'%s %s %s %s' % (edit_props['title'], time.time()-t1, len(edit_props['oldText']), len(edit_props['newText'])) + wikipedia.output(u'\03{lightyellow}%s\03{default}' % line) + + if editIsBlanking(edit_props): + #revert(edit_props, motive="blanking") + wikipedia.output(u'\03{lightred}-> *Blanking* detected in [[%s]] (%s)\03{default}' % (edit_props['title'], edit_props['change'])) + elif editIsTest(edit_props): + wikipedia.output(u'\03{lightred}-> *Test* detected in [[%s]] (%s)\03{default}' % (edit_props['title'], edit_props['change'])) + elif editIsVandalism(edit_props): + wikipedia.output(u'\03{lightred}-> *Vandalism* detected in [[%s]] (%s)\03{default}' % (edit_props['title'], edit_props['change'])) + #revert(edit_props, motive="vandalism") + elif editIsVanish(edit_props): + wikipedia.output(u'\03{lightred}-> *Vanish* detected in [[%s]] (%s)\03{default}' % (edit_props['title'], edit_props['change'])) + else: + pass + +def isIP(user): + if re.search(ipregexp, user): + t = user.split('.') + if len(t) == 4 and \ + int(t[0])>=0 and int(t[0])<=255 and \ + int(t[1])>=0 and int(t[1])<=255 and \ + int(t[2])>=0 and int(t[2])<=255 and \ + int(t[3])>=0 and int(t[3])<=255: + return True + return False + +def mustBeAnalysed(edit_props): + # decide if an edit must be analysed + + useredits = getUserEditcount(edit_props['user']) + + #namespace filter + if edit_props['page'].namespace() != 0: + return False + + #anon filter + if isIP(edit_props['user']): + return True + + #group filter + for wlgroup in wlgroups: + if wlgroup in users[edit_props['user']]['groups']: + return False + + #edit number filter + if useredits <= preferences['newbie']: + return True + + return False + +def fetchedEdit(edit_props): + timestamp = edit_props['timestamp'].split('T')[1].split('Z')[0] + change = edit_props['change'] + if change >= 0: + change = '+%d' % (change) + + colour = 'lightyellow' #default + if getUserEditcount(edit_props['user']) > preferences['newbie']: + colour = 'lightgreen' + for group in getUserGroups(edit_props['user']): #for users with importan flags (stewards, oversight) but probably low editcounts + if group in wlgroups: + colour = 'lightblue' + if 'bot' in getUserGroups(edit_props['user']): + colour = 'lightpurple' + + line = u'%s [[%s]] {\03{%s}%s\03{default}, %d ed.} (%s)' % (timestamp, edit_props['title'], colour, edit_props['user'], getUserEditcount(edit_props['user']), change) + if not editWar(edit_props) and mustBeAnalysed(edit_props): + wikipedia.output(u'== Analyzing ==> %s' % line) + thread.start_new_thread(analize, (edit_props,)) + else: + wikipedia.output(line) + +def rcAPI(): + site = wikipedia.Site("en", "wikipedia") + + rctimestamp = "" + rcs = site.recentchanges(number=1) + for rc in rcs: + rctimestamp = rc[1] + + rcdir = "newer" + rchistory = [] + while True: + rcs = site.recentchanges(number=100, rcstart=rctimestamp, rcdir=rcdir) #fix no devuelve los oldid, mejor hacerme mi propia wikipedia.query + + for rc in rcs: + rcsimple = [rc[0].title(), rc[1], rc[2], rc[3]] + if rcsimple not in rchistory: + rchistory = rchistory[-1000:] + rchistory.append(rcsimple) + edit_props = {'page': rc[0], 'title': rc[0].title(), 'timestamp': rc[1], 'user': rc[2], 'comment': rc[3], } + thread.start_new_thread(fetchedEdit, (edit_props,)) + rctimestamp = rc[1] + time.sleep(3) + +def rcIRC(): + #partially from Bryan ircbot published with MIT License http://toolserver.org/~bryan/TsLogBot/TsLogBot.py + + while True: + try: + conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + conn.connect((preferences['server'], 6667)) + + conn.sendall('USER %s * * %s\r\n' % (preferences['ircname'], preferences['ircname'])) + conn.sendall('NICK %s\r\n' % (preferences['ircname'])) + conn.sendall('JOIN %s\r\n' % (preferences['channel'])) + + buffer = '' + while True: + if '\n' in buffer: + line = buffer[:buffer.index('\n')] + buffer = buffer[len(line) + 1:] + line = line.strip() + #print >>sys.stderr, line + + data = line.split(' ', 3) + if data[0] == 'PING': + conn.sendall('PONG %s\r\n' % data[1]) + elif data[1] == 'PRIVMSG': + nick = data[0][1:data[0].index('!')] + target = data[2] + message = data[3][1:] + message = unicode(message, 'utf-8') + message = re.sub(ur'\x03\d{0,2}', ur'', message) #No colors + message = re.sub(ur'\x02\d{0,2}', ur'', message) #No bold + if target == preferences['channel']: + if message.startswith('\x01ACTION'): + pass #log('* %s %s' % (nick, message[8:])) + else: + #todo esta regexp solo vale para ediciones, las páginas nuevas tienen rcid= y no diff: http://en.wikipedia.org/w/index.php?oldid=385928375&rcid=397223378 + m = re.compile(ur'(?im)^\[\[(?P.+?)\]\]\s+(?P<flag>[NMB]*?)\s+(?P<url>http://.+?diff=(?P<diff>\d+?)\&oldid=(?P<oldid>\d+?))\s+\*\s+(?P<user>.+?)\s+\*\s+\((?P<change>[\-\+]\d+?)\)\s+(?P<comment>.*?)$').finditer(message) + for i in m: + #flag, change, url + edit_props = {'page': wikipedia.Page(preferences['site'], i.group('title')), 'title': i.group('title'), 'timestamp': datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'), 'user': i.group('user'), 'comment': i.group('comment'), 'diff': i.group('diff'), 'oldid': i.group('oldid'), 'change': int(i.group('change'))} + thread.start_new_thread(fetchedEdit, (edit_props,)) + pass #log('<%s>\t%s' % (nick, message)) + else: + data = conn.recv(1024) + if not data: raise socket.error + buffer += data + except socket.error, e: + print >>sys.stderr, 'Socket error!', e + +def run(): + #irc or api + #por cada usuairo que llegue nuevo list=users (us) para saber cuando se registró + #evitar que coja ediciones repetidas + + if preferences["rcAPI"]: + rcAPI() + elif preferences["rcIRC"]: + rcIRC() + else: + print 'You have to choice a feed mode: --irc or --api' + +def welcome(): + print "#"*80, "\n# Welcome to AVBOT 2.0 \n", "#"*80 + + #running message? + #page = wikipedia.Page(preferences['site'], u'User:AVBOT/Sandbox') + #page.put(u'%d' % (random.randint(1000, 9999)), u'BOT - Testing') + +def bye(): + print "Bye, bye..." + +def main(): + welcome() + loadData() + run() + bye() + +if __name__ == '__main__': + main() diff --git a/stream.sample b/stream.sample new file mode 100644 index 0000000..11e6faf --- /dev/null +++ b/stream.sample @@ -0,0 +1,38 @@ +{'id': 847156565, 'timestamp': 1469721345, 'user': 'Mervat Salman', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 691696645, 'new': 731950145}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 5503, 'new': 5532}, 'title': 'Khorovats', 'comment': 'added [[Category:National dishes]] using [[WP:HC|HotCat]]', 'server_script_path': '/w'} +{'id': 847156566, 'timestamp': 1469721346, 'user': 'Jim Michael', 'server_name': 'en.wikipedia.org', 'minor': True, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731942401, 'new': 731950146}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 11063, 'new': 11061}, 'title': 'Jacques Hamel', 'comment': '', 'server_script_path': '/w'} +{'id': 847156567, 'timestamp': 1469721346, 'user': 'Anglicanus', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731914286, 'new': 731950148}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 4945, 'new': 4965}, 'title': 'Vincent Long Van Nguyen', 'comment': 'Undid revision 731914286 by [[Special:Contributions/120.155.17.46|120.155.17.46]] ([[User talk:120.155.17.46|talk]]) no reason provided', 'server_script_path': '/w'} +{'id': 847156568, 'timestamp': 1469721345, 'user': 'Mervat Salman', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:National dishes', 'comment': '[[:Khorovats]] added to category', 'server_script_path': '/w'} +{'id': None, 'wiki': 'enwiki', 'log_action': 'hit', 'user': '86.23.86.247', 'log_action_comment': '86.23.86.247 triggered [[Special:AbuseFilter/384|filter 384]], performing the action "edit" on [[Douglas Macmillan]]. Actions taken: Disallow ([[Special:AbuseLog/16228845|details]])', 'server_name': 'en.wikipedia.org', 'timestamp': 1469721348, 'type': 'log', 'log_id': 0, 'log_type': 'abusefilter', 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'title': 'Douglas Macmillan', 'log_params': {'log': 16228845, 'filter': 384, 'actions': 'disallow', 'action': 'edit'}, 'comment': '', 'server_script_path': '/w'} +{'id': 847156569, 'timestamp': 1469721346, 'user': 'Smallbones', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731948705, 'new': 731950147}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 87232, 'new': 86873}, 'title': 'Bernard Madoff', 'comment': '/* Jewish "affinity fraud" */ "Most of" not in source: OR', 'server_script_path': '/w'} +{'id': 847156579, 'timestamp': 1469721348, 'user': 'Qed237', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731949862, 'new': 731950149}, 'server_url': 'https://en.wikipedia.org', 'namespace': 10, 'bot': False, 'length': {'old': 3729, 'new': 3722}, 'title': 'Template:2016–17 Danish Superliga Regular Season table', 'comment': 'Standard', 'server_script_path': '/w'} +{'id': 847156580, 'timestamp': 1469721348, 'user': 'Boopbee', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 730255258, 'new': 731950150}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 10889, 'new': 10891}, 'title': 'Florida Gulf Coast Eagles', 'comment': 'Edited the name of the fight song to reflect the new version of the fight song.', 'server_script_path': '/w'} +{'id': 847156581, 'timestamp': 1469721349, 'user': '68.255.220.2', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 725819579, 'new': 731950151}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 6580, 'new': 6580}, 'title': 'Harry Winks', 'comment': '', 'server_script_path': '/w'} +{'id': 847156582, 'timestamp': 1469721350, 'user': 'Parsley Man', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731925897, 'new': 731950152}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 4202, 'new': 4084}, 'title': 'African American Defense League', 'comment': "Already redundant, given the last sentence's description.", 'server_script_path': '/w'} +{'id': 847156583, 'timestamp': 1469721350, 'user': 'Cormac1cormac1', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 679593302, 'new': 731950154}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 1241, 'new': 1403}, 'title': "O'Higgins (surname)", 'comment': '/* People of the Surname (Not always related to each other */', 'server_script_path': '/w'} +{'id': 847156584, 'timestamp': 1469721350, 'user': 'Ss112', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731948111, 'new': 731950153}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 18427, 'new': 18820}, 'title': 'Ben Lee', 'comment': '/* Promotional singles */ Added songs.', 'server_script_path': '/w'} +{'id': 847156585, 'timestamp': 1469721350, 'user': 'BlameRuiner', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 726054540, 'new': 731950155}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 1960, 'new': 2009}, 'title': 'Dmitry Dudar', 'comment': '', 'server_script_path': '/w'} +{'id': 847156586, 'timestamp': 1469721351, 'user': 'Masum Ibn Musa', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731950123, 'new': 731950156}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 1551, 'new': 1581}, 'title': 'Ariana Marie', 'comment': 'Added {{[[Template:notability|notability]]}} tag to article ([[WP:TW|TW]])', 'server_script_path': '/w'} +{'id': 847156587, 'timestamp': 1469721352, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'minor': True, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 695362368, 'new': 731950157}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 4987, 'new': 4964}, 'title': 'Vasiliki Angelopoulou', 'comment': 'diffusing swimmers-by-nationlity into gendered subcats, removed: [[Category:Greek sportswomen]] using [[Project:AWB|AWB]]', 'server_script_path': '/w'} +{'id': 847156588, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731611482, 'new': 731950158}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 767, 'new': 26}, 'title': 'Cooper Barnes', 'comment': 'convert [[WP:PROD]] to redirect', 'server_script_path': '/w'} +{'id': 847156589, 'timestamp': 1469721350, 'user': 'BlameRuiner', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:FC Slutsk players', 'comment': '[[:Dmitry Dudar]] added to category', 'server_script_path': '/w'} +{'id': 847156590, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Unreferenced BLPs from July 2016', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156591, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:All unreferenced BLPs', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156592, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:All articles proposed for deletion', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156593, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:BLP articles proposed for deletion', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156594, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:BLP articles proposed for deletion by days left', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156595, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Articles with hCards', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156596, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:1979 births', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156597, 'timestamp': 1469721352, 'user': 'Kvng', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Living people', 'comment': '[[:Cooper Barnes]] removed from category', 'server_script_path': '/w'} +{'id': 847156598, 'timestamp': 1469721352, 'user': 'MWright96', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 723898523, 'new': 731950159}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 16853, 'new': 16852}, 'title': 'Culture of London', 'comment': 'removed unused space', 'server_script_path': '/w'} +{'id': 847156599, 'timestamp': 1469721351, 'user': 'Masum Ibn Musa', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Articles with topics of unclear notability from July 2016', 'comment': '[[:Ariana Marie]] added to category', 'server_script_path': '/w'} +{'id': 847156600, 'timestamp': 1469721351, 'user': 'Masum Ibn Musa', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:All articles with topics of unclear notability', 'comment': '[[:Ariana Marie]] added to category', 'server_script_path': '/w'} +{'id': 847156601, 'timestamp': 1469721352, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek female swimmers', 'comment': '[[:Vasiliki Angelopoulou]] added to category', 'server_script_path': '/w'} +{'id': 847156602, 'timestamp': 1469721352, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek swimmers', 'comment': '[[:Vasiliki Angelopoulou]] removed from category', 'server_script_path': '/w'} +{'id': 847156603, 'timestamp': 1469721352, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek sportswomen', 'comment': '[[:Vasiliki Angelopoulou]] removed from category', 'server_script_path': '/w'} +{'id': 847156604, 'timestamp': 1469721352, 'user': '5.197.44.153', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731950032, 'new': 731950160}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 10467, 'new': 10490}, 'title': 'Kara Koyunlu', 'comment': '', 'server_script_path': '/w'} +{'id': 847156605, 'timestamp': 1469721354, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'minor': True, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 723491011, 'new': 731950161}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 4309, 'new': 4285}, 'title': 'Kalliopi Araouzou', 'comment': 'diffusing swimmers-by-nationlity into gendered subcats, removed: [[Category:Greek sportswomen]] using [[Project:AWB|AWB]]', 'server_script_path': '/w'} +{'id': 847156606, 'timestamp': 1469721354, 'user': 'SteveJEsposito', 'server_name': 'en.wikipedia.org', 'minor': False, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 731950056, 'new': 731950162}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 7221, 'new': 7220}, 'title': 'The Fall of Colossus', 'comment': '/* Plot summary */ Corrected errant plural', 'server_script_path': '/w'} +{'id': 847156607, 'timestamp': 1469721354, 'user': 'Jbpriede', 'server_name': 'en.wikipedia.org', 'minor': True, 'type': 'edit', 'wiki': 'enwiki', 'revision': {'old': 730320727, 'new': 731950163}, 'server_url': 'https://en.wikipedia.org', 'namespace': 0, 'bot': False, 'length': {'old': 28503, 'new': 29084}, 'title': 'Solyndra', 'comment': 'Updates to $1.5B lawsuit', 'server_script_path': '/w'} +{'id': 847156608, 'timestamp': 1469721354, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek female swimmers', 'comment': '[[:Kalliopi Araouzou]] added to category', 'server_script_path': '/w'} +{'id': 847156609, 'timestamp': 1469721354, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek swimmers', 'comment': '[[:Kalliopi Araouzou]] removed from category', 'server_script_path': '/w'} +{'id': 847156610, 'timestamp': 1469721354, 'user': 'BrownHairedGirl', 'server_name': 'en.wikipedia.org', 'type': 'categorize', 'wiki': 'enwiki', 'server_url': 'https://en.wikipedia.org', 'namespace': 14, 'bot': False, 'title': 'Category:Greek sportswomen', 'comment': '[[:Kalliopi Araouzou]] removed from category', 'server_script_path': '/w'}