Skip to content

Commit

Permalink
assessing vandalism, test edits and blanking
Browse files Browse the repository at this point in the history
  • Loading branch information
emijrp committed Jul 29, 2016
1 parent 72fbdf8 commit 2d1305a
Show file tree
Hide file tree
Showing 7 changed files with 688 additions and 363 deletions.
107 changes: 93 additions & 14 deletions avbot.py
Expand Up @@ -382,6 +382,8 @@ def isUserNewbie(self, user):
self.users[user] = {'groups': userprops['groups'], 'whitelisted': False, 'editcount': userprops['editcount']}
return self.isUserNewbie(user)



def analyseChange(self, change):
change['timestamp_utc'] = datetime.datetime.fromtimestamp(change['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
if change['user'] == self.wikiBotName: # Ignore own edits
Expand Down Expand Up @@ -506,24 +508,101 @@ def rcIRC(self):
ircbuffer += data.decode('utf-8')
except socket.error:
print >>sys.stderr, 'Socket error!'

def analyseEdit(self, change):

def getDiff(self, change):
    """ Return the text fragments added and deleted between two revisions.

    Asks the MediaWiki ``compare`` API (through ``self.site``) for the diff
    between ``change['revision']['old']`` and ``change['revision']['new']``
    and scrapes the returned HTML.

    Returns a dict ``{'added': [...], 'deleted': [...]}`` of captured
    strings; both lists are empty when the response carries no diff body.
    """

    query = pywikibot.data.api.Request(
        parameters={
            'action': 'compare',
            'fromrev': change['revision']['old'],
            'torev': change['revision']['new'],
        },
        site=self.site)
    data = query.submit()
    diff = {'added': [], 'deleted': []}
    if 'compare' in data and '*' in data['compare']:
        html = data['compare']['*']
        # Two shapes are scraped for each side: inline <ins>/<del> spans and
        # whole-line table cells (diff-addedline / diff-deletedline).
        diff['added'] += re.findall(r'(?im)<ins [^<>]*?>([^<>]*?)</ins>', html)
        diff['added'] += re.findall(r'(?im)<td class="diff-addedline"><div>([^<>]*?)</div></td>', html)
        # The closing tag must be </del>; pairing <del> with </ins> never matched.
        diff['deleted'] += re.findall(r'(?im)<del [^<>]*?>([^<>]*?)</del>', html)
        diff['deleted'] += re.findall(r'(?im)<td class="diff-deletedline"><div>([^<>]*?)</div></td>', html)
    return diff

def getScore(self, diff):
    """ Calculate the score of a diff by running every filter over it.

    ``diff`` is a dict with ``'added'`` and ``'deleted'`` lists of strings.
    Each entry of ``self.filters`` is read for the keys ``'compiled'``,
    ``'regexp'``, ``'score'`` and ``'group'`` (``'test'`` or ``'vandalism'``).

    Every match in added text adds the filter's score to its group's
    ``added_score``; every match in deleted text adds the negated score to
    its group's ``deleted_score``.

    Returns a dict with the per-group totals plus ``'global'``, holding the
    sum of all four totals and the dominant group.
    """

    score = {
        'test': {'added_score': 0, 'deleted_score': 0},
        'vandalism': {'added_score': 0, 'deleted_score': 0},
        'global': {'score': 0, 'group': 'unknown'},
    }
    for ifilter in self.filters:
        for iadded in diff['added']:
            for _ in re.findall(ifilter['compiled'], iadded):
                print("!!!Añadido %s (%s score)" % (ifilter['regexp'], ifilter['score']))
                score[ifilter['group']]['added_score'] += ifilter['score']
        for ideleted in diff['deleted']:
            for _ in re.findall(ifilter['compiled'], ideleted):
                print("!!!Eliminado %s (%s score)" % (ifilter['regexp'], ifilter['score'] * -1))
                score[ifilter['group']]['deleted_score'] += ifilter['score'] * -1

    test_added = score['test']['added_score']
    vandalism_added = score['vandalism']['added_score']
    score['global']['score'] = (test_added + vandalism_added) + \
        (score['test']['deleted_score'] + score['vandalism']['deleted_score'])
    # Ties resolve to 'test', including the case where nothing matched, so
    # the initial 'unknown' placeholder is always overwritten.
    # NOTE(review): confirm that "no match" should really be reported as 'test'.
    score['global']['group'] = 'test' if test_added <= vandalism_added else 'vandalism'
    return score

def revertEdit(self, change, alledits=False):
    """ Revert one edit by a user, or all of their edits.

    Placeholder: for now this only prints the new revision id and the user
    name taken from ``change``; nothing is reverted and ``alledits`` is not
    consulted yet.
    """

    print("---> Reverting %s edit(s) by %s" % (change['revision']['new'], change['user']))

def isEditBlanking(self, change):
    """ Assess whether an edit is a blanking.

    Reads ``change['length']['old']`` / ``['new']`` and the added text in
    ``change['diff']['added']``. An edit counts as a blanking when the page
    shrank by at least the percentage required for its previous size, and
    the added text does not mention a redirect.

    Returns True for a blanking, False otherwise (also when either length
    is missing).
    """

    lenOld = change['length']['old']
    lenNew = change['length']['new']
    # A missing length cannot be compared; treat it as "not a blanking"
    # instead of raising on the comparison below.
    if lenOld is None or lenNew is None or not lenNew < lenOld:
        return False
    # Pages turned into redirects shrink legitimately.
    if re.search(r'(?im)(redirect|redirección)', '\n'.join(change['diff']['added'])):
        return False

    # (minimum old length, exclusive upper bound or None, minimum % removed)
    bands = (
        (500, 1000, 90),
        (1000, 2500, 85),
        (2500, 5000, 75),
        (5000, 10000, 72.5),
        (10000, 20000, 70),
        (20000, None, 65),
    )
    for lower, upper, minimum in bands:
        if lenOld >= lower and (upper is None or lenOld < upper):
            percent = (lenOld - lenNew) / (lenOld / 100.0)
            return percent >= minimum
    # Pages shorter than the smallest band are never flagged.
    return False

def isEditVandalism(self, change, score):
    """ Assess whether an edit is vandalism.

    ``score`` is the dict built by getScore(); only ``score['global']`` is
    read. ``change`` is accepted but not used yet.

    Returns True when the global group is ``'vandalism'`` and the global
    score is at or below the threshold, False otherwise.
    """

    vandalismthreshold = -4
    # TODO: an unfinished second rule used to sit here, of the form
    # `elif score['global']['score'] < 0 and ...`, next to an unused
    # `vandalismdensity = 150`. Its condition was never written, so it is
    # left out rather than guessed; presumably it was meant to weigh a mildly
    # negative score against the edit size -- confirm before adding it.
    if score['global']['group'] != 'vandalism':
        return False
    return score['global']['score'] <= vandalismthreshold

def analyseEdit(self, change):
    """ Analyse one edit and revert it when it looks like a test, a blanking or vandalism.

    Fetches the diff via getDiff(), stores it in ``change['diff']`` (read
    later by isEditBlanking()), scores it with getScore() and then runs the
    three checks in order: test, blanking, vandalism. The first check that
    fires triggers revertEdit() and a sendMessage() call carrying the
    matching message name; later checks are not evaluated.
    """

    diff = self.getDiff(change)
    change['diff'] = diff
    score = self.getScore(diff)
    # One-element tuple so the value is always formatted as a single argument.
    print("Score: %s" % (score,))

    # Open question: is a single overall score better, or one per type?
    if self.isEditTest(change, score):
        message = 'test'
    elif self.isEditBlanking(change):
        message = 'blanking'
    elif self.isEditVandalism(change, score):
        message = 'vandalism'
    else:
        return

    self.revertEdit(change)
    self.sendMessage(change, message=message)

"""
for m in match:
Expand Down
26 changes: 1 addition & 25 deletions avbotanalysis.py
Expand Up @@ -356,31 +356,7 @@ def mustBeReverted(editData, cleandata, userClass):
editData['details']=u"" #no olvidar
return revertAllEditsByUser(editData, userClass, regexplist) #Revert

#Blanking edit?
lenOld=editData['lenOld']
lenNew=editData['lenNew']
if lenNew<lenOld and not re.search(avbotglobals.parserRegexps['blanqueos'], editData['newText']): #Avoid articles converted into #REDIRECT [[...]] and other legitimate blankings
percent=(lenOld-lenNew)/(lenOld/100.0)
if (lenOld>=500 and lenOld<1000 and percent>=90) or \
(lenOld>=1000 and lenOld<2500 and percent>=85) or \
(lenOld>=2500 and lenOld<5000 and percent>=75) or \
(lenOld>=5000 and lenOld<10000 and percent>=72.5) or \
(lenOld>=10000 and lenOld<20000 and percent>=70) or \
(lenOld>=20000 and percent>=65):
editData['type']='bl'
editData['score']=-(editData['lenNew']+1) #la puntuacion de los blanqueos es la nueva longitud + 1, negada, para evitar el -0
editData['details']=u''

return revertAllEditsByUser(editData, userClass, regexplist) #Revert
"""
if editData['lenOld']>=1000 and editData['lenNew']<=500 and editData['lenNew']<editData['lenOld']/7: # 1/7 es un buen numero, 85,7%
editData['type']='bl'
editData['score']=-(editData['lenNew']+1) #la puntuacion de los blanqueos es la nueva longitud + 1, negada, para evitar el -0
editData['details']=u''
return revertAllEditsByUser(editData, userClass, regexplist) #Revert
"""
#TODO: Blanking line like this, All glory to the hypnoto


#Interwiki and categories blanking. Example: http://es.wikipedia.org/w/index.php?title=Reciclaje&diff=34127808&oldid=34116543
oldCategoriesNumber=len(re.findall(avbotglobals.parserRegexps['categories'], editData['oldText']))
Expand Down
67 changes: 10 additions & 57 deletions avbotcomb.py
Expand Up @@ -163,19 +163,19 @@ def magicInterwiki(page, resumen, idioma):
else:
return nuevo, resumen

def namespaceTranslator(namespaceid):
    """ Return the name the wiki uses for the namespace with the given numeric id.

    Downloads Special:RecentChanges from the configured site
    (``avbotglobals.preferences['site']``) and reads the ``<option>``
    entries of its namespace selector.

    Returns the text of the first option whose value equals
    ``namespaceid``, or ``''`` when no option matches. Raises IndexError
    when the selector markup is not present in the page.
    """

    data = avbotglobals.preferences['site'].getUrl("/w/index.php?title=Special:RecentChanges")
    data = data.split('<select id="namespace" name="namespace" class="namespaceselector">')[1].split('</select>')[0]
    # The pattern has no backslashes, so a plain u'' literal is equivalent to
    # the former ur'' one and also parses on Python 3.
    options = re.compile(u'<option value="(?P<nmid>[0-9]+)">(?P<nmname>[^<>]+?)</option>')
    for option in options.finditer(data):
        if int(option.group('nmid')) == namespaceid:
            return option.group('nmname')
    return ''

def resumeTranslator(editData):
""" Traductor de resúmenes de edición primitivo """
Expand Down Expand Up @@ -261,25 +261,6 @@ def getParameters():
wikipedia.output(u"Not all obligatory parameters were found. Please, check (*) parameters.")
sys.exit()

def getTime():
    """ Return the current system time formatted as HH:MM:SS. """
    return time.strftime('%H:%M:%S')

def encodeLine(line):
    """ Decode a byte string into text, trying UTF-8 first and ISO-8859-1 second.

    Despite its name, this function decodes. Text that is already decoded
    is returned unchanged.

    Returns the decoded text.
    """

    # Already text: nothing to decode (decoding it again used to raise TypeError).
    if isinstance(line, type(u'')):
        return line
    try:
        return line.decode('utf-8')
    except UnicodeError:
        # ISO-8859-1 maps every byte value, so this fallback cannot fail; the
        # former "Unknown codification" branch behind it was unreachable.
        return line.decode('iso8859-1')

def getUserClass(editData):
""" Averigua el tipo de usuario del que se trata """
""" Check user class """
Expand Down Expand Up @@ -343,31 +324,3 @@ def checkBlockInEnglishWikipedia(editData):

return comment, isProxy

def checkForUpdates():
    """ Report whether any .py file listed at the SVN trunk differs from the local copy.

    Downloads the trunk index page, extracts every linked ``*.py`` file name
    and compares the remote content with the file of the same name that sits
    next to this module.

    Returns True as soon as one file differs, False when all of them match.
    """

    fullpath = os.path.dirname(os.path.abspath(__file__))
    svn = 'http://avbot.googlecode.com/svn/trunk/'

    listing = urllib.urlopen(svn)
    try:
        html = listing.read()
    finally:
        listing.close()

    for i in re.finditer(r">(?P<filename>[^<]+?\.py)</a>", html):
        filename = i.group("filename")
        wikipedia.output(u"Checking file %s..." % filename)
        with open(os.path.join(fullpath, filename), 'r') as local:
            localcontent = local.read()
        remote = urllib.urlopen(svn + filename)
        try:
            remotecontent = remote.read()
        finally:
            # Closed on every path, including the early return below.
            remote.close()
        if localcontent != remotecontent:
            wikipedia.output(u"%s has changed!!!" % filename)
            return True
        wikipedia.output(u"OK!")
    return False

def existenceFile():
    """ Keep the marker file at ``avbotglobals.existFile`` in place.

    Loops forever: once a minute it checks whether the file exists and
    rewrites it with the text "hi" when it does not. Never returns.
    """

    while True:
        if not os.path.isfile(avbotglobals.existFile):
            # `with` closes the handle even if the write fails.
            with open(avbotglobals.existFile, 'w') as existFile:
                existFile.write("hi")
        time.sleep(60)  # must be shorter than half the cron interval

0 comments on commit 2d1305a

Please sign in to comment.