Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Prevent re-listing in less than 30 days.

The words in a listing are now scraped into a bag of words. All new listings are now checked against previous listings from the same email domain. If there is more than a 60% match between the bags of words, the listing is refused. We use this approach instead of a direct comparison because it is more robust: a listing will not be considered unique merely because of simple formatting changes, words being shifted around, or a few throwaway lines being substituted. It needs to be substantially different. Listings are checked against the email domain rather than the email address because new addresses are cheap while new domains are not.

This commit also fixes a long-known bug that allowed the email address to be changed after a listing was published.
  • Loading branch information...
commit 09cd6ff99def98b21fb87a60551943702db11212 1 parent b8e1d38
@jace jace authored
View
2  forms.py
@@ -38,7 +38,7 @@ class ListingForm(Form):
validators=[Required(u"If this job doesn’t have a fixed location, use “Anywhere”")])
job_relocation_assist = BooleanField("Relocation assistance available")
job_description = TextAreaField("Description",
- description=u"Our apologies for the mismatched font you see here. We’re working on it.",
+ description=u"Our apologies for the mismatched font you see here. We’re working on it",
@jace Owner
jace added a note

Yeah, still working on it. We've now got a solid use case history between an HTML editor in the job board and a Markdown editor in Funnel, and I still can't decide which approach is better.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
validators=[Required("A description of the job is required")])
job_perks = BooleanField("Job perks are available")
job_perks_description = TextAreaField("Describe job perks",
View
1  models.py
@@ -126,6 +126,7 @@ class JobPost(db.Model):
md5sum = db.Column(db.String(32), nullable=False, index=True)
# Payment, audit and workflow fields
+ words = db.Column(db.UnicodeText, nullable=True) # All words in description, perks and how_to_apply
promocode = db.Column(db.String(40), nullable=True)
status = db.Column(db.Integer, nullable=False, default=POSTSTATUS.DRAFT)
ipaddr = db.Column(db.String(45), nullable=False)
View
8 sass/_layout.scss
@@ -193,6 +193,9 @@ footer {
.post-company-logo {
float: right;
}
+ #apply-info {
+ line-height: 1.5;
+ }
#detailed-info {
float: left;
max-width: 600px;
@@ -524,5 +527,10 @@ h2 {
margin: 1em 0;
}
+#newpost_headline:hover {
+ @include border-radius(2px);
+ @include box-shadow(#ccf 0 0 5px, #ccf 0 0 5px 0 inset);
+}
+
/* Spam protection */
.z {display: none;}
View
16 static/css/screen.css
@@ -531,6 +531,9 @@ footer {
#sheet .post-company-logo {
float: right;
}
+#sheet #apply-info {
+ line-height: 1.5;
+}
#sheet #detailed-info {
float: left;
max-width: 600px;
@@ -930,6 +933,19 @@ h2 {
margin: 1em 0;
}
+#newpost_headline:hover {
+ -moz-border-radius: 2px;
+ -webkit-border-radius: 2px;
+ -o-border-radius: 2px;
+ -ms-border-radius: 2px;
+ -khtml-border-radius: 2px;
+ border-radius: 2px;
+ -moz-box-shadow: #ccccff 0 0 5px, #ccccff 0 0 5px 0 inset;
+ -webkit-box-shadow: #ccccff 0 0 5px, #ccccff 0 0 5px 0 inset;
+ -o-box-shadow: #ccccff 0 0 5px, #ccccff 0 0 5px 0 inset;
+ box-shadow: #ccccff 0 0 5px, #ccccff 0 0 5px 0 inset;
+}
+
/* Spam protection */
.z {
display: none;
View
2  templates/detail.html
@@ -75,7 +75,7 @@
</div>
</div>
</div>
- <div class="section">
+ <div class="section" id="apply-info">
<h2>Apply for this position</h2>
<p>{{ post.how_to_apply|scrubemail(('z', 'y')) }}</p>
</div>
View
54 utils.py
@@ -188,6 +188,60 @@ def convertemail(m):
return data
# Splits text on runs of non-word characters (regex \W). Raw string so the
# backslash reaches the regex engine without being treated as a string escape.
WORDSPLIT_RE = re.compile(r'\W+')
# Matches one tag, non-greedily. DOTALL lets "." cross newlines so that a tag
# whose attributes wrap onto the next line is still recognised as a single tag.
# NOTE(review): this also means a stray "<" can pair with a ">" on a later
# line; without DOTALL such pairs were only matched within one line.
TAGSPLIT_RE = re.compile(r'<.*?>', re.DOTALL)


def striptags(text):
    """
    Remove HTML/XML tags from text, inserting spaces in their place.

    Each tag (including one that spans several lines) is replaced by a
    single space, so words on either side of a tag never fuse together.

    :param text: Markup or plain text (byte or unicode string).
    :returns: The text with every ``<...>`` sequence replaced by a space.

    >>> striptags('<h1>title</h1>')
    ' title '
    >>> striptags('plain text')
    'plain text'
    >>> striptags(u'word<br>break')
    u'word break'
    >>> striptags('<a\\nhref="x">link</a>')
    ' link '
    """
    return TAGSPLIT_RE.sub(' ', text)
+
+
def getwords(text):
    """
    Break text into a list of words, treating every run of punctuation or
    whitespace as a separator and discarding the separators themselves:

    >>> getwords('this is some text.')
    ['this', 'is', 'some', 'text']
    >>> getwords('and/or')
    ['and', 'or']
    >>> getwords('one||two')
    ['one', 'two']
    >>> getwords("does not is doesn't")
    ['does', 'not', 'is', 'doesn', 't']
    >>> getwords(u'hola unicode!')
    [u'hola', u'unicode']
    """
    # The separator pattern always consumes at least one character, so empty
    # tokens can only appear at the very start or end of the split result.
    # Filtering out empty tokens therefore removes exactly those.
    return [token for token in WORDSPLIT_RE.split(text) if token]
+
+
def get_word_bag(text):
    """
    Build the "bag of words" for a piece of text: every distinct word,
    sorted alphabetically and joined with single spaces.

    Tags are stripped first, then the text is normalised with simplify_text
    (defined elsewhere in this module) before being split on spaces.

    >>> get_word_bag("This is a piece\tof text with this extra bit!")
    'a bit extra is of piece text this with'
    """
    plain = simplify_text(striptags(text))
    unique_words = set(plain.split(' '))
    return " ".join(sorted(unique_words))
+
+
# When this module is run directly as a script, execute the doctests embedded in its docstrings.
if __name__ == '__main__':
import doctest
doctest.testmod()
View
104 views.py
@@ -6,6 +6,7 @@
from datetime import date, datetime, timedelta
from urllib import quote, quote_plus
from pytz import utc, timezone
+from difflib import SequenceMatcher
from flask import (render_template, redirect, url_for, request, session, abort,
flash, g, Response, Markup, escape, jsonify)
from flaskext.mail import Mail, Message
@@ -16,7 +17,7 @@
from models import db, POSTSTATUS, JobPost, JobType, JobCategory, JobPostReport, ReportCode, unique_hash, agelimit
import forms
from uploads import uploaded_logos, process_image
-from utils import sanitize_html, scrubemail, md5sum, get_email_domain
+from utils import sanitize_html, scrubemail, md5sum, get_email_domain, get_word_bag
from search import do_search
mail = Mail()
@@ -273,7 +274,7 @@ def jobdetail(hashid):
db.session.add(report)
db.session.commit()
if request.is_xhr:
- return "<p>Thanks! This job listing has been flagged for review.</p>" #Ugh!
+ return "<p>Thanks! This job listing has been flagged for review.</p>" #FIXME: Ugh!
else:
flash("Thanks! This job listing has been flagged for review.", "interactive")
elif request.method == 'POST' and request.is_xhr:
@@ -375,48 +376,71 @@ def editjob(hashid, key, form=None, post=None, validated=False):
form.job_type.choices = [(ob.id, ob.title) for ob in JobType.query.filter_by(public=True).order_by('seq')]
form.job_category.choices = [(ob.id, ob.title) for ob in JobCategory.query.filter_by(public=True).order_by('seq')]
if post is None:
- post = JobPost.query.filter_by(hashid=hashid).first()
- if post is None:
- abort(404)
+ post = JobPost.query.filter_by(hashid=hashid).first_or_404()
if key != post.edit_key:
abort(403)
- #if request.method == 'POST' and post.status != POSTSTATUS.DRAFT:
- # form.poster_email.data = post.email
+ # Don't allow email address to be changed once it's confirmed
+ if request.method == 'POST' and post.status >= POSTSTATUS.CONFIRMED:
+ form.poster_email.data = post.email
if request.method == 'POST' and (validated or form.validate()):
- post.headline = form.job_headline.data
- post.type_id = form.job_type.data
- post.category_id = form.job_category.data
- post.location = form.job_location.data
- post.relocation_assist = form.job_relocation_assist.data
- post.description = sanitize_html(form.job_description.data)
- post.perks = sanitize_html(form.job_perks_description.data) if form.job_perks.data else ''
- post.how_to_apply = form.job_how_to_apply.data
- post.company_name = form.company_name.data
- post.company_url = form.company_url.data
- post.email = form.poster_email.data
- post.email_domain = get_email_domain(post.email)
- post.md5sum = md5sum(post.email)
-
- # TODO: Provide option of replacing logo or leaving it alone
- if request.files['company_logo']:
- thumbnail = g.company_logo
- #if 'company_logo' in g:
- # # The validator saved a copy of the processed logo
- # thumbnail = g['company_logo']
- #else:
- # thumbnail = process_image(request.files['company_logo'])
- logofilename = uploaded_logos.save(thumbnail, name='%s.' % post.hashid)
- post.company_logo = logofilename
+ form_description = sanitize_html(form.job_description.data)
+ form_perks = sanitize_html(form.job_perks_description.data) if form.job_perks.data else ''
+ form_how_to_apply = form.job_how_to_apply.data
+ form_email_domain = get_email_domain(form.poster_email.data)
+ form_words = get_word_bag(u' '.join((form_description, form_perks, form_how_to_apply)))
+
+ similar = False
+ for oldpost in JobPost.query.filter(JobPost.email_domain == form_email_domain).filter(
+ JobPost.status > POSTSTATUS.PENDING).filter(
+ JobPost.datetime > datetime.utcnow() - agelimit).all():
+ if oldpost.id != post.id:
+ if oldpost.words:
+ s = SequenceMatcher(None, form_words, oldpost.words)
+ if s.ratio() > 0.6:
@jace Owner
jace added a note

This is where all the action happens.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
+ similar = True
+ break
+
+ if similar:
+ flash("This listing is very similar to an earlier listing. You may not relist the same job "
+ "in less than %d days. If you believe this to be an error, please email us at %s." % (agelimit.days,
+ app.config['ADMINS'][0]), category='interactive')
else:
- if form.company_logo_remove.data:
- post.company_logo = None
+ post.headline = form.job_headline.data
+ post.type_id = form.job_type.data
+ post.category_id = form.job_category.data
+ post.location = form.job_location.data
+ post.relocation_assist = form.job_relocation_assist.data
+ post.description = form_description
+ post.perks = form_perks
+ post.how_to_apply = form_how_to_apply
+ post.company_name = form.company_name.data
+ post.company_url = form.company_url.data
+ post.email = form.poster_email.data
+ post.email_domain = form_email_domain
+ post.md5sum = md5sum(post.email)
+ # To protect from gaming, don't allow words to be removed in edited listings once the post
+ # has been confirmed. Just add the new words.
+ if post.status >= POSTSTATUS.CONFIRMED:
+ prev_words = post.words or ''
+ else:
+ prev_words = u''
+ post.words = get_word_bag(u' '.join((prev_words, form_description, form_perks, form_how_to_apply)))
+
+ if request.files['company_logo']:
+ # The form's validator saved the processed logo in g.company_logo.
+ thumbnail = g.company_logo
+ logofilename = uploaded_logos.save(thumbnail, name='%s.' % post.hashid)
+ post.company_logo = logofilename
+ else:
+ if form.company_logo_remove.data:
+ post.company_logo = None
- db.session.commit()
- userkeys = session.get('userkeys', [])
- userkeys.append(post.edit_key)
- session['userkeys'] = userkeys
- session.permanent = True
- return redirect(url_for('jobdetail', hashid=post.hashid), code=303)
+ db.session.commit()
+ userkeys = session.get('userkeys', [])
+ userkeys.append(post.edit_key)
+ session['userkeys'] = userkeys
+ session.permanent = True
+ return redirect(url_for('jobdetail', hashid=post.hashid), code=303)
elif request.method == 'POST':
flash("Please correct the indicated errors", category='interactive')
elif request.method == 'GET':
@@ -434,7 +458,7 @@ def editjob(hashid, key, form=None, post=None, validated=False):
form.company_url.data = post.company_url
form.poster_email.data = post.email
- return render_template('postjob.html', form=form)#, no_email=post.status != POSTSTATUS.DRAFT)
+ return render_template('postjob.html', form=form, no_email=post.status > POSTSTATUS.DRAFT)
@app.route('/new', methods=('GET', 'POST'))

1 comment on commit 09cd6ff

@jace
Owner

Yeah, still working on it. We've now got a solid use case history between an HTML editor in the job board and a Markdown editor in Funnel, and I still can't decide which approach is better.

@jace
Owner

This is where all the action happens.

@ghoseb

Looks pretty good :-)

Please sign in to comment.
Something went wrong with that request. Please try again.