In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [4]:
# https://www.kaggle.com/datasets/benhamner/nips-2015-papers

In [5]:
data = pd.read_csv('Papers.csv')
data.columns

Index(['Id', 'Title', 'EventType', 'PdfName', 'Abstract', 'PaperText'], dtype='object')

In [7]:
paper_text = data['PaperText']
paper_1 = paper_text.iloc[0]
paper_1

'Double or Nothing: Multiplicative\nIncentive Mechanisms for Crowdsourcing\nNihar B. Shah\nUniversity of California, Berkeley\nnihar@eecs.berkeley.edu\n\nDengyong Zhou\nMicrosoft Research\ndengyong.zhou@microsoft.com\n\nAbstract\nCrowdsourcing has gained immense popularity in machine learning applications\nfor obtaining large amounts of labeled data. Crowdsourcing is cheap and fast, but\nsuffers from the problem of low-quality data. To address this fundamental challenge in crowdsourcing, we propose a simple payment mechanism to incentivize\nworkers to answer only the questions that they are sure of and skip the rest. We\nshow that surprisingly, under a mild and natural “no-free-lunch” requirement, this\nmechanism is the one and only incentive-compatible payment mechanism possible. We also show that among all possible incentive-compatible mechanisms\n(that may or may not satisfy no-free-lunch), our mechanism makes the smallest possible payment to spammers. Interestingly, this unique mec

In [8]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [9]:
stopwords = list(STOP_WORDS)
nlp = spacy.load('en_core_web_sm')
doc = nlp(paper_1)

In [10]:
tokens = [token.text for token in doc]
print(tokens)

['Double', 'or', 'Nothing', ':', 'Multiplicative', '\n', 'Incentive', 'Mechanisms', 'for', 'Crowdsourcing', '\n', 'Nihar', 'B.', 'Shah', '\n', 'University', 'of', 'California', ',', 'Berkeley', '\n', 'nihar@eecs.berkeley.edu', '\n\n', 'Dengyong', 'Zhou', '\n', 'Microsoft', 'Research', '\n', 'dengyong.zhou@microsoft.com', '\n\n', 'Abstract', '\n', 'Crowdsourcing', 'has', 'gained', 'immense', 'popularity', 'in', 'machine', 'learning', 'applications', '\n', 'for', 'obtaining', 'large', 'amounts', 'of', 'labeled', 'data', '.', 'Crowdsourcing', 'is', 'cheap', 'and', 'fast', ',', 'but', '\n', 'suffers', 'from', 'the', 'problem', 'of', 'low', '-', 'quality', 'data', '.', 'To', 'address', 'this', 'fundamental', 'challenge', 'in', 'crowdsourcing', ',', 'we', 'propose', 'a', 'simple', 'payment', 'mechanism', 'to', 'incentivize', '\n', 'workers', 'to', 'answer', 'only', 'the', 'questions', 'that', 'they', 'are', 'sure', 'of', 'and', 'skip', 'the', 'rest', '.', 'We', '\n', 'show', 'that', 'surpris

In [11]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [12]:
punctuation = punctuation + '\n' 
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1
max_frequency = max(word_frequencies.values())
print("The most frequent word appeared {} times.".format(max_frequency))
# from collections import Counter
# word_frequencies = Counter(doc.text)

The most frequent word appeared 88 times.


In [13]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

In [15]:
sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)

[Double or Nothing: Multiplicative
Incentive Mechanisms for Crowdsourcing
Nihar B. Shah
University of California, Berkeley
nihar@eecs.berkeley.edu

Dengyong Zhou
Microsoft Research
dengyong.zhou@microsoft.com

Abstract
Crowdsourcing has gained immense popularity in machine learning applications
for obtaining large amounts of labeled data., Crowdsourcing is cheap and fast, but
suffers from the problem of low-quality data., To address this fundamental challenge in crowdsourcing, we propose a simple payment mechanism to incentivize
workers to answer only the questions that they are sure of and skip the rest., We
show that surprisingly, under a mild and natural “no-free-lunch” requirement, this
mechanism is the one and only incentive-compatible payment mechanism possible., We also show that among all possible incentive-compatible mechanisms
(that may or may not satisfy no-free-lunch), our mechanism makes the smallest possible payment to spammers., Interestingly, this unique mechanism takes

In [17]:
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]
sentence_scores

{Double or Nothing: Multiplicative
 Incentive Mechanisms for Crowdsourcing
 Nihar B. Shah
 University of California, Berkeley
 nihar@eecs.berkeley.edu
 
 Dengyong Zhou
 Microsoft Research
 dengyong.zhou@microsoft.com
 
 Abstract
 Crowdsourcing has gained immense popularity in machine learning applications
 for obtaining large amounts of labeled data.: 3.829545454545455,
 Crowdsourcing is cheap and fast, but
 suffers from the problem of low-quality data.: 0.6363636363636364,
 To address this fundamental challenge in crowdsourcing, we propose a simple payment mechanism to incentivize
 workers to answer only the questions that they are sure of and skip the rest.: 3.375000000000001,
 We
 show that surprisingly, under a mild and natural “no-free-lunch” requirement, this
 mechanism is the one and only incentive-compatible payment mechanism possible.: 3.8068181818181817,
 We also show that among all possible incentive-compatible mechanisms
 (that may or may not satisfy no-free-lunch), our mec

In [22]:
from heapq import nlargest

In [23]:
select_length = int(len(sentence_tokens)*0.2)
select_length

59

In [25]:
summary = nlargest(select_length, sentence_scores, key = sentence_scores.get)
summary

[3
 
 
 We will call any payment function f as an incentive-compatible mechanism if the expected payment
 of the worker under this payment function is strictly maximized when the worker responds in the
 manner desired.2
 
 3
 
 Main results: Incentive-compatible mechanism and guarantees
 
 In this section, we present the main results of the paper, namely, the design of incentive-compatible
 mechanisms with practically useful properties.,
 Here, “0” denotes that the worker skipped the question, “ 1” denotes that
 the worker attempted to answer the question and that answer was incorrect, and “+1” denotes that
 the worker attempted to answer the question and that answer was correct.,
 A natural question
 that arises is: can we design an alternative mechanism satisfying incentive compatibility and nofree-lunch that operates somewhere in between?
 3.2
 
 Uniqueness of the Mechanism
 
 In the previous section we showed that our proposed multiplicative mechanism is incentive compatible and sa

In [26]:
final_summary = [word.text for word in summary]
summary = ' '.join(final_summary)
print(summary)

3

We will call any payment function f as an incentive-compatible mechanism if the expected payment
of the worker under this payment function is strictly maximized when the worker responds in the
manner desired.2

3

Main results: Incentive-compatible mechanism and guarantees

In this section, we present the main results of the paper, namely, the design of incentive-compatible
mechanisms with practically useful properties. Here, “0” denotes that the worker skipped the question, “ 1” denotes that
the worker attempted to answer the question and that answer was incorrect, and “+1” denotes that
the worker attempted to answer the question and that answer was correct. A natural question
that arises is: can we design an alternative mechanism satisfying incentive compatibility and nofree-lunch that operates somewhere in between?
3.2

Uniqueness of the Mechanism

In the previous section we showed that our proposed multiplicative mechanism is incentive compatible and satisfies the intuitive req