# Discovery and Representation of Open Making Related Terms

This notebook sketches the initial exercise on discovering the open making related keywords. The input text is harvested via a Web crawler that identifies and crawls semantically related wikipedia articles.   

In [1]:
from utils import tokenizer
import nltk
from nltk import FreqDist
from math import log
import json, csv

## 1. Loading a reference English language corpus

In [2]:
from nltk.corpus import brown
brown.categories()

['adventure',
 'belles_lettres',
 'editorial',
 'fiction',
 'government',
 'hobbies',
 'humor',
 'learned',
 'lore',
 'mystery',
 'news',
 'religion',
 'reviews',
 'romance',
 'science_fiction']

## 2. Stop words

### 2.1 Standard stop words

In [3]:
with open("data/stopwords_standard.txt", "r") as f:
    STOP_WORDS_STANDARD = set(f.read().strip().split("\n"))
print(STOP_WORDS_STANDARD)

{'more', 'being', 'get', "it's", 'am', 'does', "wasn't", "i've", 'in', 'after', "doesn't", 'ourselves', 'they', 'any', 'an', 'myself', 'then', 'than', 'when', "who's", 'whom', "wouldn't", 'by', "don't", 'up', 'such', 'has', "didn't", 'to', 'their', 'would', 'while', "couldn't", "i'll", 'only', "shan't", 'i', 'him', 'but', 'can', 'further', "here's", 'no', 'this', 'not', 'doing', 'on', 'them', "haven't", 'how', "let's", 'is', 'from', 'the', 'as', 'each', 'had', 'there', 'these', 'just', 'should', 'during', "won't", 'ours ', 'same', "can't", 'are', "they're", 'until', 'herself', 'and', "we're", "hasn't", 'most', 'like', 'me', 'his', 'nor', 'all', "where's", 'what', "he's", "there's", 'which', 'above', "shouldn't", 'few', 'once', "they'd", 'r', 'she', 'here', 'were', 'with', "when's", 'do', 'over', 'who', 'yourself', 'own', 'he', "she'd", "you've", "she'll", 'between', 'com', "they've", "why's", "i'm", 'if', 'ought', 'too', 'www', "you're", "we've", 'did', "weren't", 'was', "they'll", "yo

### 2.2 Open-making related stop words

In [4]:
with open("data/stopwords_openmaker.txt", "r") as f:
    STOP_WORDS_OPENMAKER = set(f.read().strip().split("\n"))
print(STOP_WORDS_OPENMAKER)

{'may', 'almost'}


## 3. Removing stop words from the reference English corpus

In [5]:
# merging the two list together
STOP_WORDS = STOP_WORDS_STANDARD.union(STOP_WORDS_OPENMAKER)
print(STOP_WORDS)

{'more', 'being', 'get', "it's", "wasn't", "i've", 'after', "doesn't", 'myself', 'then', 'than', 'by', "don't", 'up', 'has', 'to', 'would', 'while', "couldn't", 'only', "shan't", 'can', 'no', 'them', "haven't", 'is', 'from', 'as', 'just', 'should', 'same', "can't", 'are', 'herself', 'and', "we're", "hasn't", 'his', 'nor', 'all', "where's", "he's", 'which', 'above', "shouldn't", 'few', 'once', "they'd", 'r', 'she', 'were', 'almost', 'with', 'who', 'own', 'between', "they've", "i'm", 'if', 'ought', 'too', "you're", "weren't", 'was', "they'll", "you'd", 'under', "you'll", 'other', 'it', 'very', 'that', "aren't", 'why', 'where', 'themselves', "we'd", 'know', 'because', "he'll", 'yourselves', 'at', 'http', "she's", 'itself', 'again', 'about', 'cannot', 'may', "mustn't", "how's", 'for', 'through', 'himself', 'am', 'does', 'in', 'ourselves', 'they', 'any', 'an', 'when', "who's", 'whom', "wouldn't", 'such', "didn't", 'their', "i'll", 'i', 'him', 'but', 'further', "here's", 'this', 'not', 'doin

In [6]:
# load english words from the Brown corpus removing stop words.
english_freq_dist = FreqDist([w.lower() for w in nltk.corpus.brown.words()
                              if w not in STOP_WORDS])

## 4. Removing the rare words.

Below we remove rare words and get total count. The code below keeps all words with a occurance frequency above 2. 

In [7]:
english_freq_dist = {k:v for k,v in english_freq_dist.items() if v > 2}

## 5. Loading the input Open Maker corpus

In [8]:
# load the harvested text from wikipedia.
with open("data/wikipedia.json", "r") as f: OM_Corpus_text = f.read()
OM_Corpus = json.loads(OM_Corpus_text)

In [9]:
# The toatl number of wiki articles used:
print(len(OM_Corpus))

152


In [10]:
# Column names of the the corpus.
OM_Corpus[0].keys()

dict_keys(['theme.id', 'title', 'url', 'depth', 'text'])

In [11]:
def display_pages(tid):
    meme = [page for page in OM_Corpus if page['theme.id'] == tid]
    for m in meme:
        print(m['depth'],m['title'], m['url'])

In [12]:
display_pages(0)

0 Do it yourself https://en.wikipedia.org/wiki/Do_it_yourself
1 Edupunk https://en.wikipedia.org/wiki/Edupunk
1 Prosumer https://en.wikipedia.org/wiki/Prosumer
1 How-to https://en.wikipedia.org/wiki/How-to
1 Kludge https://en.wikipedia.org/wiki/Kludge
1 Bricolage https://en.wikipedia.org/wiki/Bricolage
1 Junk box https://en.wikipedia.org/wiki/Junk_box
1 Number 8 wire https://en.wikipedia.org/wiki/Number_8_wire
1 Ready-to-assemble furniture https://en.wikipedia.org/wiki/Ready-to-assemble_furniture
1 Open design https://en.wikipedia.org/wiki/Open_Design
1 Hackerspace https://en.wikipedia.org/wiki/Hackerspace
1 Instructables https://en.wikipedia.org/wiki/Instructables
1 Handyman https://en.wikipedia.org/wiki/Handyman
1 Circuit bending https://en.wikipedia.org/wiki/Circuit_bending
1 Project GreenWorld International https://en.wikipedia.org/wiki/Project_GreenOman
1 3D printing https://en.wikipedia.org/wiki/3D_printing


In [13]:
display_pages(1)

0 Open design https://en.wikipedia.org/wiki/Open_design
1 Knowledge commons https://en.wikipedia.org/wiki/Knowledge_commons
1 Open Source Ecology https://en.wikipedia.org/wiki/Open_Source_Ecology
1 Computer-aided design https://en.wikipedia.org/wiki/Computer-aided_design
1 Open Source Initiative https://en.wikipedia.org/wiki/Open_Source_Initiative
1 Open Architecture Network https://en.wikipedia.org/wiki/Open_Architecture_Network
1 Open-source architecture https://en.wikipedia.org/wiki/Open-source_architecture
1 Commons-based peer production https://en.wikipedia.org/wiki/Commons-based_peer_production
1 Open standard https://en.wikipedia.org/wiki/Open_standard
1 OpenCores https://en.wikipedia.org/wiki/OpenCores
1 Co-creation https://en.wikipedia.org/wiki/Co-creation
1 OpenBTS https://en.wikipedia.org/wiki/OpenBTS
1 Open manufacturing https://en.wikipedia.org/wiki/Open_manufacturing
1 Open-source hardware https://en.wikipedia.org/wiki/Open-source_hardware
1 Open source appropriate techno

In [14]:
display_pages(2)

0 Sustainability https://en.wikipedia.org/wiki/Sustainability
1 Sustainability standards and certification https://en.wikipedia.org/wiki/Sustainability_standards_and_certification
1 Appropriate technology https://en.wikipedia.org/wiki/Appropriate_technology
1 Sustainable development https://en.wikipedia.org/wiki/Sustainable_development
1 Environmental issue https://en.wikipedia.org/wiki/Environmental_issue
1 World Cities Summit https://en.wikipedia.org/wiki/World_Cities_Summit
1 Ecopsychology https://en.wikipedia.org/wiki/Ecopsychology
1 Book:Sustainability https://en.wikipedia.org/wiki/Book:Sustainability
1 Sustainable design https://en.wikipedia.org/wiki/Sustainable_design
1 Circles of Sustainability https://en.wikipedia.org/wiki/Circles_of_Sustainability
1 Sustainability science https://en.wikipedia.org/wiki/Sustainability_science
1 Sustainable living https://en.wikipedia.org/wiki/Sustainable_living
1 Index of sustainability articles https://en.wikipedia.org/wiki/List_of_sustainabil

In [15]:
display_pages(3)

0 Maker culture https://en.wikipedia.org/wiki/Maker_culture
1 Modular design https://en.wikipedia.org/wiki/Modular_design
1 Open-source car https://en.wikipedia.org/wiki/Open-source_car
1 Electric vehicle conversion https://en.wikipedia.org/wiki/Electric_vehicle_conversion
1 Thingiverse https://en.wikipedia.org/wiki/Thingiverse
1 Fab lab https://en.wikipedia.org/wiki/Fab_Lab_(fabrication_laboratory)
1 SparkFun Electronics https://en.wikipedia.org/wiki/SparkFun
1 RepRap project https://en.wikipedia.org/wiki/RepRap
1 Distributed manufacturing https://en.wikipedia.org/wiki/Distributed_manufacturing
1 Craft production https://en.wikipedia.org/wiki/Craft_production
1 Autonomous building https://en.wikipedia.org/wiki/Autonomous_building
1 Open-source hardware https://en.wikipedia.org/wiki/Open_source_hardware
1 Kit car https://en.wikipedia.org/wiki/Kit_car


In [16]:
display_pages(4)

0 Innovation https://en.wikipedia.org/wiki/Innovation
1 Competitive intelligence https://en.wikipedia.org/wiki/Creative_competitive_intelligence
1 Multiple discovery https://en.wikipedia.org/wiki/Multiple_discovery
1 UNDP Innovation Facility https://en.wikipedia.org/wiki/UNDP_Innovation_Facility
1 Open Innovations (event) https://en.wikipedia.org/wiki/Open_Innovations_(Forum_and_Technology_Show)
1 Trans-cultural diffusion https://en.wikipedia.org/wiki/Diffusion_(anthropology)
1 Individual capital https://en.wikipedia.org/wiki/Individual_capital
1 Innovation system https://en.wikipedia.org/wiki/Innovation_system
1 Public domain https://en.wikipedia.org/wiki/Public_domain
1 Ingenuity https://en.wikipedia.org/wiki/Ingenuity
1 Sustainable Development Goals https://en.wikipedia.org/wiki/Sustainable_Development_Goals
1 Participatory design https://en.wikipedia.org/wiki/Participatory_design
1 Innovation management https://en.wikipedia.org/wiki/Innovation_management
1 Information revolution ht

In [17]:
display_pages(5)

0 Collaboration https://en.wikipedia.org/wiki/Collaboration
1 Wikinomics https://en.wikipedia.org/wiki/Wikinomics
1 Collaborative editing https://en.wikipedia.org/wiki/Collaborative_editing
1 Telepresence https://en.wikipedia.org/wiki/Telepresence
1 Knowledge management https://en.wikipedia.org/wiki/Knowledge_management
1 The Culture of Collaboration https://en.wikipedia.org/wiki/The_Culture_of_Collaboration
1 Collaborative governance https://en.wikipedia.org/wiki/Collaborative_governance
1 Community film https://en.wikipedia.org/wiki/Community_film
1 Collaborative innovation network https://en.wikipedia.org/wiki/Collaborative_innovation_network
1 Design thinking https://en.wikipedia.org/wiki/Design_thinking
1 Role-based collaboration https://en.wikipedia.org/wiki/Role-based_collaboration
1 Intranet portal https://en.wikipedia.org/wiki/Intranet_portal
1 Critical thinking https://en.wikipedia.org/wiki/Critical_thinking
1 Facilitation (business) https://en.wikipedia.org/wiki/Facilitation

## 6. Analyzing a specific corpus based on a theme

In [18]:
# Note that theme.id: 0 corresponds to the the Do IT YOURSELF
input_text = " ".join([page['text'] for page in OM_Corpus if page['theme.id'] == 0])

In [38]:
# Tokenizing the input text:
tokenized = tokenizer.tokenize_words(input_text)
number_of_words = len(tokenized)
print(number_of_words),OM_Corpus[0]['title']

30073


(None, 'Do it yourself')

### 6.1 Computing frequency distributions of each token, i.e word, term, pancuation, etc.

In [20]:
input_freq_dist = FreqDist(tokenized)

In [21]:
input_freq_dist.most_common(20)

[('\n', 3787),
 ('the', 1257),
 ('and', 776),
 ('of', 771),
 ('a', 661),
 ('to', 642),
 ('in', 563),
 ('"', 429),
 ('is', 303),
 ('as', 276),
 ('for', 257),
 ('that', 224),
 ('or', 206),
 ('by', 186),
 ('with', 182),
 ('on', 156),
 ('are', 151),
 ('3d', 142),
 ('from', 129),
 ('it', 119)]

### 6.2 Removing punctuation and stopwords from the input corpus

In [22]:
for stopword in STOP_WORDS:
    if stopword in input_freq_dist:
        del input_freq_dist[stopword]
        
for punctuation in tokenizer.CHARACTERS_TO_SPLIT:
    if punctuation in input_freq_dist:
        del input_freq_dist[punctuation]

# Re-control most common words after cleaning:
input_freq_dist.most_common(20)

[('3d', 142),
 ('printing', 94),
 ('design', 75),
 ('used', 72),
 ('open', 65),
 ('also', 63),
 ('one', 58),
 ('new', 56),
 ('many', 55),
 ('kludge', 55),
 ('term', 53),
 ('diy', 52),
 ('manufacturing', 51),
 ('use', 50),
 ('project', 49),
 ('bricolage', 46),
 ('often', 45),
 ('work', 45),
 ('hackerspaces', 44),
 ('handyman', 43)]

### 6.3 Removing rare words from input distribution

In [23]:
input_freq_dist = {k:v for k,v in input_freq_dist.items() if v > 1}

## 7. Comparing input vs English corpus volumes

### 7.1 Total words (after cleaning) 

In [24]:
n_input = sum(input_freq_dist.values())
n_english = sum(english_freq_dist.values())
n_input, n_english

(12914, 685422)

### 7.2 Unique words (after cleaning)

In [25]:
n_unique_word_input = len(input_freq_dist.items())
n_unique_word_brown = len(english_freq_dist.items())
n_unique_word_input, n_unique_word_brown

(2386, 20591)

### 7.3 Cleaned set of input words/terms

List of words in the corpus in case, for a visual inspection. Such inspections will be used both to improve tokenization as well as filetring.

In [26]:
input_freq_dist

{'uses': 23,
 'see': 29,
 'disambiguation': 5,
 'diy': 52,
 'redirects': 3,
 'article': 21,
 'multiple': 13,
 'issues': 10,
 'please': 9,
 'help': 20,
 'improve': 15,
 'discuss': 4,
 'page': 2,
 'learn': 14,
 'remove': 11,
 'template': 10,
 'possibly': 4,
 'contains': 3,
 'original': 11,
 'research': 17,
 'verifying': 2,
 'claims': 5,
 'made': 19,
 'adding': 11,
 'inline': 2,
 'citations': 7,
 'statements': 2,
 'consisting': 3,
 'removed': 8,
 'november': 3,
 'message': 10,
 'needs': 8,
 'additional': 5,
 'better': 6,
 'verification': 3,
 'reliable': 3,
 'sources': 12,
 'unsourced': 3,
 'material': 27,
 'challenged': 3,
 'september': 2,
 'part': 23,
 'series': 11,
 'individualism': 4,
 'topics': 4,
 'concepts': 2,
 'autonomy': 2,
 'free': 29,
 'love': 6,
 'freethought': 2,
 'human': 12,
 'rights': 8,
 'individual': 13,
 'reclamation': 2,
 'liberty': 4,
 'negative': 3,
 'personal': 7,
 'property': 12,
 'positive': 4,
 'private': 4,
 'self-ownership': 2,
 'mile': 2,
 'armand': 2,
 'alber

### 7.4 Set of terms/words that occure in both corpus.

In [27]:
common_words = [w for w in input_freq_dist.keys() & english_freq_dist.keys()]
print(len(common_words))

1945


In [28]:
for w in common_words: print(w)

print
marketplace
covered
appeared
notable
manuals
consumer
british
thing
concept
controlled
minutes
interest
resource
evolved
paint
french
spurred
diminutive
phrase
contact
especially
chip
catalog
hardware
layer
employment
acquisition
do-it-yourself
knowledge
easy
translation
message
works
damage
topic
done
corner
students
architectural
advertise
opened
sometimes
rigorous
asking
computers
assessment
materials
corporate
build
estimates
thin
based
tech
drywall
three-dimensional
nature
august
producer
overall
along
drives
biggest
ask
characters
art
dictionary
vehicles
company
solving
technician
explain
apollo
texts
quickly
collage
repeated
murray
cure
asked
model
due
danish
flat
tips
majority
refer
will
wanting
filed
meetings
children's
women
studies
presses
leave
leading
thinking
need
motors
sex
grow
price
operators
types
tangible
either
operating
france
industry
november
firms
volunteer
consistent
solve
manner
items
launch
variety
seems
local
reason
feelings
schools
definitions
act
aut

### 7.5 Set of terms/words that occure in the sample but not in the reference corpus.

TO BE EXAMINED: This specific set needs to be incorporated. In fact, it may capture specifity of the content to a great extend. We need to assign a mapping score for each words in this set.

In [29]:
input_specifics = dict()
for w in input_freq_dist.keys() - english_freq_dist.keys():
    input_specifics[w] = input_freq_dist[w]
    print(w)

hridith
edupunk
automated
inventora
regimes
uk
lvi-strauss
handymen
cognitive
marxism
ccc
photopolymerization
websites
precursors
matured
kuntz
incorporating
verifying
commercialization
grid
prosumers
uv
libertarian
armand
repairing
downes
implants
bavatuesdays
left-wing
anarcho-capitalism
freethought
manufactures
nepal
improvisation
neuroscience
synthesizers
computer-aided
c-base
andor
kit
dpi
'kluge'
cf
bricoleur
threeding
comics
multilingual
high-end
day'
app
bahrain
wikihow
xinchejian
2008-06-16
hometalk
moulding
blurring
abs
redirects
metalworking
hackspace
knockdown
autodesk
toffler's
handyman
actuators
forums
innovators
infrastructure
disambiguation
matrix
copyright
remodeling
youtube
recycled
amf
zine
desktop
szymusiak
ikea
metalab
amateurs
ip
collaborate
commercialized
datamation
logo
trash
fix-up
granholm
top-down
worldwide
informational
robot
welding
2009-02-23
camcorders
recycling
acronym
recycle
sprinkler
self-correcting
dr
manifesto
workspace
hackerspace
mainstream
josiah

In [30]:
print(len(input_specifics))

441


## 8. Stemming (in case needed) 

In [31]:
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
for k,v in input_freq_dist.items():
    stemmed = stemmer.stem(k)
    if stemmed != k: print(k, "->", stemmed)

uses -> use
disambiguation -> disambigu
redirects -> redirect
article -> articl
multiple -> multipl
issues -> issu
please -> pleas
improve -> improv
remove -> remov
template -> templat
possibly -> possibl
contains -> contain
original -> origin
verifying -> verifi
claims -> claim
adding -> ad
inline -> inlin
citations -> citat
statements -> statement
consisting -> consist
removed -> remov
november -> novemb
message -> messag
needs -> need
additional -> addit
verification -> verif
reliable -> reliabl
sources -> sourc
unsourced -> unsourc
material -> materi
challenged -> challeng
september -> septemb
series -> seri
individualism -> individu
topics -> topic
concepts -> concept
autonomy -> autonomi
rights -> right
individual -> individu
reclamation -> reclam
liberty -> liberti
negative -> neg
personal -> person
property -> properti
positive -> posit
private -> privat
lysander -> lysand
henry -> henri
james -> jame
anarchism -> anarch
anarcho-capitalism -> anarcho-capit
liberalism -> liber
f

primary -> primari
'prosumption -> 'prosumpt
architecture -> architectur
decentralization -> decentr
agency -> agenc
recently -> recent
works -> work
worldwide -> worldwid
mentioned -> mention
economy -> economi
despite -> despit
several -> sever
theoretical -> theoret
george -> georg
characteristic -> characterist
prosumers -> prosum
companies -> compani
wages -> wage
areas -> area
industry -> industri
years -> year
choices -> choic
stores -> store
ways -> way
names -> name
giving -> give
recorded -> record
indeed -> inde
generally -> gener
going -> go
choice -> choic
producing -> produc
taking -> take
hobbyists -> hobbyist
today's -> today'
energy -> energi
households -> household
times -> time
surplus -> surplu
national -> nation
distribution -> distribut
consume -> consum
panels -> panel
generating -> gener
electricity -> electr
gas -> ga
innovation -> innov
programme -> programm
leisure -> leisur
pursuits -> pursuit
initial -> initi
combination -> combin
hobbies -> hobbi
rising ->

solving -> solv
trying -> tri
testing -> test
kincheloe -> kinchelo
denote -> denot
employed -> employ
foundation -> foundat
researchers -> research
rigorous -> rigor
provide -> provid
sophisticated -> sophist
understanding -> understand
capable -> capabl
sis -> si
competitive -> competit
advantage -> advantag
tinkering -> tinker
allowing -> allow
turkle -> turkl
workspace -> workspac
productivity -> product
advocates -> advoc
conventional -> convent
variety -> varieti
enables -> enabl
fully -> fulli
tasks -> task
successfully -> success
served -> serv
incorporating -> incorpor
purposes -> purpos
candy -> candi
immediately -> immedi
children's -> children'
weapons -> weapon
operators -> oper
hams -> ham
resistors -> resistor
capacitors -> capacitor
screws -> screw
nuts -> nut
bolts -> bolt
homebrewers -> homebrew
boxes -> box
homebrewing -> homebrew
keeping -> keep
provides -> provid
repairs -> repair
removing -> remov
quantities -> quantiti
treasure -> treasur
tracking -> track
commer

## 9. Computing representation power of common words.

In [32]:
# combine
makerness = {}
for w in common_words:
    # Consider only words whose charcater length is larger than 1
    if len(w) > 1:
        # Log likelihood scores are computed:
        score = log((input_freq_dist[w] / n_input) / (english_freq_dist[w] / n_english))
        makerness[w] = score

In [33]:
# Sorting by scores:
for k,v in sorted(makerness.items(), key=lambda x:x[1], reverse=True): print(k,v)

additive 6.240406254084403
printer 5.86884269765192
printing 5.624645737139878
digital 5.398839068406185
franchise 5.13487352257172
users 5.070335001434149
global 5.070335001434149
citation 5.001342129947197
bending 4.9702515428771665
deposition 4.819020573153243
do-it-yourself 4.760180073130309
manufacturing 4.725494515142419
hardware 4.664869893325984
computers 4.664869893325984
evolutionary 4.664869893325984
junk 4.664869893325984
non-profit 4.664869893325984
homeowners 4.664869893325984
jargon 4.664869893325984
template 4.664869893325984
layer 4.622310278907189
fabrication 4.600331372188413
bug 4.531338500701462
lab 4.48254833653203
individualist 4.48254833653203
zealand 4.48254833653203
coined 4.48254833653203
hack 4.48254833653203
portal 4.48254833653203
media 4.451295793027925
computer 4.451295793027925
commons 4.377187820874203
consumers 4.339447492891357
maker 4.320029407034255
circuit 4.270215701322035
catalog 4.2594047852178205
eric 4.2594047852178205
surname 4.2594047852178

community 1.5738274399676688
owner 1.5738274399676688
market 1.5673549254620511
flat 1.558789562603128
various 1.558789562603128
chinese 1.5558089344649904
adopted 1.55135458411561
contain 1.55135458411561
application 1.5439744768179877
folk 1.5439744768179877
currently 1.5439744768179877
initial 1.5439744768179877
canada 1.5439744768179877
roles 1.5439744768179877
appropriate 1.5439744768179877
anticipated 1.529375677396835
expectations 1.529375677396835
plates 1.529375677396835
don 1.529375677396835
liberty 1.529375677396835
difficulties 1.529375677396835
spencer 1.529375677396835
channels 1.529375677396835
substrate 1.529375677396835
encouraging 1.529375677396835
current 1.5245563909608861
services 1.522155429423348
book 1.5217323280842667
india 1.5207176146537202
working 1.5193922334126517
sometimes 1.5179989904147269
routine 1.5149869399447353
waste 1.5149869399447353
context 1.5149869399447353
stores 1.5149869399447353
educational 1.5149869399447353
movements 1.5078694721758714
p

union 0.15401038680913437
condition 0.15401038680913437
management 0.15401038680913437
words 0.1503540756060241
southern 0.1503540756060241
seem 0.1474386216459001
among 0.13766124880760505
success 0.13227040017272867
questions 0.12869257882484472
color 0.12869257882484472
try 0.12869257882484472
showed 0.12157511105598069
job 0.10888995152866497
private 0.10574364583929986
behavior 0.10052170185814831
thinking 0.09360125901357448
increased 0.08672837972581253
increase 0.08501751532218306
power 0.0828221247587474
bridge 0.07990241465541263
least 0.07990241465541263
day 0.07844575015894845
major 0.0717722885721624
gas 0.06975004319139466
modern 0.06975004319139466
general 0.06370772873543196
earth 0.059699707337893274
likely 0.053055164619224604
events 0.04974937648472516
director 0.04974937648472516
playing 0.04974937648472516
james 0.04974937648472516
standing 0.04974937648472516
story 0.039897080041713544
rise 0.03989708004171333
end 0.03744909840307342
class 0.02529828062056089
fina

In [34]:
with open('makerness.csv', 'w') as csvfile:
    thewriter = csv.writer(csvfile, delimiter=',')
    for k,v in sorted(makerness.items(), key=lambda x:x[1], reverse=True):
        thewriter.writerow([k,v])