# Discovery and Representation of Open Making Related Terms

This notebook sketches the initial exercise on discovering the open making related keywords. The input text is harvested via a Web crawler that identifies and crawls semantically related wikipedia articles.   

In [1]:
from utils import tokenizer
import nltk
from nltk import FreqDist
from math import log
import json, csv

## 1. Loading a reference English language corpus

In [2]:
from nltk.corpus import brown
brown.categories()

['adventure',
 'belles_lettres',
 'editorial',
 'fiction',
 'government',
 'hobbies',
 'humor',
 'learned',
 'lore',
 'mystery',
 'news',
 'religion',
 'reviews',
 'romance',
 'science_fiction']

## 2. Stop words

### 2.1 Standard stop words

In [42]:
with open("data/stopwords_standard.txt", "r") as f:
    STOP_WORDS_STANDARD = set(f.read().strip().split("\n"))
print(STOP_WORDS_STANDARD)

{'more', 'being', 'get', "it's", 'am', 'does', "wasn't", "i've", 'in', 'after', "doesn't", 'ourselves', 'they', 'any', 'an', 'myself', 'then', 'than', 'when', "who's", 'whom', "wouldn't", 'by', "don't", 'up', 'such', 'has', "didn't", 'to', 'their', 'would', 'while', "couldn't", "i'll", 'only', "shan't", 'i', 'him', 'but', 'can', 'further', "here's", 'no', 'this', 'not', 'doing', 'on', 'them', "haven't", 'how', "let's", 'is', 'from', 'the', 'as', 'each', 'had', 'there', 'these', 'just', 'should', 'during', "won't", 'ours ', 'same', "can't", 'are', "they're", 'until', 'herself', 'and', "we're", "hasn't", 'most', 'like', 'me', 'his', 'nor', 'all', "where's", 'what', "he's", "there's", 'which', 'above', "shouldn't", 'few', 'once', "they'd", 'r', 'she', 'here', 'were', 'with', "when's", 'do', 'over', 'who', 'yourself', 'own', 'he', "she'd", "you've", "she'll", 'between', 'com', "they've", "why's", "i'm", 'if', 'ought', 'too', 'www', "you're", "we've", 'did', "weren't", 'was', "they'll", "yo

### 2.2 Open-making related stop words

In [43]:
with open("data/stopwords_openmaker.txt", "r") as f:
    STOP_WORDS_OPENMAKER = set(f.read().strip().split("\n"))
print(STOP_WORDS_OPENMAKER)

{'one', 'many', 'may', 'often', 'also', 'almost'}


## 3. Removing stop words from the reference English corpus

In [44]:
# merging the two list together
STOP_WORDS = STOP_WORDS_STANDARD.union(STOP_WORDS_OPENMAKER)
print(STOP_WORDS)

{'more', 'being', 'get', "it's", "wasn't", "i've", 'after', "doesn't", 'myself', 'then', 'than', 'by', "don't", 'up', 'has', 'to', 'would', 'while', "couldn't", 'only', "shan't", 'can', 'no', 'them', "haven't", 'is', 'from', 'as', 'just', 'should', 'same', "can't", 'are', 'herself', 'and', "we're", "hasn't", 'his', 'nor', 'all', "where's", "he's", 'which', 'above', "shouldn't", 'few', 'once', "they'd", 'r', 'she', 'were', 'almost', 'one', 'with', 'who', 'own', 'between', "they've", "i'm", 'if', 'ought', 'too', "you're", "weren't", 'was', "they'll", "you'd", 'under', "you'll", 'other', 'it', 'very', 'that', "aren't", 'why', 'where', 'themselves', "we'd", 'know', 'because', "he'll", 'yourselves', 'at', 'http', "she's", 'itself', 'again', 'about', 'cannot', 'may', "mustn't", "how's", 'for', 'through', 'himself', 'am', 'does', 'in', 'ourselves', 'they', 'any', 'an', 'when', "who's", 'whom', "wouldn't", 'such', "didn't", 'their', "i'll", 'i', 'him', 'but', 'further', "here's", 'this', 'not'

In [45]:
# load english words from the Brown corpus removing stop words.
english_freq_dist = FreqDist([w.lower() for w in nltk.corpus.brown.words()
                              if w not in STOP_WORDS])

## 4. Removing the rare words.

Below we remove rare words and get total count. The code below keeps all words with a occurance frequency above 2. 

In [46]:
english_freq_dist = {k:v for k,v in english_freq_dist.items() if v > 2}

## 5. Loading the input Open Maker corpus

In [47]:
# load the harvested text from wikipedia.
with open("data/wikipedia.json", "r") as f: OM_Corpus_text = f.read()
OM_Corpus = json.loads(OM_Corpus_text)

In [48]:
# The toatl number of wiki articles used:
print(len(OM_Corpus))

152


In [49]:
# Column names of the the corpus.
OM_Corpus[0].keys()

dict_keys(['theme.id', 'title', 'url', 'depth', 'text'])

In [50]:
def display_pages(tid):
    meme = [page for page in OM_Corpus if page['theme.id'] == tid]
    for m in meme:
        print(m['depth'],m['title'], m['url'])

In [51]:
display_pages(0)

0 Do it yourself https://en.wikipedia.org/wiki/Do_it_yourself
1 Edupunk https://en.wikipedia.org/wiki/Edupunk
1 Prosumer https://en.wikipedia.org/wiki/Prosumer
1 How-to https://en.wikipedia.org/wiki/How-to
1 Kludge https://en.wikipedia.org/wiki/Kludge
1 Bricolage https://en.wikipedia.org/wiki/Bricolage
1 Junk box https://en.wikipedia.org/wiki/Junk_box
1 Number 8 wire https://en.wikipedia.org/wiki/Number_8_wire
1 Ready-to-assemble furniture https://en.wikipedia.org/wiki/Ready-to-assemble_furniture
1 Open design https://en.wikipedia.org/wiki/Open_Design
1 Hackerspace https://en.wikipedia.org/wiki/Hackerspace
1 Instructables https://en.wikipedia.org/wiki/Instructables
1 Handyman https://en.wikipedia.org/wiki/Handyman
1 Circuit bending https://en.wikipedia.org/wiki/Circuit_bending
1 Project GreenWorld International https://en.wikipedia.org/wiki/Project_GreenOman
1 3D printing https://en.wikipedia.org/wiki/3D_printing


In [52]:
display_pages(1)

0 Open design https://en.wikipedia.org/wiki/Open_design
1 Knowledge commons https://en.wikipedia.org/wiki/Knowledge_commons
1 Open Source Ecology https://en.wikipedia.org/wiki/Open_Source_Ecology
1 Computer-aided design https://en.wikipedia.org/wiki/Computer-aided_design
1 Open Source Initiative https://en.wikipedia.org/wiki/Open_Source_Initiative
1 Open Architecture Network https://en.wikipedia.org/wiki/Open_Architecture_Network
1 Open-source architecture https://en.wikipedia.org/wiki/Open-source_architecture
1 Commons-based peer production https://en.wikipedia.org/wiki/Commons-based_peer_production
1 Open standard https://en.wikipedia.org/wiki/Open_standard
1 OpenCores https://en.wikipedia.org/wiki/OpenCores
1 Co-creation https://en.wikipedia.org/wiki/Co-creation
1 OpenBTS https://en.wikipedia.org/wiki/OpenBTS
1 Open manufacturing https://en.wikipedia.org/wiki/Open_manufacturing
1 Open-source hardware https://en.wikipedia.org/wiki/Open-source_hardware
1 Open source appropriate techno

In [53]:
display_pages(2)

0 Sustainability https://en.wikipedia.org/wiki/Sustainability
1 Sustainability standards and certification https://en.wikipedia.org/wiki/Sustainability_standards_and_certification
1 Appropriate technology https://en.wikipedia.org/wiki/Appropriate_technology
1 Sustainable development https://en.wikipedia.org/wiki/Sustainable_development
1 Environmental issue https://en.wikipedia.org/wiki/Environmental_issue
1 World Cities Summit https://en.wikipedia.org/wiki/World_Cities_Summit
1 Ecopsychology https://en.wikipedia.org/wiki/Ecopsychology
1 Book:Sustainability https://en.wikipedia.org/wiki/Book:Sustainability
1 Sustainable design https://en.wikipedia.org/wiki/Sustainable_design
1 Circles of Sustainability https://en.wikipedia.org/wiki/Circles_of_Sustainability
1 Sustainability science https://en.wikipedia.org/wiki/Sustainability_science
1 Sustainable living https://en.wikipedia.org/wiki/Sustainable_living
1 Index of sustainability articles https://en.wikipedia.org/wiki/List_of_sustainabil

In [54]:
display_pages(3)

0 Maker culture https://en.wikipedia.org/wiki/Maker_culture
1 Modular design https://en.wikipedia.org/wiki/Modular_design
1 Open-source car https://en.wikipedia.org/wiki/Open-source_car
1 Electric vehicle conversion https://en.wikipedia.org/wiki/Electric_vehicle_conversion
1 Thingiverse https://en.wikipedia.org/wiki/Thingiverse
1 Fab lab https://en.wikipedia.org/wiki/Fab_Lab_(fabrication_laboratory)
1 SparkFun Electronics https://en.wikipedia.org/wiki/SparkFun
1 RepRap project https://en.wikipedia.org/wiki/RepRap
1 Distributed manufacturing https://en.wikipedia.org/wiki/Distributed_manufacturing
1 Craft production https://en.wikipedia.org/wiki/Craft_production
1 Autonomous building https://en.wikipedia.org/wiki/Autonomous_building
1 Open-source hardware https://en.wikipedia.org/wiki/Open_source_hardware
1 Kit car https://en.wikipedia.org/wiki/Kit_car


In [55]:
display_pages(4)

0 Innovation https://en.wikipedia.org/wiki/Innovation
1 Competitive intelligence https://en.wikipedia.org/wiki/Creative_competitive_intelligence
1 Multiple discovery https://en.wikipedia.org/wiki/Multiple_discovery
1 UNDP Innovation Facility https://en.wikipedia.org/wiki/UNDP_Innovation_Facility
1 Open Innovations (event) https://en.wikipedia.org/wiki/Open_Innovations_(Forum_and_Technology_Show)
1 Trans-cultural diffusion https://en.wikipedia.org/wiki/Diffusion_(anthropology)
1 Individual capital https://en.wikipedia.org/wiki/Individual_capital
1 Innovation system https://en.wikipedia.org/wiki/Innovation_system
1 Public domain https://en.wikipedia.org/wiki/Public_domain
1 Ingenuity https://en.wikipedia.org/wiki/Ingenuity
1 Sustainable Development Goals https://en.wikipedia.org/wiki/Sustainable_Development_Goals
1 Participatory design https://en.wikipedia.org/wiki/Participatory_design
1 Innovation management https://en.wikipedia.org/wiki/Innovation_management
1 Information revolution ht

In [56]:
display_pages(5)

0 Collaboration https://en.wikipedia.org/wiki/Collaboration
1 Wikinomics https://en.wikipedia.org/wiki/Wikinomics
1 Collaborative editing https://en.wikipedia.org/wiki/Collaborative_editing
1 Telepresence https://en.wikipedia.org/wiki/Telepresence
1 Knowledge management https://en.wikipedia.org/wiki/Knowledge_management
1 The Culture of Collaboration https://en.wikipedia.org/wiki/The_Culture_of_Collaboration
1 Collaborative governance https://en.wikipedia.org/wiki/Collaborative_governance
1 Community film https://en.wikipedia.org/wiki/Community_film
1 Collaborative innovation network https://en.wikipedia.org/wiki/Collaborative_innovation_network
1 Design thinking https://en.wikipedia.org/wiki/Design_thinking
1 Role-based collaboration https://en.wikipedia.org/wiki/Role-based_collaboration
1 Intranet portal https://en.wikipedia.org/wiki/Intranet_portal
1 Critical thinking https://en.wikipedia.org/wiki/Critical_thinking
1 Facilitation (business) https://en.wikipedia.org/wiki/Facilitation

## 6. Analyzing a specific corpus based on a theme

In [57]:
# Note that theme.id: 0 corresponds to the the Do IT YOURSELF
input_text = " ".join([page['text'] for page in OM_Corpus if page['theme.id'] == 0])

In [58]:
# Tokenizing the input text:
tokenized = tokenizer.tokenize_words(input_text)
number_of_words = len(tokenized)
print(number_of_words),OM_Corpus[0]['title']

30073


(None, 'Do it yourself')

### 6.1 Computing frequency distributions of each token, i.e word, term, pancuation, etc.

In [59]:
input_freq_dist = FreqDist(tokenized)

In [60]:
input_freq_dist.most_common(20)

[('\n', 3787),
 ('the', 1257),
 ('and', 776),
 ('of', 771),
 ('a', 661),
 ('to', 642),
 ('in', 563),
 ('"', 429),
 ('is', 303),
 ('as', 276),
 ('for', 257),
 ('that', 224),
 ('or', 206),
 ('by', 186),
 ('with', 182),
 ('on', 156),
 ('are', 151),
 ('3d', 142),
 ('from', 129),
 ('it', 119)]

### 6.2 Removing punctuation and stopwords from the input corpus

In [70]:
for stopword in STOP_WORDS:
    if stopword in input_freq_dist:
        del input_freq_dist[stopword]
        
for punctuation in tokenizer.CHARACTERS_TO_SPLIT:
    if punctuation in input_freq_dist:
        del input_freq_dist[punctuation]

# Re-control most common words after cleaning:
input_freq_dist.most_common(80)

[('3d', 142),
 ('printing', 94),
 ('design', 75),
 ('used', 72),
 ('open', 65),
 ('new', 56),
 ('kludge', 55),
 ('term', 53),
 ('diy', 52),
 ('manufacturing', 51),
 ('use', 50),
 ('project', 49),
 ('bricolage', 46),
 ('work', 45),
 ('hackerspaces', 44),
 ('handyman', 43),
 ('projects', 38),
 ('using', 38),
 ('parts', 37),
 ('music', 35),
 ('furniture', 34),
 ('people', 33),
 ('production', 33),
 ('software', 33),
 ('kluge', 33),
 ('technology', 32),
 ('home', 31),
 ('circuit', 31),
 ('common', 30),
 ('make', 30),
 ('first', 30),
 ('see', 29),
 ('free', 29),
 ('social', 29),
 ('culture', 29),
 ('process', 29),
 ('additive', 29),
 ('material', 27),
 ('example', 27),
 ('world', 27),
 ('printers', 27),
 ('electronic', 26),
 ('materials', 25),
 ('hackerspace', 25),
 ('prosumer', 25),
 ('digital', 25),
 ('well', 25),
 ('processes', 24),
 ('printed', 24),
 ('repair', 24),
 ('metal', 24),
 ('uses', 23),
 ('part', 23),
 ('time', 23),
 ('include', 23),
 ('products', 23),
 ('layer', 23),
 ('build

### 6.3 Removing rare words from input distribution

In [23]:
input_freq_dist = {k:v for k,v in input_freq_dist.items() if v > 1}

## 7. Comparing input vs English corpus volumes

### 7.1 Total words (after cleaning) 

In [24]:
n_input = sum(input_freq_dist.values())
n_english = sum(english_freq_dist.values())
n_input, n_english

(12914, 685422)

### 7.2 Unique words (after cleaning)

In [25]:
n_unique_word_input = len(input_freq_dist.items())
n_unique_word_brown = len(english_freq_dist.items())
n_unique_word_input, n_unique_word_brown

(2386, 20591)

### 7.3 Cleaned set of input words/terms

List of words in the corpus in case, for a visual inspection. Such inspections will be used both to improve tokenization as well as filtering.

In [26]:
input_freq_dist

{'uses': 23,
 'see': 29,
 'disambiguation': 5,
 'diy': 52,
 'redirects': 3,
 'article': 21,
 'multiple': 13,
 'issues': 10,
 'please': 9,
 'help': 20,
 'improve': 15,
 'discuss': 4,
 'page': 2,
 'learn': 14,
 'remove': 11,
 'template': 10,
 'possibly': 4,
 'contains': 3,
 'original': 11,
 'research': 17,
 'verifying': 2,
 'claims': 5,
 'made': 19,
 'adding': 11,
 'inline': 2,
 'citations': 7,
 'statements': 2,
 'consisting': 3,
 'removed': 8,
 'november': 3,
 'message': 10,
 'needs': 8,
 'additional': 5,
 'better': 6,
 'verification': 3,
 'reliable': 3,
 'sources': 12,
 'unsourced': 3,
 'material': 27,
 'challenged': 3,
 'september': 2,
 'part': 23,
 'series': 11,
 'individualism': 4,
 'topics': 4,
 'concepts': 2,
 'autonomy': 2,
 'free': 29,
 'love': 6,
 'freethought': 2,
 'human': 12,
 'rights': 8,
 'individual': 13,
 'reclamation': 2,
 'liberty': 4,
 'negative': 3,
 'personal': 7,
 'property': 12,
 'positive': 4,
 'private': 4,
 'self-ownership': 2,
 'mile': 2,
 'armand': 2,
 'alber

### 7.4 Set of terms/words that occure in both corpus.

In [90]:
common_words = [w for w in input_freq_dist.keys() & english_freq_dist.keys()]
print(len(common_words))

3819


In [91]:
for w in common_words: print(w)

print
marketplace
covered
appeared
notable
commodity
sugar
encounters
manuals
consumer
providing
emitted
court
thing
hoc
clubs
british
wholly
concept
detroit
controlled
minutes
shelter
reductions
inspection
authority
interest
resource
evolved
paint
french
capitalism
disadvantages
spurred
responsibility
pad
shells
demolished
hat
fallen
diminutive
phrase
lenses
contact
dominant
especially
chip
questioned
catalog
doubled
hardware
layer
trivial
painter
officers
employment
thumb
bulb
procedural
acquisition
operation
hurt
knowledge
easy
investment
leaving
educating
do-it-yourself
tops
competitors
message
translation
disappear
works
hub
damage
topic
done
corner
students
architectural
advertise
z
opened
receiving
sometimes
deal
rigorous
lives
asking
computers
p
81
assessment
materials
leaning
corporate
build
estimates
thin
goldberg
attract
stations
payment
based
tom
tech
floor
drywall
three-dimensional
nearby
nature
august
110
purpose
slowed
producer
notably
overall
partner
along
drives
cracke

circuit
industrial
technological
alter
recreation
served
concentrated
patches
synonyms
phone
utilizing
austria
metropolitan
founder
presume
eliminating
deep
obtaining
methodological
complex
producing
review
force
socialization
trend
consisting
liberty
proceeds
homeowners
turned
tony
humans
hull
radical
futile
bank
glossary
nails
capable
reception
violence
learn
protocol
passes
forms
record
instrumental
increased
oscar
farmer's
located
york
companies
estimate
bottom
serves
separately
accordingly
diminishing
pat
household
disability
allowed
existence
porous
result
cerebral
easily
engineers'
montreal
america
apply
tours
relevant
sealing
stars
marshall
raising
sale
experiments
improvement
adoption
earlier
ruled
fire
requests
notes
narrow
relationships
written
largest
maker
architecture
manufacturers
construct
fails
fixture
studs
assemble
derived
limit
attempting
incentives
industries
licensed
similarly
serve
patent
organize
effectiveness
purchased
offset
improvised
scope
compared
artificia

cheap
loads
relations
must
kitchens
generators
article
anthony
4th
jean
shop
problems
essence
u
standardized
funk
2nd
needs
environmental
thomas
camp
hot
cabinets
dates
repair
dick
assess
scots
band
fast
include
allowing
beads
philosopher
blur
vapor
creatures
disorder
broader
identity
printed
jury
greek
challenge
craftsmanship
market
check
successful
checks
suburban
drawn
judge
major
fewer
role
included
finish
toys
endeavors
replacement
dot
baking
quite
ready
believed
hair
george
accompanied
portal
status
keeping
every
fix
newt
friends
hill
predict
christian
resolution
goods
personnel
portion
exclusive
movable
standards
host
direct
cleaning
regulation
refusal
good
greeks
aviation
urban
concurrence
sometime
regional
economics
character
willingness
set
ideology
homemade
around
develop
suggested
praise
rate
change
two
registered
coming
today's
rising
elderly
drafted
pay
whether
dots
lunar
licensing
owner
subsidies
designers
inquiry
purchases
'
shared
returned


### 7.5 Set of terms/words that occure in the sample but not in the reference corpus.

TO BE EXAMINED: This specific set needs to be incorporated. In fact, it may capture specifity of the content to a great extend. We need to assign a mapping score for each words in this set.

In [92]:
input_specifics = dict()
for w in input_freq_dist.keys() - english_freq_dist.keys():
    input_specifics[w] = input_freq_dist[w]
    print(w)

kd
regimes
unmodified
planner
strati
chi-ying
pc
rhyming
nanomaterials
neurological
nagoya
respirators
renewable
oversized
uv
libertarian
ljudmila
electricians
2008-09-22
lockset
'non-commercial'
ultra-thin
70years
georges
cyclopaedia
dvd
text's
mentuhotep
walkthrough
c-base
hooks
veklaunn
slotted
scandinavian
bricoleur
verges
threeding
app
polystylism
cove
professional-level
communization
contexts
2008-06-16
nestor
recyclability
psr-6
audi
'junker'
autodesk
handyman
carly's
adhesives
lowe's
disambiguation
reprappro
978-3-639-89210-9
readwrite
tracheobronchomalacia
greenoman'
orally
tolstoy
artefact
readme
remodeling
self-directed
klu
entrepreneurship
raster
cruft
chronicled
downloads
desktop
abolitionism
wallpapering
slicer
netbook
neuroscientist
life-hacking
amateurs
collaborate
mains
datamation
gutters
trash
fix-up
granholm
subsidiaries
ghalib
remodelers
informational
recycling
standardization
tune-up
femme-fatale
microcasting
barrington
hacker's
manifesto
underutilized
carpenter-pl

In [93]:
print(len(input_specifics))

1995


## 8. Stemming (in case needed) 

In [94]:
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
for k,v in input_freq_dist.items():
    stemmed = stemmer.stem(k)
    if stemmed != k: print(k, "->", stemmed)

uses -> use
disambiguation -> disambigu
redirects -> redirect
article -> articl
multiple -> multipl
issues -> issu
please -> pleas
improve -> improv
remove -> remov
template -> templat
messages -> messag
possibly -> possibl
contains -> contain
original -> origin
verifying -> verifi
claims -> claim
adding -> ad
inline -> inlin
citations -> citat
statements -> statement
consisting -> consist
removed -> remov
november -> novemb
message -> messag
needs -> need
additional -> addit
verification -> verif
reliable -> reliabl
sources -> sourc
unsourced -> unsourc
material -> materi
challenged -> challeng
september -> septemb
series -> seri
individualism -> individu
topics -> topic
concepts -> concept
autonomy -> autonomi
liberties -> liberti
eremitism -> eremit
rights -> right
individual -> individu
reclamation -> reclam
individuation -> individu
laissez-faire -> laissez-fair
libertinism -> libertin
liberty -> liberti
methodological -> methodolog
negative -> neg
personal -> person
property -> p

laurie -> lauri
rocks -> rock
changing -> chang
expectations -> expect
heritage -> heritag
wikieducator -> wikieduc
july -> juli
apersonal -> aperson
reflection -> reflect
editor's -> editor'
feelings -> feel
rewriting -> rewrit
encyclopedic -> encycloped
consumes -> consum
produces -> produc
derived -> deriv
prosumption -> prosumpt
production -> product
coined -> coin
writers -> writer
perspective -> perspect
devices -> devic
digital -> digit
cameras -> camera
definitions -> definit
origins -> origin
general -> gener
meanings -> mean
producer -> produc
non-corporate -> non-corpor
company -> compani
reception -> recept
adoption -> adopt
company's -> company'
blurring -> blur
roles -> role
producers -> produc
cooperative -> cooper
crises -> crise
depression -> depress
1930s -> 1930
enthusiasts -> enthusiast
always -> alway
standards -> standard
complexity -> complex
functionality -> function
refers -> refer
semiprofessional -> semiprofession
well-accepted -> well-accept
category -> cate

resource -> resourc
oligopolistic -> oligopolist
hostile -> hostil
exploiting -> exploit
manipulating -> manipul
spending -> spend
directly -> directli
youre -> your
manufacturer -> manufactur
widgets -> widget
customers -> custom
asks -> ask
important -> import
losing -> lose
seriously -> serious
solve -> solv
specific -> specif
arrangement -> arrang
effects -> effect
immediate -> immedi
meets -> meet
demonstrates -> demonstr
willingness -> willing
satisfied -> satisfi
featureproductservice -> featureproductservic
commons-based -> commons-bas
readwrite -> readwrit
produsage -> produsag
lorimer -> lorim
interviewed -> interview
episode -> episod
december -> decemb
septemberoctober -> septemberoctob
lessons -> lesson
sustainability -> sustain
solutions -> solut
prosumerism -> prosumer
studying -> studi
values -> valu
accomplishment -> accomplish
descriptions -> descript
wiki-based -> wiki-bas
guides -> guid
manuals -> manual
description -> descript
non-experts -> non-expert
leave -> lea

thoughts -> thought
kludgy -> kludgi
molded -> mold
twists -> twist
turns -> turn
precisely -> precis
gary -> gari
marcus -> marcu
compares -> compar
-powered -> -power
wipers -> wiper
accelerated -> acceler
uphill -> uphil
slowed -> slow
stopped -> stop
altogether -> altogeth
vertebrate -> vertebr
eye's -> eye'
installed -> instal
facing -> face
kinds -> kind
gets -> get
wiring -> wire
passes -> pass
leaves -> leav
spots -> spot
genetically -> genet
engineered -> engin
beings -> be
nietzscheans -> nietzschean
disparagingly -> disparagingli
unmodified -> unmodifi
humans -> human
reclusive -> reclus
unprecedented -> unpreced
databases -> databas
adopts -> adopt
charles -> charl
bodging -> bodg
rigging -> rig
macgyver -> macgyv
replacement -> replac
self-referential -> self-referenti
philosophy -> philosophi
academics -> academ
literature -> literatur
cultural -> cultur
studies -> studi
psychology -> psycholog
improvisation -> improvis
bricoler -> bricol
denotes -> denot
endeavors -> end

publicly -> publicli
facilitated -> facilit
performed -> perform
monetary -> monetari
compensation -> compens
identical -> ident
co-creation -> co-creat
external -> extern
stakeholder -> stakehold
directions -> direct
compared -> compar
traced -> trace
aggressive -> aggress
patenting -> patent
o'reilly -> o'reilli
larry -> larri
expression -> express
perens -> peren
realized -> realiz
policies -> polici
vallance -> vallanc
corporation -> corpor
simultaneously -> simultan
closely -> close
lamberts -> lambert
title -> titl
thesis -> thesi
formalized -> formal
farming -> farm
currently -> current
unites -> unit
apply -> appli
funding -> fund
lacking -> lack
countries -> countri
ecological -> ecolog
single -> singl
country -> countri
involve -> involv
mechanism -> mechan
collaborate -> collabor
otherwise -> otherwis
fairly -> fairli
holds -> hold
respects -> respect
suited -> suit
collaborative -> collabor
models -> model
photographs -> photograph
visually -> visual
members -> member
usefu

drywall -> drywal
remodeling -> remodel
carpentry -> carpentri
moulding -> mould
occasionally -> occasion
politicians -> politician
leaders -> leader
substantial -> substanti
organizational -> organiz
overhauling -> overhaul
division -> divis
particle -> particl
hinges -> hing
rounded -> round
edges -> edg
genetic -> genet
inclined -> inclin
constraints -> constraint
reporter -> report
commented -> comment
family's -> family'
petered -> peter
hammering -> hammer
try -> tri
injuries -> injuri
mistakes -> mistak
avoided -> avoid
mortaring -> mortar
seeping -> seep
semi-skilled -> semi-skil
prestigious -> prestigi
occupation -> occup
carpenter -> carpent
chains -> chain
perception -> percept
professionalism -> profession
valued -> valu
neurological -> neurolog
handyman's -> handyman'
relieve -> reliev
pressure -> pressur
injury -> injuri
boy's -> boy'
saved -> save
installing -> instal
steps -> step
estimate -> estim
home-maintenance -> home-mainten
annually -> annual
aging -> age
populat

defense -> defens
medical -> medic
replacements -> replac
turbines -> turbin
indicator -> indic
adopter -> adopt
ties -> tie
robots -> robot
computed -> comput
tomography -> tomographi
sculpture -> sculptur
merankhre -> merankhr
threeding -> threed
sociocultural -> sociocultur
facilitate -> facilit
variants -> variant
prototypes -> prototyp
turning -> turn
grinding -> grind
squeezing -> squeez
foods -> food
candidates -> candid
chocolate -> chocol
crackers -> cracker
3d-printed -> 3d-print
bikinis -> bikini
shoes -> shoe
dresses -> dress
manufacture -> manufactur
football -> footbal
balance -> balanc
athletes -> athlet
styling -> style
lenses -> lens
glasses -> glass
unibody -> unibodi
fuselage -> fuselag
utilizes -> util
urbee -> urbe
mounted -> mount
debuted -> debut
airbus -> airbu
force -> forc
eurofighter -> eurofight
israeli -> isra
aviation -> aviat
revealed -> reveal
helicopter -> helicopt
reducing -> reduc
am's -> am'
firearms -> firearm
dimensions -> dimens
possibilities -> p

## 9. Computing representation power of common words.

In [95]:
# combine
makerness = {}
# common_words = [w[0] for w in common_words]
for w in common_words:
    # Consider only words whose charcater length is larger than 1
    if len(w) > 1:
        # Log likelihood scores are computed:
        score = log((input_freq_dist[w] / n_input) / (english_freq_dist[w] / n_english))
        makerness[w] = score

In [96]:
common_words

['print',
 'marketplace',
 'covered',
 'appeared',
 'notable',
 'commodity',
 'sugar',
 'encounters',
 'manuals',
 'consumer',
 'providing',
 'emitted',
 'court',
 'thing',
 'hoc',
 'clubs',
 'british',
 'wholly',
 'concept',
 'detroit',
 'controlled',
 'minutes',
 'shelter',
 'reductions',
 'inspection',
 'authority',
 'interest',
 'resource',
 'evolved',
 'paint',
 'french',
 'capitalism',
 'disadvantages',
 'spurred',
 'responsibility',
 'pad',
 'shells',
 'demolished',
 'hat',
 'fallen',
 'diminutive',
 'phrase',
 'lenses',
 'contact',
 'dominant',
 'especially',
 'chip',
 'questioned',
 'catalog',
 'doubled',
 'hardware',
 'layer',
 'trivial',
 'painter',
 'officers',
 'employment',
 'thumb',
 'bulb',
 'procedural',
 'acquisition',
 'operation',
 'hurt',
 'knowledge',
 'easy',
 'investment',
 'leaving',
 'educating',
 'do-it-yourself',
 'tops',
 'competitors',
 'message',
 'translation',
 'disappear',
 'works',
 'hub',
 'damage',
 'topic',
 'done',
 'corner',
 'students',
 'archit

In [97]:
# Sorting by scores:
for k,v in sorted(makerness.items(), key=lambda x:x[1], reverse=True): print(k,v)

additive 6.240406254084403
printer 5.86884269765192
printing 5.624645737139878
digital 5.398839068406185
franchise 5.13487352257172
users 5.070335001434149
global 5.070335001434149
citation 5.001342129947197
bending 4.9702515428771665
deposition 4.819020573153243
do-it-yourself 4.760180073130309
manufacturing 4.725494515142419
hardware 4.664869893325984
computers 4.664869893325984
evolutionary 4.664869893325984
junk 4.664869893325984
non-profit 4.664869893325984
homeowners 4.664869893325984
jargon 4.664869893325984
template 4.664869893325984
layer 4.622310278907189
fabrication 4.600331372188413
bug 4.531338500701462
lab 4.48254833653203
individualist 4.48254833653203
zealand 4.48254833653203
coined 4.48254833653203
hack 4.48254833653203
portal 4.48254833653203
media 4.451295793027925
computer 4.451295793027925
commons 4.377187820874203
consumers 4.339447492891357
maker 4.320029407034255
circuit 4.270215701322035
catalog 4.2594047852178205
eric 4.2594047852178205
surname 4.2594047852178

impact 1.9642546707112927
meaning 1.960748970363664
molding 1.9568196922237746
onto 1.9568196922237746
liberalism 1.9568196922237746
brain 1.9568196922237744
associated 1.940290390272564
soft 1.940290390272564
assessment 1.9348407855049994
trend 1.9348407855049994
manufacturer 1.9348407855049994
registered 1.9348407855049994
allowing 1.9240298694007836
revolution 1.9204520480528995
similar 1.9112091809717222
companies 1.903709866909826
product 1.903709866909826
machine 1.9019430819979404
built 1.9019430819979404
equipment 1.898251338072707
encounters 1.8922811710862033
pad 1.8922811710862033
estimates 1.8922811710862033
petitions 1.8922811710862033
revision 1.8922811710862033
wanting 1.8922811710862033
audiences 1.8922811710862033
greece 1.8922811710862033
bugs 1.8922811710862033
complaints 1.8922811710862033
planking 1.8922811710862033
earn 1.8922811710862033
modes 1.8922811710862033
analogous 1.8922811710862033
discussing 1.8922811710862033
echoes 1.8922811710862033
relates 1.8922811

lyrics 1.2636725116638292
scale 1.2636725116638292
writings 1.2636725116638292
ryan 1.2636725116638292
discouraged 1.2636725116638292
sponsored 1.2636725116638292
arriving 1.2636725116638292
ties 1.2636725116638292
era 1.2636725116638292
senses 1.2636725116638292
rounded 1.2636725116638292
trick 1.2636725116638292
crude 1.2636725116638292
consequence 1.2636725116638292
anchor 1.2636725116638292
selecting 1.2636725116638292
israel 1.2636725116638292
termed 1.2636725116638292
folded 1.2636725116638292
stem 1.2636725116638292
self-help 1.2636725116638292
alter 1.2636725116638292
eliminating 1.2636725116638292
radical 1.2636725116638292
foster 1.2636725116638292
governed 1.2636725116638292
execution 1.2636725116638292
decrease 1.2636725116638292
client 1.2636725116638292
hastily 1.2636725116638292
fence 1.2636725116638292
task 1.2636725116638292
cambridge 1.2636725116638292
hire 1.2636725116638292
bees 1.2636725116638292
parent 1.2636725116638292
dissolved 1.2636725116638292
divisions 1.26

professor 0.6218186254914345
spot 0.6218186254914345
value 0.619315495273316
play 0.619315495273316
trade 0.6183159949402325
scientific 0.6159877051806413
allowed 0.6159877051806413
national 0.6097460442571652
questioned 0.6044268827795652
signals 0.6044268827795652
eternal 0.6044268827795652
ethical 0.6044268827795652
tea 0.6044268827795652
glasses 0.6044268827795652
perception 0.6044268827795652
activity 0.6044268827795652
causes 0.6044268827795652
jurisdiction 0.6044268827795652
unions 0.6044268827795652
stone 0.6044268827795652
touch 0.6044268827795652
stars 0.6044268827795652
shapes 0.6044268827795652
perfect 0.6044268827795652
argue 0.6044268827795652
helpful 0.6044268827795652
contributions 0.6044268827795652
person 0.6044268827795652
encouraged 0.6044268827795652
suggests 0.6044268827795652
suburban 0.6044268827795652
effort 0.6044268827795651
paid 0.6044268827795651
world 0.599331330352965
new 0.5976763201698347
carry 0.5929981869559425
announced 0.5929981869559425
area 0.5888

supplies 0.12157511105598069
job 0.10888995152866497
private 0.10574364583929986
grand 0.10052170185814831
precisely 0.10052170185814831
plain 0.10052170185814831
destroy 0.10052170185814831
suit 0.10052170185814831
royal 0.10052170185814831
resistance 0.10052170185814831
occasionally 0.10052170185814831
integration 0.10052170185814831
operate 0.10052170185814831
rolled 0.10052170185814831
behavior 0.10052170185814831
concrete 0.10052170185814831
tired 0.10052170185814831
thinking 0.09360125901357448
increased 0.08672837972581253
increase 0.08501751532218306
power 0.0828221247587474
request 0.07990241465541263
till 0.07990241465541263
leaves 0.07990241465541263
measures 0.07990241465541263
bridge 0.07990241465541263
millions 0.07990241465541263
least 0.07990241465541263
day 0.07844575015894845
major 0.0717722885721624
gas 0.06975004319139466
modern 0.06975004319139466
general 0.06370772873543196
earth 0.059699707337893274
mention 0.059699707337893274
towns 0.059699707337893274
wooden 0

In [98]:
with open('makerness.csv', 'w') as csvfile:
    thewriter = csv.writer(csvfile, delimiter=',')
    for k,v in sorted(makerness.items(), key=lambda x:x[1], reverse=True):
        thewriter.writerow([k,v])