In [1]:
%pylab inline
import random as r
import copy as c
import csv

# English word list found online.  Every line of the file is stripped and
# lower-cased, and the results are kept in a set that chk() tests against.
with open('wordsEn.txt') as words_file:
    words = {entry.strip().lower() for entry in words_file}

class Monkey():
    """A population member holding a letter-weight table and a fitness score.

    Attributes:
        avgwords: fitness score; defaults to 0 at class level until an
            instance value is assigned.
        wchars: list of [letter, weight] pairs used to draw random letters.
    """

    # Class-level default shared by every instance until overwritten.
    avgwords = 0

    def __init__(self,  chars = None):
        """Store `chars` as the weight table, or build a uniform one.

        When `chars` is None a fresh table is created for this instance in
        which each of the 26 lowercase letters has weight 5.  A table that is
        passed in is stored as-is (not copied).
        """
        if chars is not None:
            self.wchars = chars
        else:
            self.wchars = [[letter, 5] for letter in 'abcdefghijklmnopqrstuvwxyz']

#copy-pasted from
#http://stackoverflow.com/questions/3679694/a-weighted-version-of-random-choice
def weighted_choice(choices):
    """Pick one item from `choices`, a sequence of (item, weight) pairs.

    A threshold is drawn uniformly between 0 and the sum of all weights; the
    first item whose running weight total reaches that threshold is returned.
    Returns None when no item qualifies (e.g. `choices` is empty).
    """
    threshold = r.uniform(0, sum(weight for _, weight in choices))
    running = 0
    for item, weight in choices:
        # Accumulate first, then compare: same value the threshold is tested
        # against as "total so far + this weight".
        running += weight
        if running >= threshold:
            return item
    return None
    
def lavg(list): #lavg = list average
    """Return the sum of the items in `list` divided by its length.

    An empty list makes the division fail with ZeroDivisionError.
    """
    count = len(list)
    return sum(list) / count

def chk(word):
    """Return True if `word`, lower-cased, is in the module-level `words` set."""
    normalized = word.lower()
    return normalized in words

#returns a random string based on the weighted or nonweighted
#letter choices provided in 'chars'
def wrand_string(n,chars):
    """Build a random string by appending `n` pieces to an initial space.

    Each step rolls an integer in [0, 100).  A roll below 20 appends a space,
    unless the string already ends in one; every other case appends a letter
    drawn with weighted_choice(chars).  Starting from " " means the first
    appended piece is never a space.
    """
    text = " "
    for _ in range(n):
        roll = r.randrange(100)
        # A letter is appended on a roll of 20+ OR whenever a space would be
        # consecutive; only the remaining case appends a space.
        if roll >= 20 or text.endswith(' '):
            text += weighted_choice(chars)
        else:
            text += " "
    return text

#produces random strings and then checks them for words
#of length 3 letters to 7 letters.
def monkeys(n,chars,minlen=3,maxlen=7):
    """Generate one random string and collect every dictionary word inside it.

    Args:
        n: number of pieces wrand_string appends to build the string.
        chars: (letter, weight) pairs passed through to wrand_string.
        minlen: shortest substring length to test (default 3).
        maxlen: longest substring length to test, inclusive (default 7).

    Returns:
        A list of matching substrings, grouped by increasing length and, within
        a length, in order of position.  A word that occurs at several
        positions is listed once per occurrence.
    """
    total = wrand_string(n,chars)
    found = []
    # maxlen + 1 because range() excludes its upper bound; the previous
    # range(2, 7) never tested 7-letter windows.
    for size in range(minlen, maxlen + 1):
        # "+ 1" so the window that ends on the very last character is tested
        # too; when the string is shorter than `size` the range is empty.
        for start in range(len(total) - size + 1):
            candidate = total[start:start + size]
            if chk(candidate):
                found.append(candidate)
    return found

#runs the monkey function a bunch of times ('smooth' times)
#with numchars of characters and the weighted or nonweighted letters in chars
#and returns the average number of wordlen words found
def monkeysaverage(numchars,chars,smooth,wordlen):
    """Average, over `smooth` runs, the count of `wordlen`-letter words found.

    Each run calls monkeys(numchars, chars) and counts the returned words whose
    length equals `wordlen`; the per-run counts are averaged with lavg().
    """
    counts = []
    for _ in range(smooth):
        hits = sum(1 for found in monkeys(numchars,chars) if len(found) == wordlen)
        counts.append(hits)
    return lavg(counts)

def evolve(members, generations, numchars=500, smooth=10, wordlen=3):
    """Evolve letter-weight tables towards ones that produce more words.

    Each generation every monkey has one randomly chosen letter weight nudged
    by a value in [-0.1, 0.1], is scored with monkeysaverage(), and the best
    monkey of the generation is logged as a CSV row
    [generation, best score, best weight table] to
    '<members><generations>.csv'.  The next generation is then drawn from the
    current one, with parents picked in proportion to their score.

    Args:
        members: population size; must be at least 1.
        generations: number of generations to run.
        numchars: random-string length used when scoring (default 500).
        smooth: number of scoring runs averaged per monkey (default 10).
        wordlen: word length counted when scoring (default 3).

    Returns:
        A copy of the weight table of the best-scoring monkey of the last
        generation (the initial uniform table when `generations` is 0).

    Raises:
        ValueError: if `members` is smaller than 1.
    """
    if members < 1:
        raise ValueError('evolve() needs at least one member')
    # NOTE(review): the file is opened in plain text mode without newline='';
    # on Windows csv.writer output then contains a blank line after each row.
    with open('{0}{1}.csv'.format(members,generations), 'w') as csvfile:
        mywriter = csv.writer(csvfile, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # Seed the first generation with exactly `members` monkeys.
        mems = [Monkey() for _ in range(members)]
        best_chars = mems[0].wchars
        for n in range(generations):
            for monkey in mems:
                # Mutate one letter weight.
                # NOTE(review): the step is unbounded, so weights can drift
                # below zero over many generations.
                ran = r.randrange(len(monkey.wchars))
                monkey.wchars[ran][1] += r.uniform(-0.1,0.1)
                # How many words does the monkey make with those frequencies?
                monkey.avgwords = monkeysaverage(numchars,monkey.wchars,smooth,wordlen)
            best = max(mems,key=lambda x:x.avgwords)
            mywriter.writerow([n,best.avgwords,best.wchars])
            # Snapshot the winner now: the breeding step below resets every
            # score to 0 and replaces every weight table.
            best_chars = c.deepcopy(best.wchars)
            parentgen = c.deepcopy(mems)
            # The candidate list does not change while breeding, so build it once.
            candidates = [(m,m.avgwords) for m in parentgen]
            for monkey in mems:
                # Choose a parent; more words found means higher chance of heredity.
                parent = weighted_choice(candidates)
                monkey.avgwords = 0 #reset the monkeys
                monkey.wchars = c.deepcopy(parent.wchars)
    # Weight table of the monkey that found the most words in the last generation.
    return best_chars

Populating the interactive namespace from numpy and matplotlib


In [None]:
# A bunch of code for graphing the letter frequencies
# Baseline table: every letter carries the same weight (5).
unweightedchars = [['a',5],['b',5],['c',5],['d',5],['e',5],['f',5],['g',5],['h',5],['i',5],['j',5],['k',5],['l',5],['m',5],['n',5],['o',5],['p',5],['q',5],['r',5],['s',5],['t',5],['u',5],['v',5],['w',5],['x',5],['y',5],['z',5]]
# NOTE(review): these look like published English letter frequencies in
# percent -- confirm the source.  The pairs are tuples here and lists in the
# other tables; weighted_choice only unpacks each pair, so both forms work.
realchars = [('a',8.167),('b',1.492),('c',2.782),('d',4.253),('e',12.702),('f',2.228),('g',2.015),('h',6.094),('i',6.966),('j',0.153),('k',0.772),('l',4.025),('m',2.406),('n',6.749),('o',7.507),('p',1.929),('q',0.095),('r',5.987),('s',6.327),('t',9.056),('u',2.758),('v',0.978),('w',2.361),('x',0.150),('y',1.974),('z',0.074)]
# Hard-coded [letter, weight] tables.
# NOTE(review): the names appear to encode <members><generations>g<generation>
# of an evolve() run (evolve logs to '<members><generations>.csv'), e.g.
# chars20010000g1545 would be generation 1545 of a 200-member,
# 10000-generation run -- confirm.  chars1001000 has no generation suffix.
chars1001000 = [['a', 5.617120585465051], ['b', 4.789201823706438], ['c', 5.087507561220437], ['d', 5.4731560936128565], ['e', 5.361291745651773], ['f', 4.082550237452202], ['g', 4.746232789256478], ['h', 4.869034339639426], ['i', 4.828025063101885], ['j', 5.2384965881246], ['k', 5.131686528757773], ['l', 4.7601987503968815], ['m', 4.555479665076931], ['n', 5.106153382074974], ['o', 5.675349112806487], ['p', 4.966692762826592], ['q', 4.7739600759446725], ['r', 4.977355621181748], ['s', 5.244866833369329], ['t', 5.3509658449808155], ['u', 5.255902061042606], ['v', 4.670272978057114], ['w', 4.653827853727887], ['x', 4.813091266397289], ['y', 4.6115131819433355], ['z', 4.497237379998011]]
chars20010000g1545 = [['a', 6.7506566498197325], ['b', 5.128853209486541], ['c', 4.473231252664038], ['d', 4.65090145607248], ['e', 5.591694763857763], ['f', 4.710560994754785], ['g', 4.483004865586396], ['h', 3.969467928758286], ['i', 6.132991944652612], ['j', 4.848892832994941], ['k', 3.751229339327439], ['l', 5.0078123663793646], ['m', 4.341565557287692], ['n', 4.867772101781643], ['o', 4.9224598176985], ['p', 5.766726503796711], ['q', 4.247919830786322], ['r', 5.396279769457094], ['s', 5.15577942486841], ['t', 5.615410122983482], ['u', 5.212744799748192], ['v', 4.7502808275155095], ['w', 5.332682492820394], ['x', 3.9216512356269613], ['y', 4.092494830112895], ['z', 5.366867773575506]]
chars20010000g4595 = [['a', 7.444404504446903], ['b', 5.889015315292889], ['c', 4.151916020762137], ['d', 3.67515832028174], ['e', 6.368178324184994], ['f', 4.729376251452973], ['g', 4.507145732340048], ['h', 2.981855699246655], ['i', 6.14129655660719], ['j', 4.55891078787808], ['k', 3.275370298316], ['l', 5.039409380615194], ['m', 4.739515371862414], ['n', 4.707301011190136], ['o', 5.960050671399897], ['p', 5.443822789713091], ['q', 3.5937066777297346], ['r', 5.7051453207827265], ['s', 6.086364810699781], ['t', 6.869530983674681], ['u', 3.8494965363007045], ['v', 4.348759062101854], ['w', 5.310723206266573], ['x', 2.6689808440905], ['y', 2.558313982762918], ['z', 4.413449853763129]]
chars20010000g6115 = [['a', 7.066894950641749], ['b', 5.775627238312435], ['c', 4.172894518406351], ['d', 3.698133108767051], ['e', 7.453896122185516], ['f', 3.8269452449688495], ['g', 3.8711492975028], ['h', 2.7099825381530156], ['i', 6.70593581867677], ['j', 4.310090105531945], ['k', 2.3220765997389203], ['l', 6.1019024309425545], ['m', 4.852887823762179], ['n', 4.849050979027497], ['o', 6.146162161407311], ['p', 6.488706791736703], ['q', 3.302158197096853], ['r', 5.657106458609143], ['s', 6.365532320545931], ['t', 7.994731233558941], ['u', 4.445151179960468], ['v', 3.6353566419903256], ['w', 5.3920483423488355], ['x', 2.087047960587697], ['y', 2.9834094051215736], ['z', 3.155429526536478]]
chars20010000g7634 = [['a', 7.357023704687252], ['b', 5.871105932263335], ['c', 4.202619633149176], ['d', 4.058198296999329], ['e', 8.282356201103791], ['f', 3.2118908679364235], ['g', 3.4701022515377855], ['h', 2.8071129179610645], ['i', 6.305719659073039], ['j', 4.336846461194616], ['k', 2.1512074890388266], ['l', 6.832907952717474], ['m', 5.861787995223657], ['n', 4.192485332017982], ['o', 7.5649912598942235], ['p', 6.177450782215932], ['q', 2.491829931794844], ['r', 5.601227903112878], ['s', 6.083656234493242], ['t', 7.9904061788506215], ['u', 4.868191734625808], ['v', 3.2113625487830015], ['w', 5.112019475411859], ['x', 1.9235260785404296], ['y', 3.2979968103018624], ['z', 2.2940498937331313]]
chars20010000g9999 = [['a', 8.122534457892154], ['b', 5.971898511401099], ['c', 4.965001383554182], ['d', 3.7732119610869104], ['e', 8.47696096629556], ['f', 2.5653729642126666], ['g', 3.5985452095622197], ['h', 2.463833395113153], ['i', 5.583430323647656], ['j', 3.7140144079799824], ['k', 2.393060336125771], ['l', 7.312660376725955], ['m', 5.171011123557881], ['n', 3.3327912330637783], ['o', 8.189822680516604], ['p', 6.42820248271009], ['q', 1.2317869639930783], ['r', 5.96314517435067], ['s', 6.357210662262221], ['t', 9.021465877613318], ['u', 3.4878605114403753], ['v', 2.842368014563001], ['w', 4.7960896278572545], ['x', 1.2291781652888083], ['y', 3.1834249716378835], ['z', 0.5327083094175566]]
# NOTE(review): presumably snapshots from a 200-member, 1000000-generation
# evolve() run, the g-suffix being the generation -- confirm.
# The tables from g13634 onward contain negative weights (e.g. for 'z').
chars2001000000g9121 = [['a', 8.736371205007003], ['b', 4.131485872086321], ['c', 3.884824596975486], ['d', 3.5606279313050533], ['e', 8.41138350773185], ['f', 4.30477941976183], ['g', 3.666664257673072], ['h', 3.825245242553374], ['i', 5.060002652004456], ['j', 2.4732095533571146], ['k', 2.5802530847497125], ['l', 4.181898550354766], ['m', 4.594852089747574], ['n', 4.923995878251929], ['o', 7.717615512711585], ['p', 6.267413652518519], ['q', 2.3471842544747115], ['r', 6.337271456427059], ['s', 6.404482660848713], ['t', 7.382413831260027], ['u', 3.6856520320123374], ['v', 0.648344558123984], ['w', 4.210010781075308], ['x', 4.038764232733261], ['y', 6.152341051840384], ['z', 0.492386032726022]]
chars2001000000g10628 = [['a', 8.809877502797628], ['b', 3.9745205362886264], ['c', 3.246089527866428], ['d', 3.3980173096054958], ['e', 8.744567325627104], ['f', 4.403995203371425], ['g', 3.8274769320862254], ['h', 3.8254947075698476], ['i', 5.515050064539028], ['j', 1.9828197944151806], ['k', 2.6484062805091937], ['l', 3.574096918639996], ['m', 4.307626035702732], ['n', 4.592213162985549], ['o', 8.090850370564363], ['p', 6.510798448266812], ['q', 1.7241725687234657], ['r', 5.8410593765747825], ['s', 6.628713972468628], ['t', 7.202232928892094], ['u', 4.396686807293007], ['v', 0.8631183499629709], ['w', 3.6740336146326693], ['x', 3.4826734993580355], ['y', 6.378398102644314], ['z', 0.035114423695395164]]
chars2001000000g13634 = [['a', 9.622086724551059], ['b', 3.5902759248441614], ['c', 3.166993744738504], ['d', 2.724678872344489], ['e', 10.132892467292976], ['f', 2.9198873982609075], ['g', 3.2251231947138628], ['h', 3.488405773291345], ['i', 5.163005713214998], ['j', 0.958750706926687], ['k', 1.194567889573313], ['l', 2.7291114727592665], ['m', 3.774453472643921], ['n', 4.405626153206054], ['o', 9.819279280717845], ['p', 7.004686730937127], ['q', 0.25140084500504023], ['r', 6.049584437437702], ['s', 5.723857057731305], ['t', 7.644441245290013], ['u', 4.613053679307557], ['v', 0.6201020382995599], ['w', 3.9825249399463507], ['x', 3.0786544184527913], ['y', 5.32145115425143], ['z', -1.073313381207702]]
# testchars lists its letters out of alphabetical order.
# NOTE(review): it does not appear to be referenced by the code in this notebook.
testchars = [['a', 7.6129392378506173], ['c', 4.0463188015881588], ['b', 1.9563267038862413], ['e', 11.609962242199554], ['d', 3.668740797137636], ['g', 2.8794733797348293], ['f', 1.3681989710972646], ['i', 8.7635309478159602], ['h', 2.2195940635455975], ['k', 0.87117757073305302], ['j', 0.18798700986236741], ['m', 2.7455941212890478], ['l', 5.3777330563073491], ['o', 6.1381287486459701], ['n', 6.941083502375502], ['q', 0.18306812336995054], ['p', 2.8245101697978234], ['s', 8.9134500535196235], ['r', 7.3810030465016556], ['u', 3.3404585899263344], ['t', 6.6468057713509081], ['w', 0.85759716672138031], ['v', 1.0400236962010156], ['y', 1.6948771935827778], ['x', 0.290642032312807], ['z', 0.438636356345524]]
chars2001000000g15130 = [['a', 10.331142161117556], ['b', 4.031472337149219], ['c', 4.010327190952468], ['d', 2.5916584029622105], ['e', 10.80269731124787], ['f', 2.6773137830492546], ['g', 3.3176091299894757], ['h', 3.8035180031860554], ['i', 5.108353274450604], ['j', 0.5712196971448389], ['k', 0.6624464564757842], ['l', 2.89644302551085], ['m', 4.212344583603034], ['n', 4.314963046255836], ['o', 9.004944639962243], ['p', 7.423945610088933], ['q', 0.26245096576432053], ['r', 7.090491738947087], ['s', 5.5283193586365815], ['t', 8.401448379751676], ['u', 4.877521167751871], ['v', 0.3344842417475946], ['w', 3.36602589891575], ['x', 2.5954657713092777], ['y', 5.871338028734254], ['z', -0.620424496423035]]
chars2001000000g18107 = [['a', 11.170139234696238], ['b', 3.204368774264232], ['c', 4.127539725627583], ['d', 2.171834047650169], ['e', 12.001755510876183], ['f', 2.1407382863883626], ['g', 3.4509618223158998], ['h', 2.706657508834799], ['i', 3.7360294553895463], ['j', 0.0014362043186150891], ['k', -0.3978451155934556], ['l', 3.3765891887694472], ['m', 5.73645496331918], ['n', 4.012508592634439], ['o', 8.98052123538016], ['p', 7.335267193902966], ['q', -0.3437126337998163], ['r', 7.680978799471079], ['s', 6.165132276557653], ['t', 7.624048375735283], ['u', 4.470381333343184], ['v', -0.2444795314708122], ['w', 4.255896631403939], ['x', 2.4409380021575986], ['y', 5.497052271350486], ['z', -1.1650471098676802]]
chars2001000000g21071 = [['a', 10.686438585606084], ['b', 2.902374955807226], ['c', 3.865990115537317], ['d', 1.1691758886555397], ['e', 12.312068515390184], ['f', 1.5773381784180753], ['g', 3.275677662282531], ['h', 3.0592375511132435], ['i', 3.0150934415294217], ['j', -0.2898806350424081], ['k', -0.40739112171261527], ['l', 2.816953811736128], ['m', 4.671473554612819], ['n', 3.465933812138197], ['o', 9.078629429624115], ['p', 7.3864842478090456], ['q', -0.4345411796353431], ['r', 8.09251333758832], ['s', 5.70343545536917], ['t', 7.989169431479273], ['u', 5.31258631100396], ['v', -0.04342297038514767], ['w', 2.362291541753972], ['x', 1.0862107689374345], ['y', 5.795573975616606], ['z', -1.088024589847045]]

# Scoring runs averaged per data point, and the word length being counted.
smooth = 50
wordlen = 3
# Weight tables in CSV column order (they follow the string-length column).
weight_tables = [
    unweightedchars,
    chars20010000g1545,
    chars20010000g6115,
    chars2001000000g10628,
    chars2001000000g15130,
    chars2001000000g18107,
    chars2001000000g21071,
    realchars,
]
# One row per string length 1, 11, 21, ..., 991:
# [length, average word count for each table above].
with open('data2.csv','w') as csvfile:
    mywriter = csv.writer(csvfile, delimiter=',',quotechar='"',quoting=csv.QUOTE_MINIMAL)
    for i in range(1,1000,10):
        row = [i]
        for table in weight_tables:
            row.append(monkeysaverage(i,table,smooth,wordlen))
        mywriter.writerow(row)

In [None]:
# More code for graphing
# Parse data2.csv into a list of rows of floats.
# Blank lines are skipped rather than assuming every second line is blank:
# that layout only appears when the file was written in text mode on
# Windows, and slicing with [0::2] silently drops half of the rows otherwise.
with open('data2.csv','r') as datafile:
    data = [
        [float(item) for item in line.strip().split(',')]
        for line in datafile
        if line.strip()
    ]

In [None]:
# Even more graphing
# Legend label and line colour for each data column, in column order
# (column 0 of `data` is the x axis).
labels = ['unweighted',
          '200x1545',
          '200x6115',
          '200x10628',
          '200x15130',
          '200x18107',
          '200x21071',
          'real']
colors = ['blue','green','green','green','green','green','green','blue']

ax = subplot(1,1,1)
# The x column is the same for every series, so extract it once.
string_lengths = [row[0] for row in data]
for column, (label, color) in enumerate(zip(labels, colors), start=1):
    ax.plot(string_lengths, [row[column] for row in data], label=label, color=color)
# Reverse the legend entries so the last-plotted series is listed first.
handles, lbls = ax.get_legend_handles_labels()
ax.legend(handles[::-1], lbls[::-1],bbox_to_anchor=(1.36, 1.05))
ax.set_xlabel('Number of Characters in Random String')
ax.set_ylabel('Number of Words Found')
# NOTE(review): the data-generation cell sets wordlen = 3, but this title
# says 4 -- confirm which word length the plotted CSV actually holds.
ax.set_title('4 Letter Words')