In [1]:
import requests
import string
import os
import itertools
import nltk
# this allows you to plot in the notebook
%matplotlib inline

In [110]:
def y_to_i(word):
    if word.endswith('yre'):
        return word[:-3] + 'ire'
    return word

In [111]:
def get_rhymes(end_words, scheme='ababbcbcc'):
    rhymes = []
    a = []
    b = []
    c = []
    for word, rhyme in zip(end_words, scheme):
        if rhyme == 'a':
            a.append(word)
        if rhyme == 'b':
            b.append(word)
        if rhyme == 'c':
            c.append(word)
    for group in [a, b, c]:
        for pair in itertools.combinations(group, 2):
            rhymes.append(pair)
    return rhymes


In [112]:
pairs = []
for filename in os.listdir("texts/spenser/"):
    with open("texts/spenser/" + filename) as f:
        fq = f.read()
        for stanza in fq.split("\n\n"):
            if len([line for line in stanza.split("\n") if line.strip()]) != 9:
                continue
            end_words = [line.strip(string.punctuation).split()[-1].lower() for line in stanza.split("\n")]
            pairs.extend(get_rhymes(end_words))
            
    

In [113]:
fd = nltk.FreqDist(pairs)

In [114]:
# fd.most_common()

In [115]:
import networkx as nx

In [116]:

G = nx.Graph()

In [117]:
clean_pairs = [(y_to_i(fst), y_to_i(snd)) for (fst, snd) in pairs]
G.add_edges_from(clean_pairs)

In [118]:
G.size()

18997

In [119]:
cfd = nltk.ConditionalFreqDist(pairs)

In [120]:
cfd['queene'].items()

[('vnseene', 1),
 ('weene', 2),
 ('beseene', 3),
 ('sustene', 1),
 ('teene', 1),
 ('greene', 2),
 ('sheene', 2),
 ('bene', 1),
 ('cleene', 1),
 ('seene', 8),
 ('shene', 1),
 ('beene', 6)]

In [121]:
print max([(key, cfd[key]) for key in cfd], key=lambda x: len(x[1]))
    

('eye', FreqDist({'hye': 7, 'by': 4, 'dye': 3, 'fly': 3, 'maiestie': 2, 'skye': 2, 'enuye': 2, 'melancholy': 2, 'thereby': 2, 'company': 2, ...}))


In [122]:
# G.neighbors('beene')

In [123]:
sorted(nx.degree(G).items(), key=lambda x: -x[1])[:10]

[('eye', 99),
 ('bee', 85),
 ('see', 78),
 ('went', 76),
 ('fly', 73),
 ('intent', 65),
 ('apply', 65),
 ('red', 63),
 ('side', 60),
 ('thereby', 57)]

In [124]:
for graph in nx.connected_component_subgraphs(G):
    break

In [125]:
for word in nx.shortest_path(G, 'limited', 'out-goe'):
    print word, len(cfd[word].items())

limited 4
bed 22
dread 11
speed 18
meed 13
ire 13
inquire 11
heare 23
neare 25
few 5
vntrew 13
shew 23
lowe 2
foe 18
out-goe 2


In [126]:
# nx.draw_networkx(graph)

In [127]:
len(graph)

1615

In [128]:
import itertools
m = set(itertools.chain(*[c for c in nx.find_cliques(graph) if 'meed' in c]))
a = set(itertools.chain(*[c for c in nx.find_cliques(graph) if 'ire' in c]))
e = set(itertools.chain(*[c for c in nx.find_cliques(graph) if 'attire' in c]))

In [129]:
m & a

{'desire', 'entire', 'ire', 'meed'}

In [130]:
a & e

{'attire',
 'desire',
 'enquire',
 'entire',
 'expire',
 'fire',
 'hire',
 'inquire',
 'inspire',
 'ire',
 'mire',
 'require',
 'sire',
 'squire',
 'stire',
 'tire'}

In [133]:
yre_set = set(itertools.chain(*[c for c in nx.find_cliques(graph) if 'ire' in c]))
rhymes = {}
for word in yre_set:
    word_set = set(itertools.chain(*[c for c in nx.find_cliques(graph) if word in c]))
    rhymes[word] = len(word_set & yre_set)

In [134]:
print len(graph['ire'].keys())
print len(graph['fire'].keys())
print len(graph['desire'].keys())
print len(graph['meed'].keys())


25
25
26
27


In [135]:
yre_set = set(itertools.chain(*[c for c in nx.find_cliques(graph) if 'ire' in c]))

In [140]:
sorted(rhymes.items(), key=lambda x: x[1])

[('copper-wire', 3),
 ('shire', 4),
 ('dire', 4),
 ('meed', 4),
 ('gire', 5),
 ('conspire', 7),
 ('enquire', 7),
 ('tire', 8),
 ('inspire', 10),
 ('stire', 10),
 ('admire', 10),
 ('respire', 12),
 ('expire', 13),
 ('aspire', 13),
 ('retire', 14),
 ('inquire', 14),
 ('squire', 16),
 ('attire', 16),
 ('mire', 16),
 ('require', 16),
 ('sire', 17),
 ('entire', 17),
 ('hire', 17),
 ('desire', 21),
 ('fire', 24),
 ('ire', 26)]