In [None]:
# Imports

import os
from pprint import pprint

from cltkreaders.lat import LatinTesseraeCorpusReader

## Deforming Catullus

In [None]:
# Instantiate the corpus reader, then keep only the file ids that contain 'catullus'

T = LatinTesseraeCorpusReader()
catullus = list(filter(lambda fileid: 'catullus' in fileid, T.fileids()))

In [None]:
# Show the first 490 characters of the text as it appears in the file

raw_preview = T.raw(catullus)[:490]
print(raw_preview)

In [None]:
# Split raw text into individual poems (chunks separated by a blank line)

catullus_raw = T.raw(catullus).strip()
poems = catullus_raw.split('\n\n')
pprint(poems[0])

In [None]:
# Get index of poem numbers using regular expressions

import re

# Each chunk must begin with a citation tag; group 1 captures the poem label
# (digits plus an optional trailing word character) and drops the line number
citation_pattern = re.compile(r'<cat\. (\d+\w?)\.\d+>')
poem_nos = [citation_pattern.match(poem).group(1) for poem in poems]
print(poem_nos)

## Reading backwards

In [None]:
# Look at poem 85

# Find the poem by its citation label rather than by list position: labels may
# carry a suffix (the citation regex allows a trailing word character), so a
# poem's position in `poems` is not guaranteed to equal its number.
# Raises ValueError if no poem is labelled '85'.
poem_85 = poems[poem_nos.index('85')]
print(poem_85)


In [None]:
# Look at poem 85 with pprint, which displays the string's repr so that the
# tab and newline separators show up as escape sequences

pprint(poem_85)

In [None]:
# Split poem into lines (one "citation<TAB>text" entry per line)

lines = poem_85.split('\n')

print(lines)

In [None]:
# Split each line on the tab between citation and text; keep only the text field

text_lines = []
for cited_line in lines:
    fields = cited_line.split('\t')
    text_lines.append(fields[1])
print(text_lines)

In [None]:
# Reverse the lines of a poem

# Materialize the reversed order as a list: a bare reversed() iterator would be
# exhausted by the join below, leaving the variable empty for any later use
reversed_text_lines = list(reversed(text_lines))
reversed_text = '\n'.join(reversed_text_lines)
print(reversed_text)

In [None]:
# Exercise some editorial discretion: capitalize the first character,
# capitalize 'odi' in the remainder, and close with a question mark

opening, remainder = reversed_text[0], reversed_text[1:]
reversed_text_edition = opening.upper() + remainder.replace('odi', 'Odi') + '?'
print(reversed_text_edition)


## Punctuation Poetry

In [None]:
# Helper function to extract poem text

def get_poem_text(poem):
    """Return the text of a poem with the leading citation of each line removed.

    `poem` is a newline-separated string whose lines are tab-delimited; the
    second tab-separated field of every line is kept and the results are
    re-joined with newlines. Raises IndexError if a line contains no tab.
    """
    verses = []
    for cited_line in poem.split('\n'):
        fields = cited_line.split('\t')
        verses.append(fields[1])
    return '\n'.join(verses)

# Apply the helper to poem 85 and show the citation-free text
poem_85_text = get_poem_text(poem_85)
print(poem_85_text)

In [None]:
# Remove all whitespace (spaces and line breaks alike) from poem

poem_85_spaceless = ''.join(char for char in poem_85_text if not char.isspace())
print(poem_85_spaceless)

In [None]:
# Remove all letters from poem (whitespace and every other character stay)

poem_85_letterless = ''.join(filter(lambda char: not char.isalpha(), poem_85_text))
print(poem_85_letterless)

In [None]:
# Helper function to remove all spaces and letters (or conversely to leave only punctuation)

def leave_punctuation(poem):
    """Return a list of the characters in `poem` that are neither whitespace nor letters.

    Note that the result is a list of single characters, not a string, and that
    any non-alphabetic character (digits included) is kept.
    """
    kept = []
    for char in poem:
        if char.isspace() or char.isalpha():
            continue
        kept.append(char)
    return kept

# The helper returns a list of characters, so this prints a list rather than a string
poem_85_punctuation = leave_punctuation(poem_85_text)
print(poem_85_punctuation)

In [None]:
# Iterate over all of the poems to get a list of punctuation-only poems

punc_poems = ["".join(leave_punctuation(get_poem_text(text))) for text in poems]

# Note that this is the same as doing the three steps one at a time...

punc_poems = list(map(get_poem_text, poems))            # 1. drop the citations
punc_poems = list(map(leave_punctuation, punc_poems))   # 2. keep non-letter, non-space characters
punc_poems = list(map("".join, punc_poems))             # 3. character lists back to strings

print(punc_poems[:5])


In [None]:
# Write each punctuation poem with its citation to a txt file

# Create the target directory if needed so the cell runs in a fresh checkout
os.makedirs('output', exist_ok=True)

# Explicit encoding: the platform default is not guaranteed to be UTF-8
with open('output/catullus_punc_poems.txt', 'w', encoding='utf-8') as f:
    for citation, poem in zip(poem_nos, punc_poems):
        f.write(f'{citation}: {poem}\n')

In [None]:
# As above, but with the characters of each punctuation poem sorted before writing

# Create the target directory if needed so the cell runs in a fresh checkout
os.makedirs('output', exist_ok=True)

# Explicit encoding: the platform default is not guaranteed to be UTF-8
with open('output/catullus_sorted_punc_poems.txt', 'w', encoding='utf-8') as f:
    for citation, poem in zip(poem_nos, punc_poems):
        f.write(f'{citation}: {"".join(sorted(poem))}\n')

In [None]:
# Write all of the punctuation poems consecutively as if one large punctuation poem

# punc_poems is already a list of strings, so one join concatenates them;
# no intermediate copy of the list is needed
punc_poems_all = ''.join(punc_poems)

# Create the target directory if needed so the cell runs in a fresh checkout
os.makedirs('output', exist_ok=True)

# Explicit encoding: the platform default is not guaranteed to be UTF-8
with open('output/catullus_punc.txt', 'w', encoding='utf-8') as f:
    f.write(punc_poems_all)

In [None]:
# Alternate regex approach to one large punctuation poem: delete every word
# character and whitespace character, then print what remains (this cell
# prints the result; it does not write a file).
# Note that \w covers digits and underscores as well as letters.

catullus_text = next(T.texts(catullus))
word_or_space = re.compile(r'[\w\s]')
catullus_text_no_letters = word_or_space.sub('', catullus_text)
print(catullus_text_no_letters)

## Verbing Vergil

In [None]:
# Get text of Aeneid 1 and show its first 315 characters

aen_1 = 'vergil.aeneid.part.1.tess'
aen_1_text = next(T.texts(aen_1))
print(aen_1_text[:315])

In [None]:
# Get the first two tokenized sentences of Aeneid 1, lowercasing the text first
# NB: POS-tagging is work in progress!
# Later cells read item[0] as the word and item[2] as the tag of each token
# (presumably (token, lemma, pos) tuples -- confirm against the reader's docs)

# Reuse the file id defined above instead of repeating the literal
virgil_pos_sents = next(T.tokenized_sents(aen_1, preprocess=lambda x: x.lower()))[:2]


In [None]:
# Print sample of tagged sents: the first of the sentences collected above

print(virgil_pos_sents[0])

In [None]:
# Flatten the sentences into one list of 'word/tag' strings,
# built from the first and third fields of every token

pos_tokens = [
    f'{item[0]}/{item[2]}'
    for sent in virgil_pos_sents
    for item in sent
]

In [None]:
# Show a sample tagged token (a 'word/tag' string); as the cell's last
# expression it is displayed without an explicit print

pos_tokens[0]

In [None]:
# Recompose lines

# Token offsets where each slice begins (presumably hand-counted verse-line
# boundaries -- verify if the tokenization changes); the last slice runs to the end
line_starts = [0, 9, 16, 25, 33, 43, 51]
indices = list(zip(line_starts, line_starts[1:] + [len(pos_tokens)]))
virgil_pos_lines = [pos_tokens[start:stop] for start, stop in indices] # cf. https://stackoverflow.com/a/18571043

print(*virgil_pos_lines, sep='\n')

In [None]:
# Remove non-verbs by looping through lines and checking each pos tag: verbs and
# punctuation are kept, every other token is replaced by a five-space placeholder

KEPT_TAGS = {'VERB', 'PUNCT'}
verb_lines = []

for line in virgil_pos_lines:
    pairs = []
    for pair in line:
        # Split on the LAST slash only, so a token that itself contains '/'
        # still unpacks into exactly (word, pos)
        word, pos = pair.rsplit('/', 1)
        pairs.append(word if pos in KEPT_TAGS else '     ')
    verb_lines.append(pairs)

print(verb_lines)

In [None]:
# Make verb-only poem: one space-joined row per output line

print(*map(" ".join, verb_lines), sep="\n")

In [None]:
# Hand-edit individual cells of the verb grid as (row, column, replacement);
# presumably these correct tagger/tokenizer output -- confirm against the printed poem
manual_corrections = [
    (0, 2, 'cano'),
    (6, 0, 'inferretque'),
    (6, 4, '     '),
]
for row, col, replacement in manual_corrections:
    verb_lines[row][col] = replacement

In [None]:
# Make verb-only poem again, now reflecting the current contents of verb_lines

print(*map(" ".join, verb_lines), sep="\n")