In [7]:
# from cltk.corpus.utils.importer import CorpusImporter
# corpus_importer = CorpusImporter('latin')
# corpus_importer.import_corpus('latin_text_latin_library')

In [8]:
# Imports

import html
import re

import pandas

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

from cltk.corpus.latin import latinlibrary
from cltk.tokenize.word import WordTokenizer
from cltk.stem.latin.j_v import JVReplacer

from pprint import pprint

In [9]:
# Setup CLTK tools

word_tokenizer = WordTokenizer('latin')
replacer = JVReplacer()

In [10]:
# Setup files

files = latinlibrary.fileids()
print("There are %d files in the Latin Library corpus." % len(files))

There are 2159 files in the Latin Library corpus.


In [11]:
#Filter for classical texts

classical = []

remove = ["The Bible","Ius Romanum","Papal Bulls","Medieval Latin","Christian Latin","Christina Latin","Neo-Latin","The Miscellany","Contemporary Latin"]

for file in files:
   raw = latinlibrary.raw(file)
   if not any(x in raw for x in remove):
       classical.append(file)

files = classical
print("There are %d files in the Latin Library Classical subcorpus." % len(files))

There are 965 files in the Latin Library Classical subcorpus.


In [12]:
#Filter for Cicero texts

cicero = [file for file in latinlibrary.fileids() if 'cicero/' in file]


files = cicero
print(f"There are {len(files)} files in the Latin Library Classical subcorpus.")
pprint(files[:10])

There are 138 files in the Latin Library Classical subcorpus.
['cicero/acad.txt',
 'cicero/adbrutum1.txt',
 'cicero/adbrutum2.txt',
 'cicero/amic.txt',
 'cicero/arch.txt',
 'cicero/att1.txt',
 'cicero/att10.txt',
 'cicero/att11.txt',
 'cicero/att12.txt',
 'cicero/att13.txt']


In [13]:
# Preprocess texts

def preprocess(text):    

    text = html.unescape(text) # Handle html entities
    text = re.sub(r'&nbsp;?', ' ',text) #&nbsp; stripped incorrectly in corpus?
    text = re.sub('\x00',' ',text) #Another space problem?
    
    text = text.lower()
    text = replacer.replace(text) #Normalize u/v & i/j
    
    punctuation ="\"#$%&\'()*+,-/:;<=>@[\]^_`{|}~.?!«»"
    translator = str.maketrans({key: " " for key in punctuation})
    text = text.translate(translator)
    
    translator = str.maketrans({key: " " for key in '0123456789'})
    text = text.translate(translator)

    remove_list = [r'\bthe latin library\b',
                   r'\bthe classics page\b',
                   r'\bneo-latin\b', 
                   r'\bmedieval latin\b',
                   r'\bchristian latin\b',
                   r'\bchristina latin\b',
                   r'\bpapal bulls\b',
                   r'\bthe miscellany\b',
                  ]

    for pattern in remove_list:
        text = re.sub(pattern, '', text)
    
    text = re.sub('[ ]+',' ', text) # Remove double spaces
    text = re.sub('\s+\n+\s+','\n', text) # Remove double lines and trim spaces around new lines
    
    return text

In [14]:
# Make list of texts

raw_files = []

for file in files:
    raw = latinlibrary.raw(file)
    raw = preprocess(raw)
    if len(raw) < 1000:
        pass
    else:
        raw_tokens = raw.split()
        raw = " ".join(raw_tokens[50:-50])
        raw_files.append(raw)

In [15]:
def segment_document(document, length):
    segments = []
    wordlist = document.split()
    for i in range(0, len(wordlist), length):
        segments.append(wordlist[i:i+length])
    segments = [" ".join(segment) for segment in segments]
    if len(wordlist) % length:
        segments[-2:] = [' '.join(segments[-2:])]
        return segments
    else:
        return segments

In [16]:
segments = [segment_document(file, 500) for file in raw_files]
segments = [item for sublist in segments for item in sublist]

In [17]:
print(len(raw_files))
print(len(segments))

138
2143


### Following [Zou et al. 2006; Alajmi 2012]

In [22]:
# Make document-term matrix and vocabulary

vectorizer = CountVectorizer(input='content', min_df=3)
dtm = vectorizer.fit_transform(segments)
dtm = dtm.toarray()

vocab = vectorizer.get_feature_names()
vocab = np.array(vocab)

In [23]:
print(len(vocab))

27670


In [24]:
M = len(vocab)
N= len(raw_files)

In [25]:
# Make array of probabilities per book

raw_lengths = [len(tokens.split()) for tokens in segments]
l = np.array(raw_lengths)
ll = l.reshape(len(l),1)

probs = dtm/ll

P=probs

In [26]:
# Calculate mean probability
# i.e. Sum of probabilities for each word / number of documents

probsum = np.ravel(probs.sum(axis=0))
MP = probsum/N

In [27]:
# Make array of bar probability

length = sum(raw_lengths)
barprobs = dtm/length
bP=barprobs

In [28]:
variance = (P-bP) ** 2
varsum = np.ravel(variance.sum(axis=0))
VP = varsum/N

In [29]:
SAT = MP/VP

In [30]:
table_data = list(zip(vocab, MP, VP, SAT))

In [31]:
table1_data = sorted(table_data,key=lambda x: x[1], reverse=True)
table2_data = sorted(table_data,key=lambda x: x[2], reverse=True)
table3_data = sorted(table_data,key=lambda x: x[3])

In [32]:
# Get Table1 info in order
words = [item[0] for item in table1_data][:10]
print(words)

# Current output: 
# ['et', 'in', 'est', 'non', 'cum', 'ut', 'ad', 'quod', 'qui', 'sed']

translations = "and in is not when/with so/how at which/because who but".split()
print(translations)

mps = ['{:.4f}'.format(round(item[1], 4)) for item in table1_data][:10]
print(mps)

vps = ['{:.4f}'.format(round(item[2], 4)) for item in table1_data][:10]
print(vps)

sats = ['{:.4f}'.format(round(item[3], 4)) for item in table1_data][:10]
print(sats)

['et', 'in', 'non', 'est', 'ut', 'cum', 'quod', 'ad', 'qui', 'esse']
['and', 'in', 'is', 'not', 'when/with', 'so/how', 'at', 'which/because', 'who', 'but']
['0.3651', '0.3195', '0.2426', '0.2076', '0.1974', '0.1596', '0.1477', '0.1374', '0.1309', '0.1289']
['0.0103', '0.0076', '0.0046', '0.0037', '0.0032', '0.0021', '0.0019', '0.0017', '0.0016', '0.0015']
['35.3682', '41.8374', '52.4949', '55.8152', '62.3797', '74.5576', '77.9300', '80.0493', '81.7099', '84.7584']


In [33]:
table1 = [(word, translation, mp, vp) for word, translation, mp, vp, _ in zip(words, translations, mps, vps, sats)]
df1 = pandas.DataFrame(table1, columns=['Word', 'Translation', 'Mean Prob.', 'Var. Prob.'])

print("Table 1. Top 10 words with highest MP")
df1

Table 1. Top 10 words with highest MP


Unnamed: 0,Word,Translation,Mean Prob.,Var. Prob.
0,et,and,0.3651,0.0103
1,in,in,0.3195,0.0076
2,non,is,0.2426,0.0046
3,est,not,0.2076,0.0037
4,ut,when/with,0.1974,0.0032
5,cum,so/how,0.1596,0.0021
6,quod,at,0.1477,0.0019
7,ad,which/because,0.1374,0.0017
8,qui,who,0.1309,0.0016
9,esse,but,0.1289,0.0015


In [34]:
# Get Table2 info in order
words2 = [item[0] for item in table2_data][:10]
print(words2)

# Current output: 
# ['et', 'in', 'est', 'non', 'cum', 'ut', 'ad', 'quod', 'qui', 'si']

translations2 = "and in is not when/with so/how at which/because who if".split()
print(translations2)

mps2 = ['{:.4f}'.format(round(item[1], 4)) for item in table2_data][:10]
print(mps2)

vps2 = ['{:.5f}'.format(round(item[2], 5)) for item in table2_data][:10]
print(vps2)

sats2 = ['{:.4f}'.format(round(item[3], 4)) for item in table2_data][:10]
print(sats2)

['et', 'in', 'non', 'est', 'ut', 'cum', 'quod', 'ad', 'qui', 'si']
['and', 'in', 'is', 'not', 'when/with', 'so/how', 'at', 'which/because', 'who', 'if']
['0.3651', '0.3195', '0.2426', '0.2076', '0.1974', '0.1596', '0.1477', '0.1374', '0.1309', '0.1282']
['0.01032', '0.00764', '0.00462', '0.00372', '0.00316', '0.00214', '0.00189', '0.00172', '0.00160', '0.00160']
['35.3682', '41.8374', '52.4949', '55.8152', '62.3797', '74.5576', '77.9300', '80.0493', '81.7099', '80.0770']


In [35]:
table2 = [(word, translation, mp, vp) for word, translation, mp, vp, _ in zip(words2, translations2, mps2, vps2, sats2)]
df2 = pandas.DataFrame(table2, columns=['Word', 'Translation', 'Mean Prob.', 'Var. Prob.'])

print("Table 2. Top 10 words with lowest VP")
df2

Table 2. Top 10 words with lowest VP


Unnamed: 0,Word,Translation,Mean Prob.,Var. Prob.
0,et,and,0.3651,0.01032
1,in,in,0.3195,0.00764
2,non,is,0.2426,0.00462
3,est,not,0.2076,0.00372
4,ut,when/with,0.1974,0.00316
5,cum,so/how,0.1596,0.00214
6,quod,at,0.1477,0.00189
7,ad,which/because,0.1374,0.00172
8,qui,who,0.1309,0.0016
9,si,if,0.1282,0.0016


In [36]:
# Get Table3 info in order
words3 = [item[0] for item in table3_data][:10]
print(words3)

# Current output: 
# ['et', 'in', 'est', 'non', 'cum', 'ut', 'ad', 'quod', 'qui', 'me']

translations3 = "and in is not when/with so/how at which/because who me".split()
print(translations3)

mps3 = ['{:.4f}'.format(round(item[1], 4)) for item in table3_data][:10]
print(mps3)

vps3 = ['{:.5f}'.format(round(item[2], 5)) for item in table3_data][:10]
print(vps3)

sats3 = ['{:.4f}'.format(round(item[3], 4)) for item in table3_data][:10]
print(sats3)

['et', 'in', 'non', 'cereris', 'est', 'ut', 'sunto', 'te', 'aut', 'me']
['and', 'in', 'is', 'not', 'when/with', 'so/how', 'at', 'which/because', 'who', 'me']
['0.3651', '0.3195', '0.2426', '0.0003', '0.2076', '0.1974', '0.0004', '0.0837', '0.0931', '0.0852']
['0.01032', '0.00764', '0.00462', '0.00001', '0.00372', '0.00316', '0.00001', '0.00125', '0.00133', '0.00119']
['35.3682', '41.8374', '52.4949', '55.7312', '55.8152', '62.3797', '64.6519', '66.9224', '69.7164', '71.7725']


In [37]:
table3 = [(word, translation, mp, vp) for word, translation, mp, vp, _ in zip(words3, translations3, mps3, vps3, sats3)]
df3 = pandas.DataFrame(table3, columns=['Word', 'Translation', 'Mean Prob.', 'Var. Prob.'])

print("Table 3. Top 10 words with highest SAT")
df3

Table 3. Top 10 words with highest SAT


Unnamed: 0,Word,Translation,Mean Prob.,Var. Prob.
0,et,and,0.3651,0.01032
1,in,in,0.3195,0.00764
2,non,is,0.2426,0.00462
3,cereris,not,0.0003,1e-05
4,est,when/with,0.2076,0.00372
5,ut,so/how,0.1974,0.00316
6,sunto,at,0.0004,1e-05
7,te,which/because,0.0837,0.00125
8,aut,who,0.0931,0.00133
9,me,me,0.0852,0.00119


In [39]:
#Calculate entropies

with np.errstate(divide='ignore', invalid='ignore'):
    logprobs = np.where(probs != 0, np.log10(1/probs), 0)
ent = probs * logprobs
H = np.ravel(ent.sum(axis=0))

In [40]:
table_data = list(zip(vocab, MP, VP, SAT, H))

In [41]:
table4_data = sorted(table_data,key=lambda x: x[4], reverse=True)

In [42]:
# Get Table4 info in order
words4 = [item[0] for item in table4_data][:10]
print(words4)

# Current output: 
# ['et', 'in', 'est', 'non', 'cum', 'ut', 'ad', 'quod', 'qui', 'me']

translations4 = "and in is not when/with so/how at which/because who me".split()
print(translations3)

#mps3 = ['{:.4f}'.format(round(item[1], 4)) for item in table3_data][:10]
#print(mps3)

#vps3 = ['{:.5f}'.format(round(item[2], 5)) for item in table3_data][:10]
#print(vps3)

#sats3 = ['{:.4f}'.format(round(item[3], 4)) for item in table3_data][:10]
#print(sats3)

ents4 = ['{:.4f}'.format(round(item[4], 4)) for item in table4_data][:10]
#print(sats3)

['et', 'in', 'non', 'est', 'ut', 'cum', 'quod', 'ad', 'qui', 'esse']
['and', 'in', 'is', 'not', 'when/with', 'so/how', 'at', 'which/because', 'who', 'me']


In [43]:
table4 = [(word, translation, ent) for word, translation, ent in zip(words4, translations4, ents4)]
df4 = pandas.DataFrame(table4, columns=['Word', 'Translation', 'Entropy'])

print("Table 4. Top 10 words with highest entropy")
df4

Table 4. Top 10 words with highest entropy


Unnamed: 0,Word,Translation,Entropy
0,et,and,79.8614
1,in,in,72.8263
2,non,is,58.8568
3,est,not,51.6368
4,ut,when/with,50.0964
5,cum,so/how,42.2736
6,quod,at,39.6297
7,ad,which/because,37.1852
8,qui,who,35.6601
9,esse,me,35.3438


In [3]:
from cltk.stop.stop import CorpusStoplist

In [4]:
c = CorpusStoplist('latin')

In [5]:
help(c.build_stoplist)

Help on method build_stoplist in module cltk.stop.stop:

build_stoplist(texts, basis='zou', size=100, sort_words=True, inc_counts=False, lower=True, remove_punctuation=True, remove_numbers=True, include=[], exclude=[]) method of cltk.stop.stop.CorpusStoplist instance
    :param texts: list of strings used as document collection for extracting stopwords
    :param basis: Define the basis for extracting stopwords from the corpus. Available methods are:
                  - 'frequency', word counts
                  - 'mean', mean probabilities
                  - 'variance', variance probabilities
                  - 'entropy', entropy
                  - 'zou', composite measure as defined in the following paper
                    Zou, F., Wang, F.L., Deng, X., Han, S., and Wang, L.S. 2006. “Automatic Construction of Chinese Stop Word List.” In Proceedings of the 5th WSEAS International Conference on Applied Computer Science, 1010–1015. https://pdfs.semanticscholar.org/c543/8e216071f618

In [22]:
freq_stops = c.build_stoplist(segments, basis='frequency', inc_counts=True)

In [23]:
print(freq_stops[:25])

['ab', 'ac', 'ad', 'an', 'ante', 'apud', 'atque', 'aut', 'autem', 'causa', 'cum', 'de', 'ea', 'ego', 'eius', 'enim', 'eo', 'erat', 'esse', 'esset', 'est', 'et', 'etiam', 'eum', 'ex']


### Table: Word Reduction

In [39]:
import pandas as pd

from cltk.corpus.latin import latinlibrary
from cltk.stem.latin.j_v import JVReplacer
from cltk.stop.stop import CorpusStoplist

import pickle

from pprint import pprint

In [31]:
ll_files = latinlibrary.fileids()
ll_docs = [latinlibrary.raw(file) for file in ll_files]
ll_size = len(ll_files)

In [40]:
# Preprocess texts

import html
import re
from cltk.stem.latin.j_v import JVReplacer

replacer = JVReplacer()

def preprocess(text):    

    text = html.unescape(text) # Handle html entities
    text = re.sub(r'&nbsp;?', ' ',text) #&nbsp; stripped incorrectly in corpus?
#     text = re.sub('\x00',' ',text) #Another space problem?
    
#     text = text.lower()
    text = replacer.replace(text) #Normalize u/v & i/j

    remove_list = [r'\bthe latin library\b',
                   r'\bthe classics page\b',
                   r'\bneo-latin\b', 
                   r'\bmedieval latin\b',
                   r'\bchristian latin\b',
                   r'\bchristina latin\b',
                   r'\bpapal bulls\b',
                   r'\bthe miscellany\b',
                  ]

    for pattern in remove_list:
        text = re.sub(pattern, '', text)
    
    text = re.sub('[ ]+',' ', text) # Remove double spaces
    text = re.sub('\s+\n+\s+','\n', text) # Remove double lines and trim spaces around new lines
    
    return text

In [41]:
cicero_files = [file for file in latinlibrary.fileids() if 'cicero/' in file]
cicero_docs = [preprocess(latinlibrary.raw(file)) for file in cicero_files]
cicero_size = len(cicero_files)

biblia_sacra_files = [file for file in latinlibrary.fileids() if 'bible/' in file]
biblia_sacra_docs = [preprocess(latinlibrary.raw(file)) for file in biblia_sacra_files]
biblia_sacra_size = len(biblia_sacra_files)

ius_romanum_files = [file for file in latinlibrary.fileids() if 'justinian' in file 
                     or 'gaius' in file 
                     or 'theod' in file]
ius_romanum_docs = [preprocess(latinlibrary.raw(file)) for file in ius_romanum_files]
ius_romanum_size = len(ius_romanum_files)

In [43]:
## Get tokens for all document collections
## NB: Pickled to save loading time

# ll_tokens = latinlibrary.words(ll_files)
# cicero_tokens = latinlibrary.words(cicero_files)
# biblia_sacra_tokens = latinlibrary.words(biblia_sacra_files)
# ius_romanum_tokens = latinlibrary.words(ius_romanum_files)

# pickle.dump(list(ll_tokens), open('../data/serial/ll_tokens.p', 'wb'))
# pickle.dump(list(cicero_tokens), open('../data/serial/cicero_tokens.p', 'wb'))
# pickle.dump(list(biblia_sacra_tokens), open('../data/serial/biblia_sacra_tokens.p', 'wb'))
# pickle.dump(list(ius_romanum_tokens), open('../data/serial/ius_romanum_tokens.p', 'wb'))

In [44]:
ll_tokens = pickle.load(open('../data/serial/ll_tokens.p', 'rb'))
cicero_tokens = pickle.load(open('../data/serial/cicero_tokens.p', 'rb'))
biblia_sacra_tokens = pickle.load(open('../data/serial/biblia_sacra_tokens.p', 'rb'))
ius_romanum_tokens = pickle.load(open('../data/serial/ius_romanum_tokens.p', 'rb'))

In [45]:
ll_words_size = len(ll_tokens)
cicero_words_size = len(cicero_tokens)
biblia_sacra_words_size = len(biblia_sacra_tokens)
ius_romanum_words_size = len(ius_romanum_tokens)

In [46]:
corpora = ['Latin Library', 'Cicero', 'Biblia Sacra', 'Ius Romanum']
sizes = [ll_size, cicero_size, biblia_sacra_size, ius_romanum_size]
words_sizes = [ll_words_size, cicero_words_size, biblia_sacra_words_size, ius_romanum_words_size]
data = {'Corpora': corpora, 'No. of Files': sizes, 'No. of Words': words_sizes}
pd.DataFrame.from_dict(data)

Unnamed: 0,Corpora,No. of Files,No. of Words
0,Latin Library,2164,16656639
1,Cicero,140,1395928
2,Biblia Sacra,77,708851
3,Ius Romanum,88,2191932


In [47]:
c = CorpusStoplist('latin')

In [48]:
ll_stops = c.build_stoplist(ll_docs, basis='zou', inc_counts=True)
cicero_stops = c.build_stoplist(cicero_docs, basis='zou', inc_counts=True)
biblia_sacra_stops = c.build_stoplist(biblia_sacra_docs, basis='zou', inc_counts=True)
ius_romanum_stops = c.build_stoplist(ius_romanum_docs, basis='zou', inc_counts=True)

In [49]:
print(ll_stops)

['ab', 'ac', 'ad', 'atque', 'aut', 'autem', 'bellum', 'classics', 'contra', 'cum', 'de', 'dei', 'deus', 'dum', 'ea', 'ego', 'eius', 'enim', 'eo', 'erat', 'ergo', 'esse', 'esset', 'est', 'et', 'etiam', 'eum', 'ex', 'fuit', 'haec', 'hic', 'his', 'hoc', 'iam', 'id', 'illa', 'ille', 'in', 'inter', 'ipse', 'ita', 'latin', 'liber', 'library', 'livy', 'lt', 'me', 'mihi', 'modo', 'nam', 'ne', 'nec', 'neque', 'nihil', 'nisi', 'non', 'nos', 'nunc', 'omnes', 'omnia', 'page', 'per', 'periocha', 'post', 'pro', 'qua', 'quae', 'quam', 'quem', 'qui', 'quia', 'quibus', 'quid', 'quidem', 'quis', 'quo', 'quod', 'quoque', 'res', 'se', 'secundum', 'sed', 'semper', 'si', 'sibi', 'sic', 'sicut', 'sit', 'sub', 'sunt', 'tamen', 'te', 'the', 'tibi', 'tu', 'ubi', 'urbe', 'ut', 'vel', 'vero']


In [50]:
print(cicero_stops)

['ab', 'ac', 'ad', 'an', 'ante', 'atque', 'aut', 'autem', 'causa', 'cicero', 'cum', 'de', 'ea', 'ego', 'eius', 'enim', 'eo', 'erat', 'esse', 'esset', 'est', 'et', 'etiam', 'eum', 'ex', 'fuit', 'haec', 'hic', 'hoc', 'iam', 'id', 'igitur', 'iis', 'illa', 'ille', 'illud', 'in', 'ipse', 'is', 'ita', 'liber', 'me', 'mihi', 'modo', 'nam', 'natura', 'ne', 'nec', 'neque', 'nihil', 'nisi', 'nobis', 'non', 'nos', 'nunc', 'omnes', 'omnia', 'omnibus', 'omnium', 'per', 'potest', 'pro', 'publica', 'publicae', 'qua', 'quae', 'quam', 'quem', 'qui', 'quibus', 'quid', 'quidem', 'quis', 'quo', 'quod', 'quos', 're', 'rebus', 'rei', 'rem', 'res', 'se', 'sed', 'senatus', 'si', 'sic', 'sine', 'sit', 'sunt', 'tam', 'tamen', 'te', 'the', 'tibi', 'tu', 'tum', 'uel', 'uero', 'uos', 'ut']


In [36]:
print(biblia_sacra_stops)

['ab', 'ad', 'ait', 'autem', 'caput', 'christi', 'christo', 'cum', 'de', 'dei', 'deo', 'deum', 'deus', 'dicit', 'die', 'dixit', 'domine', 'domini', 'domino', 'dominum', 'dominus', 'eam', 'ecce', 'ego', 'ei', 'eis', 'eius', 'enim', 'eo', 'eorum', 'eos', 'erat', 'ergo', 'erit', 'est', 'et', 'eum', 'ex', 'filii', 'filius', 'fratres', 'haec', 'hierusalem', 'hoc', 'iesu', 'in', 'israhel', 'me', 'mea', 'meum', 'mihi', 'ne', 'nec', 'neque', 'nobis', 'non', 'nos', 'nunc', 'omnes', 'omni', 'omnia', 'omnibus', 'omnis', 'per', 'pro', 'propter', 'quae', 'quam', 'quasi', 'quem', 'qui', 'quia', 'quid', 'quis', 'quod', 'quoniam', 'rex', 'se', 'secundum', 'sed', 'si', 'sicut', 'suis', 'sum', 'sunt', 'super', 'suum', 'te', 'terra', 'terram', 'the', 'tibi', 'tu', 'tua', 'tui', 'tuum', 'usque', 'ut', 'vobis', 'vos']


In [37]:
print(ius_romanum_stops)

['aa', 'ab', 'ac', 'actio', 'actionem', 'ad', 'an', 'aut', 'autem', 'causa', 'cj', 'conss', 'constantinopoli', 'contra', 'cth', 'cum', 'dat', 'de', 'debet', 'dig', 'ea', 'eam', 'ed', 'ei', 'eius', 'enim', 'eo', 'eorum', 'eos', 'erit', 'esse', 'esset', 'est', 'et', 'etiam', 'eum', 'ex', 'fuerit', 'gt', 'hereditatem', 'heres', 'his', 'hoc', 'id', 'idem', 'imperator', 'imperatores', 'in', 'is', 'ita', 'iulianus', 'iure', 'ius', 'kal', 'lt', 'mihi', 'nam', 'ne', 'nec', 'neque', 'nisi', 'nomine', 'non', 'paulus', 'per', 'posse', 'possit', 'post', 'potest', 'pp', 'pr', 'pro', 'quae', 'quam', 'qui', 'quia', 'quibus', 'quid', 'quidem', 'quis', 'quo', 'quod', 'quoque', 'rei', 'rem', 'res', 'sab', 'se', 'sed', 'si', 'sibi', 'sine', 'sit', 'sive', 'sunt', 'tamen', 'ulpianus', 'ut', 'vel', 'vero']
