In [42]:
# Using https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-cos-v1

# import the SentenceTransformer, a wrapper on top of the model
from sentence_transformers import SentenceTransformer

# get 'sentence-transformers/multi-qa-mpnet-base-cos-v1', a pretrained model
model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

# two short sample sentences to check that encoding works
docs = ["A paragon of virtue", "The hero of legend"]

# The model accepts at most 512 word pieces per input:
# anything longer is truncated.
# It was also only trained on input text of up to 250 word pieces,
# so it might not work well for longer text.

embeddings = model.encode(docs, batch_size=32, show_progress_bar=True)

# one row per sentence, one column per embedding dimension
print(embeddings.shape)

Batches: 100%|██████████| 1/1 [00:00<00:00, 21.36it/s]

(2, 768)





In [43]:
import re

# default word limit a chunk may reach before split/rightSize break it up
CHUNK_SIZE = 300

def makeParagraphs(text):
  '''
  creates a list of paragraphs.
  text: a string of arbitrary length.
  returns: the list of strings found between paragraph separators. A separator
    is a period at the end of a line, optionally followed by blank lines; the
    period is part of the separator, so it is not kept in the paragraph.
  '''
  # The groups are non-capturing on purpose: re.split() inserts the text of
  # every capturing group (or None when the group did not take part in the
  # match) into its result, which would mix separators into the paragraphs.
  return re.split(r'(?:\.\n\r?)+(?:\n\r?)*', text)

def removeEmpty(chunks):
  '''
  drops the chunks that carry no text and trims the ones that are kept.
  chunks: a list whose items are strings (None and '' are tolerated).
  returns: a new list with the surviving chunks, stripped of surrounding whitespace.
  '''
  kept = []
  for piece in chunks:
    # None and '' are skipped before any string method is called
    if not piece:
      continue
    # nothing left once dots, new lines, tabs and spaces are peeled off
    if not piece.strip('.\n\t '):
      continue
    kept.append(piece.strip())
  return kept

def split(chunk, size_limit=CHUNK_SIZE):
  '''
  splits a chunk into sentence-like pieces when it holds more words than the limit.
  chunk: a string.
  size_limit: number of words a chunk may hold before it gets split
    (defaults to CHUNK_SIZE).
  returns: [chunk] untouched when it is within the limit; otherwise the
    non-blank pieces found between '.', '?' and ';'. The punctuation itself
    is dropped, and a single piece may still be longer than the limit.
  '''
  # split() without an argument breaks on any run of whitespace, so words that
  # are only separated by a line break are counted as separate words too
  if len(chunk.split()) <= size_limit:
    return [chunk]
  pieces = re.split(r'\.|\?|;', chunk)
  # trailing or repeated punctuation produces blank pieces; they carry no text
  return [piece for piece in pieces if piece.strip()]

def rightSize(chunks, size_limit=CHUNK_SIZE):
  '''
  runs every chunk through split() and gathers the resulting pieces, in order,
  into one flat list.
  chunks: a list of strings.
  size_limit: limit handed over to split() (defaults to CHUNK_SIZE).
  '''
  resized = []
  for paragraph in chunks:
    # split() returns a list either way: the chunk itself or its pieces
    resized.extend(split(paragraph, size_limit))
  return resized

def makeEmbedding(chunks, model=model, device='mps'):
  '''
  makes embeddings out of a group of chunks.
  chunks: a list of strings to encode.
  model: the object whose encode() is called; defaults to the notebook's model,
    'sentence-transformers/multi-qa-mpnet-base-cos-v1' wrapped in SentenceTransformer.
  device: device handed to encode(). Defaults to 'mps', which sends the work to
    Metal shaders in M1 macs; pass another device name (e.g. 'cpu' or 'cuda')
    on other hardware, or None to let the library choose.
  returns: whatever model.encode() returns for the chunks.
  '''
  return model.encode(
    chunks,
    batch_size=32,
    device=device,
    show_progress_bar=True,
  )

def makeChunks(raw_text):
  '''
  turns raw text into chunks: the text is cut into paragraphs, the empty ones
  are discarded and the rest is resized with the default size limit.
  '''
  candidates = makeParagraphs(raw_text)
  kept = removeEmpty(candidates)
  return rightSize(kept)

def fileToChunks(filePath, encoding='utf-8'):
  '''
  reads a whole text file and hands its content to makeChunks().
  filePath: path of the file to read.
  encoding: encoding used to decode the file (utf-8 by default).
  returns: the rightsized chunks produced by makeChunks().
  '''
  with open(filePath, encoding=encoding) as source:
    contents = source.read()
  return makeChunks(contents)

def makeEmbeddingsPerChunk(forFile):
  '''
  chunks a file, embeds every chunk and pairs each chunk with its embedding.
  forFile: path of the raw text file.
  returns: a dict keyed by chunk text whose values are the matching embeddings;
    chunks with identical text end up sharing a single entry.
  '''
  chunks = fileToChunks(forFile)
  vectors = makeEmbedding(chunks)
  chunk_to_vector = {}
  for position, chunk in enumerate(chunks):
    chunk_to_vector[chunk] = vectors[position]
  return chunk_to_vector

# these are ready to get inserted into a database:
# a dict mapping each chunk's text to its embedding. Note that this rebinds
# `embeddings`, which previously held the array computed for the sample docs.
embeddings = makeEmbeddingsPerChunk('pg2680.txt')

Batches: 100%|██████████| 50/50 [00:05<00:00,  8.99it/s]


In [44]:
# insert embeddings into the database
import os

import psycopg2
from tqdm import tqdm

DATABASE = "semantic_search"
HOST = "127.0.0.1"
USER = "postgres"
# Prefer a password supplied through the PGPASSWORD environment variable; the
# literal is only a fallback for the local development database.
PASSWORD = os.environ.get("PGPASSWORD", "123456")
CMD = """
insert into 
  items(embedding, text_chunk)
  values (%s, %s)
  returning id;
"""
# longest text that gets inserted; presumably the width of the text_chunk
# column -- confirm against the table definition
MAX_TEXT_CHARS = 1024

connection = psycopg2.connect(
  host=HOST,
  database=DATABASE,
  user=USER,
  password=PASSWORD
)
try:
  # `with connection` commits when the block succeeds and rolls back when it
  # raises, so no explicit commit is needed inside it
  with connection:
    with connection.cursor() as cursor:
      for txt, embedding in tqdm(embeddings.items()):
        if len(txt) > MAX_TEXT_CHARS:
          # oversized chunks are reported and skipped: they are NOT stored
          print('too big: %s' % txt)
          continue
        # the id returned by the statement is not needed here, so it is not fetched
        cursor.execute(CMD, (embedding.tolist(), txt))
finally:
  # psycopg2 does not close the connection when the `with` block ends
  connection.close()


  7%|▋         | 108/1562 [00:00<00:04, 344.21it/s]

too big: The settlement made after these troubles might have been more
satisfactory but for an unexpected rising in the east. Avidius Cassius,
an able captain who had won renown in the Parthian wars, was at this
time chief governor of the eastern provinces. By whatever means induced,
he had conceived the project of proclaiming himself emperor as soon as
Marcus, who was then in feeble health, should die; and a report having
been conveyed to him that Marcus was dead, Cassius did as he had
planned. Marcus, on hearing the news, immediately patched up a peace and
returned home to meet this new peril. The emperors great grief was that
he must needs engage in the horrors of civil strife. He praised the
qualities of Cassius, and expressed a heartfelt wish that Cassius might
not be driven to do himself a hurt before he should have the opportunity
to grant a free pardon. But before he could come to the east news had
come to Cassius that the emperor still lived; his followers fell away
from him, 

 15%|█▍        | 230/1562 [00:00<00:03, 370.37it/s]

too big: He sees also the true essence of happiness. 'If happiness did consist
in pleasure, how came notorious robbers, impure abominable livers,
parricides, and tyrants, in so large a measure to have their part of
pleasures?' He who had all the world's pleasures at command can write
thus 'A happy lot and portion is, good inclinations of the soul, good
desires, good actions.'

By the irony of fate this man, so gentle and good, so desirous of quiet
joys and a mind free from care, was set at the head of the Roman Empire
when great dangers threatened from east and west. For several years he
himself commanded his armies in chief. In camp before the Quadi he dates
the first book of his _Meditations_, and shows how he could retire within
himself amid the coarse clangour of arms. The pomps and glories which
he despised were all his; what to most men is an ambition or a dream, to
him was a round of weary tasks which nothing but the stern sense of duty
could carry him through. And he did his wo

 21%|██        | 322/1562 [00:00<00:03, 412.15it/s]

too big: XI. Consider with thyself how man, and by what part of his, is joined
unto God, and how that part of man is affected, when it is said to be
diffused. There is nothing more wretched than that soul, which in a kind
of circuit compasseth all things, searching (as he saith) even the very
depths of the earth; and by all signs and conjectures prying into the
very thoughts of other men's souls; and yet of this, is not sensible,
that it is sufficient for a man to apply himself wholly, and to confine
all his thoughts and cares to the tendance of that spirit which is
within him, and truly and really to serve him. His service doth consist
in this, that a man keep himself pure from all violent passion and
evil affection, from all rashness and vanity, and from all manner of
discontent, either in regard of the gods or men. For indeed whatsoever
proceeds from the gods, deserves respect for their worth and excellency;
and whatsoever proceeds from men, as they are our kinsmen, should by us
be 

 33%|███▎      | 522/1562 [00:01<00:01, 585.20it/s]

too big: XIX. Whatsoever is expedient unto thee, O World, is expedient unto me;
nothing can either be 'unseasonable unto me, or out of date, which unto
thee is seasonable. Whatsoever thy seasons bear, shall ever by me be
esteemed as happy fruit, and increase. O Nature! from thee are all
things, in thee all things subsist, and to thee all tend. Could he say
of Athens, Thou lovely city of Cecrops; and shalt not thou say of the
world, Thou lovely city of God?

XX. They will say commonly, Meddle not with many things, if thou wilt
live cheerfully. Certainly there is nothing better, than for a man
to confine himself to necessary actions; to such and so many only, as
reason in a creature that knows itself born for society, will command
and enjoin. This will not only procure that cheerfulness, which from the
goodness, but that also, which from the paucity of actions doth usually
proceed. For since it is so, that most of those things, which we either
speak or do, are unnecessary; if a man shall

 47%|████▋     | 729/1562 [00:01<00:01, 651.24it/s]

too big: When then will there be an end?

XLII. Let the several deaths of men of all sorts, and of all sorts of
professions, and of all sort of nations, be a perpetual object of thy
thoughts,... so that thou mayst even come down to Philistio, Phœbus,
and Origanion. Pass now to other generations. Thither shall we after
many changes, where so many brave orators are; where so many grave
philosophers; Heraclitus, Pythagoras, Socrates. Where so many heroes of
the old times; and then so many brave captains of the latter times; and
so many kings. After all these, where Eudoxus, Hipparchus, Archimedes;
where so many other sharp, generous, industrious, subtile, peremptory
dispositions; and among others, even they, that have been the greatest
scoffers and deriders of the frailty and brevity of this our human life;
as Menippus, and others, as many as there have been such as he. Of all
these consider, that they long since are all dead, and gone. And what do
they suffer by it! Nay they that have no

 56%|█████▌    | 867/1562 [00:01<00:01, 586.80it/s]

too big: VI. Every particular nature hath content, when in its own proper course
it speeds. A reasonable nature doth then speed, when first in matter of
fancies and imaginations, it gives no consent to that which is either
false uncertain. Secondly, when in all its motions and resolutions it
takes its level at the common good only, and that it desireth nothing,
and flieth from nothing, bet what is in its own power to compass or
avoid. And lastly, when it willingly and gladly embraceth, whatsoever is
dealt and appointed unto it by the common nature. For it is part of it;
even as the nature of any one leaf, is part of the common nature of all
plants and trees. But that the nature of a leaf, is part of a nature
both unreasonable and unsensible, and which in its proper end may be
hindered; or, which is servile and slavish: whereas the nature of man is
part of a common nature which cannot be hindered, and which is both
reasonable and just. From whence also it is, that according to the
worth

 59%|█████▉    | 928/1562 [00:01<00:01, 531.38it/s]

too big: XXIX. From some high place as it were to look down, and to behold
here flocks, and there sacrifices, without number; and all kind of
navigation; some in a rough and stormy sea, and some in a calm: the
general differences, or different estates of things, some, that are now
first upon being; the several and mutual relations of those things that
are together; and some other things that are at their last. Their lives
also, who were long ago, and theirs who shall be hereafter, and the
present estate and life of those many nations of barbarians that are
now in the world, thou must likewise consider in thy mind. And how many
there be, who never so much as heard of thy name, how many that will
soon forget it; how many who but even now did commend thee, within a
very little while perchance will speak ill of thee. So that neither
fame, nor honour, nor anything else that this world doth afford, is
worth the while. The sum then of all; whatsoever doth happen unto thee,
whereof God is the 

 66%|██████▋   | 1038/1562 [00:02<00:01, 508.81it/s]

too big: VII. All parts of the world, (all things I mean that are contained
within the whole world), must of necessity at some time or other come to
corruption. Alteration I should say, to speak truly and properly; but
that I may be the better understood, I am content at this time to use
that more common word. Now say I, if so be that this be both hurtful
unto them, and yet unavoidable, would not, thinkest thou, the whole
itself be in a sweet case, all the parts of it being subject to
alteration, yea and by their making itself fitted for corruption, as
consisting of things different and contrary? And did nature then either
of herself thus project and purpose the affliction and misery of her
parts, and therefore of purpose so made them, not only that haply they
might, but of necessity that they should fall into evil; or did not she
know what she did, when she made them? For either of these two to say,
is equally absurd. But to let pass nature in general, and to reason of
things particul

 76%|███████▌  | 1180/1562 [00:02<00:00, 392.40it/s]

too big: XII. Will any contemn me? let him look to that, upon what grounds he
does it: my care shall be that I may never be found either doing or
speaking anything that doth truly deserve contempt. Will any hate me?
let him look to that. I for my part will be kind and loving unto all,
and even unto him that hates me, whom-soever he be, will I be ready to
show his error, not by way of exprobation or ostentation of my patience,
but ingenuously and meekly: such as was that famous Phocion, if so be
that he did not dissemble. For it is inwardly that these things must be:
that the Gods who look inwardly, and not upon the outward appearance,
may behold a man truly free from all indignation and grief. For what
hurt can it be unto thee whatsoever any man else doth, as long as thou
mayest do that which is proper and suitable to thine own nature? Wilt
not thou (a man wholly appointed to be both what, and as the common good
shall require) accept of that which is now seasonable to the nature
of the

 82%|████████▏ | 1278/1562 [00:02<00:00, 433.14it/s]

too big: XVII. No operation whatsoever it he, ceasing for a while, can be truly
said to suffer any evil, because it is at an end. Neither can he that
is the author of that operation; for this very respect, because his
operation is at an end, be said to suffer any evil. Likewise then,
neither can the whole body of all our actions (which is our life) if in
time it cease, be said to suffer any evil for this very reason, because
it is at an end; nor he truly be said to have been ill affected, that
did put a period to this series of actions. Now this time or certain
period, depends of the determination of nature: sometimes of particular
nature, as when a man dieth old; but of nature in general, however; the
parts whereof thus changing one after another, the whole world still
continues fresh and new. Now that is ever best and most seasonable,
which is for the good of the whole. Thus it appears that death of
itself can neither be hurtful to any in particular, because it is not a
shameful thin

 89%|████████▊ | 1384/1562 [00:02<00:00, 480.93it/s]

too big: 'MY DEAREST MASTER,--I am well. To-day I studied from the ninth hour of
the night to the second hour of day, after taking food. I then put on
my slippers, and from time second to the third hour had a most
enjoyable walk up and down before my chamber. Then booted and
cloaked-for so we were commanded to appear-I went to wait upon my lord
the emperor. We went a-hunting, did doughty deeds, heard a rumour that
boars had been caught, but there was nothing to see. However, we
climbed a pretty steep hill, and in the afternoon returned home. I went
straight to my books. Off with the boots, down with the cloak; I spent
a couple of hours in bed. I read Cato's speech on the Property of
Pulchra, and another in which he impeaches a tribune. Ho, ho! I hear
you cry to your man, Off with you as fast as you can, and bring me
these speeches from the library of Apollo. No use to send: I have those
books with me too. You must get round the Tiberian librarian; you will
have to spend something on th

100%|██████████| 1562/1562 [00:03<00:00, 495.53it/s]

too big: Creating the works from print editions not protected by U.S. copyright
law means that no one owns a United States copyright in these works,
so the Foundation (and you!) can copy and distribute it in the United
States without permission and without paying copyright
royalties. Special rules, set forth in the General Terms of Use part
of this license, apply to copying and distributing Project
Gutenberg™ electronic works to protect the PROJECT GUTENBERG™
concept and trademark. Project Gutenberg is a registered trademark,
and may not be used if you charge for an eBook, except by following
the terms of the trademark license, including paying royalties for use
of the Project Gutenberg trademark. If you do not charge anything for
copies of this eBook, complying with the trademark license is very
easy. You may use this eBook for nearly any purpose such as creation
of derivative works, reports, performances and research. Project
Gutenberg eBooks may be modified and printed and given awa




In [49]:
# query the book
q = "what is virtue?"
q_encoded = makeEmbedding([q])[0].tolist()
# The query vector is sent as a bound parameter instead of being pasted into
# the SQL text: str() gives its '[x, y, ...]' text form and psycopg2 takes
# care of quoting it.
CMD = "select text_chunk from items order by embedding <=> %s limit 5"
results = None
connection = psycopg2.connect(
  host=HOST,
  database=DATABASE,
  user=USER,
  password=PASSWORD
)
try:
  with connection:
    with connection.cursor() as cursor:
      cursor.execute(CMD, (str(q_encoded),))
      results = cursor.fetchall()
finally:
  # `with connection` only ends the transaction; psycopg2 leaves the
  # connection open, so it is closed explicitly
  connection.close()

# every row holds a single column: the text of the matching chunk
for result in results:
  print(result[0])
  print("---")


Batches: 100%|██████████| 1/1 [00:00<00:00, 17.31it/s]

 which of all the virtues is the proper
virtue for this present use
---
 For each fault in others, Nature (says
he) has given us a counteracting virtue
---
XVI. Under, above, and about, are the motions of the elements; but
the motion of virtue, is none of those motions, but is somewhat more
excellent and divine. Whose way (to speed and prosper in it) must be
through a way, that is not easily comprehended
---
XIV. As virtue and wickedness consist not in passion, but in action; so
neither doth the true good or evil of a reasonable charitable man
consist in passion, but in operation and action
---
' The virtue of suffering in itself is an idea which does
not meet us in the _Meditations_
---



