In [1]:
import uuid

from db.DocumentEntry import DocumentEntry
from db.DocumentRepository import DocumentRepository
from datetime import datetime, timedelta

Let's see if the DocumentRepository works by testing the API.

Let's first provide the data we want to save and load

In [2]:
documentRepository = DocumentRepository()

# Capture a single reference timestamp and derive every fixture from it, so
# entry2 is exactly one day older than entry1 (instead of "one day minus the
# few microseconds between two datetime.now() calls").
now = datetime.now()

# entry1 is built with an explicitly supplied id.
entry1 = DocumentEntry(
    "url1",
    ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5", "keyword6", "keyword7"],
    "This is a nice content page",
    now,
    uuid.uuid4(),
)
# entry2 omits the id argument and has no keywords. The recorded run shows it
# still ends up with an id — presumably DocumentEntry generates one by
# default; confirm against db/DocumentEntry.
entry2 = DocumentEntry("url2", [], "content2", now - timedelta(days=1))

# Batch used by the bulk-save cell further down.
documentsToSave = [entry1, entry2]

SC: Connected to the db. Now you can go and build the best search engine around!


Let's first clear the db

In [3]:
# Start from a known-empty repository so the following cells are reproducible.
documentRepository.deleteAllDocuments()
allDocuments = documentRepository.loadAllDocuments()

# Check the result in code rather than by eye.
assert allDocuments == [], f"expected an empty repository, got {len(allDocuments)} document(s)"
allDocuments

SC: Deleted all documents.


[]

How does it look after we save 2 elements?

In [4]:
# Bulk-save the two fixtures, then read everything back.
documentRepository.saveAllDocuments(documentsToSave)
allDocuments = documentRepository.loadAllDocuments()
print(allDocuments)
print(len(allDocuments))

# The repository was cleared in the previous cell, so it must now hold
# exactly the documents that were just saved.
assert len(allDocuments) == len(documentsToSave), (
    f"expected {len(documentsToSave)} documents, got {len(allDocuments)}"
)

SC: All documents saved.
[DocumentEntry[id=654d05ef-1..., url=url1, keywords=['keyword1', 'keyword2', ...], content=This is a ..., last_updated=2024-06-30 19:24:22.613541], DocumentEntry[id=b78be371-e..., url=url2, keywords=[], content=content2, last_updated=2024-06-29 19:24:22.613375]]
2


Are the types correct?

In [5]:
# Show the container type and the element type of the loaded result.
print(type(allDocuments))
print(type(allDocuments[0]))

# Enforce the expected types instead of relying on reading the printout.
assert isinstance(allDocuments, list), f"expected list, got {type(allDocuments)}"
assert isinstance(allDocuments[0], DocumentEntry), (
    f"expected DocumentEntry, got {type(allDocuments[0])}"
)

<class 'list'>
<class 'db.DocumentEntry.DocumentEntry'>


The first should be "list", the second "db.DocumentEntry.DocumentEntry".

Let's add one more element

In [6]:
# A third document, two days old, saved through the single-document API.
entry3 = DocumentEntry("url3", ["keyword5"], "content3", datetime.now() - timedelta(days=2), uuid.uuid4())
documentRepository.saveDocument(entry3)
allDocuments = documentRepository.loadAllDocuments()

# Exactly one document must have been added on top of the bulk-saved ones.
assert len(allDocuments) == len(documentsToSave) + 1, (
    f"expected {len(documentsToSave) + 1} documents, got {len(allDocuments)}"
)
allDocuments

SC: Saved document.


[DocumentEntry[id=654d05ef-1..., url=url1, keywords=['keyword1', 'keyword2', ...], content=This is a ..., last_updated=2024-06-30 19:24:22.613541],
 DocumentEntry[id=b78be371-e..., url=url2, keywords=[], content=content2, last_updated=2024-06-29 19:24:22.613375],
 DocumentEntry[id=abf51256-5..., url=url3, keywords=['keyword5'], content=content3, last_updated=2024-06-28 19:24:22.653040]]

Let's update this element.

In [7]:
# Rebuild entry3 with one extra keyword. The url, content, timestamp and id
# are copied over unchanged; reusing entry3.id is what should make the update
# hit the already-stored record.
entry4 = DocumentEntry(
    entry3.url,
    ["keyword5", "keyword6"],  # added keyword6
    entry3.content,
    entry3.last_updated,
    entry3.id,
)
documentRepository.updateDocument(entry4)
allDocuments = documentRepository.loadAllDocuments()

# An update must modify in place: no record may be added or dropped.
assert len(allDocuments) == len(documentsToSave) + 1, (
    f"update changed the document count to {len(allDocuments)}"
)
allDocuments

SC: Updated document.


[DocumentEntry[id=654d05ef-1..., url=url1, keywords=['keyword1', 'keyword2', ...], content=This is a ..., last_updated=2024-06-30 19:24:22.613541],
 DocumentEntry[id=b78be371-e..., url=url2, keywords=[], content=content2, last_updated=2024-06-29 19:24:22.613375],
 DocumentEntry[id=abf51256-5..., url=url3, keywords=['keyword5', 'keyword6'], content=content3, last_updated=2024-06-28 19:24:22.653040]]

Let's delete this element again.

In [8]:
# Remove the document that was added and then updated above.
documentRepository.deleteDocument(entry4)
allDocuments = documentRepository.loadAllDocuments()

# Only the two bulk-saved documents should remain.
assert len(allDocuments) == len(documentsToSave), (
    f"expected {len(documentsToSave)} documents after delete, got {len(allDocuments)}"
)
allDocuments

SC: Deleted document.


[DocumentEntry[id=654d05ef-1..., url=url1, keywords=['keyword1', 'keyword2', ...], content=This is a ..., last_updated=2024-06-30 19:24:22.613541],
 DocumentEntry[id=b78be371-e..., url=url2, keywords=[], content=content2, last_updated=2024-06-29 19:24:22.613375]]

Do we have 0 elements when we decide to clear the index?

In [9]:
# Clean up: wipe everything this notebook wrote to the repository.
documentRepository.deleteAllDocuments()
allDocuments = documentRepository.loadAllDocuments()

# Check the result in code rather than by eye.
assert allDocuments == [], f"expected an empty repository, got {len(allDocuments)} document(s)"
allDocuments

SC: Deleted all documents.


[]

If the output is "[]", everything worked as expected.