In [16]:
from pymilvus import connections, Collection
from fpdf import FPDF
import re

# Drop any existing "default" connection so the connect() call below
# starts from a clean alias with the settings given here.
if connections.has_connection("default"):
    connections.remove_connection("default")

connections.connect(alias="default", host="localhost", port="19530")

# Open the collection that holds the text records
collection_name = "text_collection"
collection = Collection(name=collection_name)

# Fetch the 'text' field of every record whose text is non-empty
texts = collection.query(expr="text != ''", output_fields=["text"])

# Build one numbered line per record (numbering starts at 1) and join them
numbered_lines = [
    f"{number}. {record['text']}"
    for number, record in enumerate(texts, start=1)
]
formatted_text = "\n".join(numbered_lines)

# Pattern matching runs of ASCII characters (code points 0x00-0x7F);
# used later to drop everything outside that range from each record.
ascii_pattern = re.compile(r'[\x00-\x7F]+')

# Start a PDF document with one page and 10 pt Arial as the text font
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=10)

# re.sub callback: collapse a long run of one repeated character to a short run
def replace_repeating_characters(match, keep=5):
    """Return the shortened replacement for a run of a single repeated character.

    Intended as the replacement callable for ``re.sub`` with a pattern that
    matches a run of one repeated character.

    Args:
        match: Regex match object; the first character of its full matched
            text is the character being repeated.
        keep: Number of copies of that character to emit. Defaults to 5,
            the length this notebook has always produced.

    Returns:
        The repeated character, ``keep`` times.
    """
    character = match.group(0)[0]
    return character * keep

for number, record in enumerate(texts, start=1):
    # Keep only the ASCII runs of the text, joined by single spaces
    ascii_runs = ascii_pattern.findall(record['text'])
    # Collapse any run of 11 or more identical characters via the callback
    cleaned_text = re.sub(
        r'(.)\1{10,}', replace_repeating_characters, ' '.join(ascii_runs)
    )

    # Write the numbered, cleaned entry, followed by a small vertical gap
    pdf.multi_cell(0, 5, f"{number}. {cleaned_text}")
    pdf.ln(2)

    # Draw a horizontal separator at the current vertical position
    rule_y = pdf.get_y()
    pdf.line(10, rule_y, 200, rule_y)

    # Leave a little space below the separator before the next entry
    pdf.ln(0.5)


# Save the PDF to the current directory
pdf_output_name = "milvus_text_data.pdf"
pdf.output(pdf_output_name)

# Report where the PDF was written. Uses pdf_output_name (defined above);
# the previous reference to an undefined `pdf_output_path` only worked with
# leftover kernel state and raises NameError on a fresh kernel.
print(f"The PDF has been created at: {pdf_output_name}")


The PDF has been created at: milvus_text_data.pdf


In [2]:
pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25ldone
[?25h  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40702 sha256=0edd8a2b43fa440933eb52aca55806af9673b90fe2c166373e0ec88708a64615
  Stored in directory: /Users/garfieldgreglim/Library/Caches/pip/wheels/65/4f/66/bbda9866da446a72e206d6484cd97381cbc7859a7068541c36
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2
Note: you may need to restart the kernel to use updated packages.
