# In [1]:
import csv
import os
from fpdf import FPDF
import textwrap
import re

def smart_wrap(text, width=100):
    """Return *text* re-flowed into newline-separated lines of at most *width* chars.

    The text is first passed through ``force_break_long_words`` (using that
    function's defaults) and then wrapped with ``textwrap.wrap``, which is
    allowed to split words that are still longer than *width*.
    """
    prepared = force_break_long_words(text)
    wrapped_lines = textwrap.wrap(prepared, width=width, break_long_words=True)
    return '\n'.join(wrapped_lines)

def force_break_long_words(text, max_word_length=40, break_every=20):
    """Insert soft hyphens into over-long whitespace-free tokens of *text*.

    Every maximal run of non-whitespace characters longer than
    *max_word_length* is cut into pieces of *break_every* characters (the last
    piece may be shorter) which are re-joined with U+00AD (soft hyphen).
    Shorter tokens and all whitespace are returned untouched.
    """
    def _soften(match):
        token = match.group(0)
        if len(token) > max_word_length:
            pieces = []
            for start in range(0, len(token), break_every):
                pieces.append(token[start:start + break_every])
            return '\u00AD'.join(pieces)
        return token

    # Only the non-space runs are visited, so the original spacing survives.
    return re.sub(r'\S+', _soften, text)

def _triple_to_sentence(subject, relation, obj, relation_templates, default_template):
    """Render one (subject, relation, object) triple as a sentence."""
    # Opening-hours relations whose object is "closed" (in any casing) would
    # otherwise be rendered as "... is open from Closed."
    if relation.endswith("time open") and obj.casefold() == "closed":
        day = relation.split("'")[0]  # "Monday's time open" -> "Monday"
        return f"On {day}, {subject} is closed."

    template = relation_templates.get(relation, default_template)
    return template.format(subject=subject, relationship=relation, object=obj)


def _sentences_from_csv(filepath, relation_templates, default_template):
    """Read one triples CSV (header + 3-column rows) and return its sentences."""
    sentences = []
    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends.
    with open(filepath, mode='r', encoding='utf-8', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # skip the header row; an empty file is tolerated
        for row in reader:
            if not any(cell.strip() for cell in row):
                continue  # blank line
            if len(row) != 3:
                raise ValueError(
                    f"{filepath}, line {reader.line_num}: expected 3 columns "
                    f"(subject, relation, object), got {len(row)}"
                )
            # NOTE(review): underscores are also replaced inside the object,
            # which would alter URLs containing '_' -- confirm this is intended.
            subject, relation, obj = (cell.strip().replace('_', ' ') for cell in row)
            sentences.append(
                _triple_to_sentence(subject, relation, obj,
                                    relation_templates, default_template)
            )
    return sentences


def csvs_to_pdf(folder_path, relation_templates, default_template, output_pdf_path,
                font_path='NotoSans-Regular.ttf'):
    """Turn every triples CSV in *folder_path* into sentences and write them to a PDF.

    Each CSV is expected to have a header row followed by rows of exactly
    three columns: subject, relation, object. Cells are stripped and their
    underscores replaced by spaces before being formatted.

    Args:
        folder_path: Directory scanned (non-recursively) for ``.csv`` files.
        relation_templates: Mapping from relation text to a ``str.format``
            template using ``{subject}``, ``{relationship}`` and ``{object}``.
        default_template: Template used when the relation has no entry in
            *relation_templates*.
        output_pdf_path: Path the PDF is written to.
        font_path: TrueType font file registered as "NotoSans" and used for
            all text.

    Raises:
        ValueError: If a non-blank CSV row does not have exactly three columns.
    """
    sentences = []

    # os.listdir returns names in arbitrary order; sort so the PDF content is
    # reproducible from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(".csv"):
            continue
        filepath = os.path.join(folder_path, filename)
        if not os.path.isfile(filepath):
            continue  # e.g. a directory whose name happens to end in .csv
        sentences.extend(
            _sentences_from_csv(filepath, relation_templates, default_template)
        )

    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('NotoSans', '', font_path)
    pdf.set_font("NotoSans", size=11)

    for sentence in sentences:
        pdf.multi_cell(0, 10, smart_wrap(sentence))
        # Put the cursor back on the left margin before the next sentence.
        pdf.set_x(pdf.l_margin)
        pdf.ln(2)

    pdf.output(output_pdf_path)



# In [2]:
# Sentence templates keyed by relation text (after underscores have been
# replaced by spaces). Placeholders: {subject}, {relationship}, {object}.
relation_templates = {
    "main services are": "The main services of {subject} are {object}.",
    "other services are": "The other services of {subject} are {object}.",
    # Terminal period added so this sentence ends like the others.
    "phone number is": "The phone number of {subject} is {object}.",
    "website is": "The website for {subject} is {object}.",
    "is located at": "{subject} is located at {object}.",
    "url is located at": "The Google Maps url for the location of {subject} is {object}.",
    "has an availability status of": "{subject} is {object}.",
    "description is": "{object}.",
    "speaks": "{subject} speaks {object}.",
    "costs": "The cost of {subject} is {object}.",
    "has Google Reviews": "It is {object} that {subject} has Google Reviews.",
    "covers": "{subject}: {object}",
    "zipcode is": "The zipcode of {subject} is {object}.",
    "24hours status is": "It is {object} that {subject} is open 24 hours.",
    # One opening-hours template per weekday, e.g.
    # "Monday's time open" -> "On Monday, {subject} is open from {object}."
    **{
        f"{day}'s time open": f"On {day}, {{subject}} is open from {{object}}."
        for day in (
            "Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday",
        )
    },
    "has a Google Rating of": "{subject} has a Google Rating of {object}.",
    "offers": "{subject} offers {object}.",
    "serves": "{subject} serves {object}.",
    "socials are": "The socials of {subject} are {object}.",
    "coordinates are": "The coordinates of {subject} are {object}.",
}

# Fallback for any relation that has no dedicated template above.
default_template = "{subject} has a relationship of type {relationship} with {object}."

# Build the Riverside sentences PDF from every triples CSV in the folder.
csvs_to_pdf(
    folder_path="Leo/Riverside/Triples_CSVs",
    relation_templates=relation_templates,
    default_template=default_template,
    output_pdf_path="Riverside_Sentences.pdf"
)