In [3]:
from helpers import gemini_chunk, save_chunks_to_db
from langchain_core.documents import Document
import json
import uuid

metadata = {"category": "localplan", "lpa": "oxford_city_council"}


def read_file_in_sections(file_path, section_size=100):
    sections = []
    try:
        with open(file_path, "r") as file:
            while True:
                lines = []
                for _ in range(section_size):
                    line = file.readline()
                    if not line:
                        break
                    lines.append(line.strip())
                if not lines:
                    break
                sections.append("\n".join(lines))
    except Exception as e:
        print(f"An error occurred: {e}")
        return []

    return sections


index = 0
# Example usage
file_path = "policy.txt"
sections = read_file_in_sections(file_path)
for i, section in enumerate(sections):
    docs = []

    json_chunk = gemini_chunk(section)
    print(json_chunk)
    chunks = json.loads(json_chunk)

    for chunk in chunks:
        local_metadata = dict(metadata)
        local_metadata["id"] = str(uuid.uuid4())
        local_metadata["index"] = index
        local_metadata["text"] = chunk["text"]
        local_metadata["policy"] = chunk["policy"]
        local_metadata["site"] = chunk["site"]
        local_metadata["sections"] = ">".join(chunk["headings"])

        print("---" * 30)

        print("Sections")
        print(local_metadata["sections"])

        print("Text")
        print(local_metadata["text"])

        print("---" * 30)

        doc = Document(
            page_content=local_metadata["text"],
            metadata=local_metadata,
        )

        docs.append(doc)

        index += 1

    save_chunks_to_db(docs)

[
  {
    "text": "This forward-thinking Local Plan aims to address the key issues facing Oxford’s residents and businesses. It endeavours to find the right balance to help us tackle the housing crisis and climate emergency, support our communities residents, ensure an attractive, resilient and healthy living and working environment, and to make Oxford’s successful economy work for all residents.",
    "headings": [
      "FOREWORD"
    ],
    "policy": false,
    "site": false
  },
  {
    "text": "The Local Plan sets out how we will tackle the existential threat of climate change. It will require all new homes and businesses in Oxford to be net zero-carbon and to operate without using fossil fuels. Resilience to climate important and policies require planting of trees and other green features, as well as flood-resistance measures such as sustainable drainage, in new developments. In addition, policies aim to preserve soil and air quality.",
    "headings": [
      "FOREWORD"
    ],
 



['b7c04caf-9621-4e39-8b24-d8ce23969592', 'c1f4bb98-cd5f-4f79-a236-f82f8fc68e66', '94f33752-5e25-4adb-9888-b68664804e51', 'b26717f3-584c-4ac5-be40-9562fa2c5d7b', '07dfb34b-db9f-4547-b951-9247fcdc590f', 'e28f8c67-e91f-4a51-a05b-28dadfb0c8a6', '5452020e-83d6-4aae-92b7-f2e0d892bff3', 'e28de1e5-229b-4d68-831f-28a9cf9fa5df', 'c0c4e58e-d9e2-4c08-beb6-ef7ce875750f', '08a8066c-c6a0-404c-a9eb-88748c980062', 'fe076fa8-519d-4f64-a8b6-d40cfb28ac4f', '6bbe49c0-374c-4e27-a34c-8a41b82811ea', '5efd79c3-5f72-41d1-8632-cc2a68681110', 'e016aaf5-3335-4c4a-af61-896b97908bb9', 'e8f3bb51-2eff-4a41-9a51-fa696a1a5002', '38518b59-5760-4409-bb7d-8ac36e587562', '1a4e38a2-124e-486d-9c1f-607fa768b5b4', '7a953c09-196c-4ee6-9ff1-2ebf5cf98d79', '0267c92f-903c-4ce2-a3cc-6da38eb073bd', 'a64dc327-0d61-4ecc-9e76-f04d0b7f7bbc', 'e340b57c-70bb-45c0-8cd9-5fab3c38c947', 'acaa9a05-9e60-43fd-aa9e-ecebcc5202b8', '614c2ec6-5a77-4bc1-b98a-642d468efc72', '8c95bf05-1f3b-4243-afb0-e48a73e0cddb', '5a8a9000-0baf-4012-9751-7099870b0ef3',



['d84536a5-8225-4f44-8dae-83b73d38f7f4', 'b9445b13-0750-4b5d-bf9d-51b98ff91d1d', 'f78286a0-7a33-4472-be09-10c1f7a8a2fd', '70d99c37-5bc6-4a4c-82bd-892dba9cefd6', '0fbe79e4-7aea-4a44-ad70-682b6fad213a', '6e2a5467-510c-4c34-b672-b5f117d82bd2', 'b47c8567-232b-41a4-ac22-148d8d1f3786', 'c5273154-d546-4ecd-b4ba-3694fab574e2', '64c0ec1e-f919-4b47-89e5-fc12cc4311e3', '474b3fd7-f190-4c03-999a-727c87dfdf92', '92308a59-d451-45c7-86a9-adae4a405cf4', 'bff34563-977b-4d95-868c-c8baec3222af', '4118f317-4aff-48d8-92c4-cadce8893982', '21f6055a-4930-4884-a9ca-1d3037dd92c2', 'bfba1fa8-bf97-4585-be54-8d35e1ae88da', '2dbaad43-0d05-4672-926d-419d111aad65', '8ef0b23c-713a-467d-944c-3647cc14e8e2', '84bd53f3-cd62-4200-9549-dc7d8ddcaf08', 'b67ebcd8-8eae-4f04-97dc-32ef1d141577', '6bee6a72-4e19-4a73-8dab-30496a7b21ab', '01ae5bf8-d23a-487f-a96d-93f450b42fd6', 'a688d998-01c3-4bb4-8111-576c718de592', '60906434-09a7-4cc1-b1a4-fa5ff82b1511', '5c4817fd-dd02-48a7-8f0c-793eb166efd5', 'b465de31-9714-47f2-a381-aad1de007b8e',

JSONDecodeError: Invalid control character at: line 3 column 493 (char 496)