In [5]:
import pandas as pd
import spacy
import os
import glob
import json

In [30]:
# The transcriptions live in a sibling folder of the notebook's working
# directory: <parent of cwd>/transcriptions/*.txt
parent_folder_path = os.path.dirname(os.getcwd())
transcription_folder_path = os.path.join(parent_folder_path, "transcriptions")

# NOTE: sorted() is a plain lexicographic sort, so "conversations_10.txt"
# comes before "conversations_2.txt". Later cells index into `texts` by
# position, so the order is deliberately left unchanged here.
text_files = sorted(glob.glob(os.path.join(transcription_folder_path, "*.txt")))


def _read_text(path):
    """Return the full UTF-8 content of *path*, closing the file afterwards."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()


# One string per transcription, in the same order as `text_files`.
texts = [_read_text(path) for path in text_files]


In [31]:
text_files

['/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_1.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_10.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_2.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_3.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_4.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_5.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_6.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_7.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_8.txt',
 '/Users/peachapongpoolpol/Digitisation College/wolf_haus/transcriptions/conversations_9.txt']

In [32]:
import spacy

# Load the German spaCy pipeline "de_core_news_sm"; preprocess() below runs
# every text through it (the model package must be installed separately).
nlp = spacy.load("de_core_news_sm")

# German number words one to ten (plus inflected forms of "ein").
# preprocess() keeps these tokens verbatim instead of lemmatizing/dropping them.
word_numbers = ["ein", "eine", "einen", "zwei", "drei", "vier", "fünf", "sechs", "sieben", "acht", "neun", "zehn"]

# Preprocess texts
def preprocess(text):
    """Normalize *text* into a space-separated string of tokens.

    The text is lower-cased and run through the module-level spaCy pipeline
    ``nlp``. For every token:

    * punctuation is dropped;
    * digit tokens and entries of ``word_numbers`` are kept verbatim;
    * other alphabetic tokens that are not stop words are replaced by their lemma;
    * everything else is dropped.
    """
    def _normalized(tok):
        # Returns the string to keep for this token, or None to drop it.
        if tok.is_punct:
            return None
        if tok.is_digit or tok.text in word_numbers:
            return tok.text
        if tok.is_alpha and not tok.is_stop:
            return tok.lemma_
        return None

    candidates = (_normalized(tok) for tok in nlp(text.lower()))
    return " ".join(piece for piece in candidates if piece is not None)

In [11]:
ROOM_KEYWORDS = [
    "Schlafzimmer", "Küche", "Wohnzimmer", "Badezimmer", "Arbeitszimmer", "Gästezimmer", "Esszimmer", "Kinderzimmer", 
    "Hauptschlafzimmer", "Speisekammer", "Fitnessraum", "Waschraum", "Keller", "Dachboden", 
    "Garage", "Lagerraum", "Spielzimmer", "Heimkino", "Büro", "Gartenhaus", "Hobbyraum", "Wintergarten", 
    "Empfangsraum", "Bibliothek", "Abstellraum", "Medienraum", "Werkstatt", "Technikraum", "Galerie", "Schrankraum", 
    "Kinderbereich", "Gäste-WC", "Dusche", "Umkleideraum", "Weinkeller", "Entspannungsraum", "Lesezimmer", "Pflegezimmer", 
    "Veranda", "Begehbarer Kleiderschrank", "Hauptbadezimmer", "Spielraum", "Offener Wohnbereich", 
    "Doppelgarage", "Multifunktionsraum", "Besprechungsraum", "Pförtnerloge", 
    "Außenküche", "Atelier", "Leseecke", "Arbeitsfläche", "Aufenthaltsraum", "Lounge", "Veranstaltungsraum", 
    "Cafeteria", "Pavillon", "Behandlungsraum", "Eingangsbereich", "Meditationsraum", "Bastelraum", 
    "Fitnessstudio", "Aufnahmeraum", "Serverraum", "Klassenraum", "Lagerfläche", "Übergangsbereich", "Heizungsraum", 
    "Labor", "Küchenzeile", "Arbeitsinsel", "Speiseraum", "Pausenraum", "Schlafsaal", "Ruheraum", "Tanzstudio", 
    "Empfangsbereich", "Arbeitsstation", "Werkraum", "Außenraum", "Gruppenraum", "Kunststudio", "Rezeption", 
    "Konferenzraum", "Bewegungsraum", "Arbeitsplatz"
]


SPECIAL_FEATURES_KEYWORDS = [
    "Whirlpool", "Pool", "Sauna", "Terrasse", "Balkon", "Garten", "Feuerstelle", "Kamin", "Fitnessraum", "Heimkino", 
    "Weinkeller", "Außenküche", "Wintergarten", "Veranda", "Bibliothek", "Gewächshaus", "Sichtfenster", "Skylights", 
    "Jacuzzi", "Grillplatz", "Spielplatz", "Trampolin", "Automatisierung", "Smart-Home-System", "Schiebedach", 
    "Lichtkuppel", "Kletterwand", "Fischteich", "Vogelhaus", "Wasserfall", "Kinderspielplatz", "Baumhaus", 
    "Fahrradständer", "Solarpaneele", "Ladestation für Elektroautos", "Whiskeyzimmer", "Billardraum", "Partykeller", 
    "Tanzfläche", "Karaokeraum", "Yoga-Studio", "Beobachtungsdeck", "Klettergerüst", "Boccia-Bahn", "Bowlingbahn", 
    "Tennisplatz", "Golfplatz", "Laufband", "Wellnessbereich", "Behandlungsraum", "Trainingsfläche", 
    "Privater Strand", "Strandkorb", "Indoor-Garten", "Bilderrahmenwand", "Leseecke", 
    "Beamerraum", "Poolhaus", "Offener Kamin", "Naturteich", "Winterlandschaft", "Weinlager", "Gartenschuppen", 
    "Außenfeuerstelle", "Glaswand", "Eingebauter Grill", "Panoramablick", "Wasserfontäne", "Außenbeleuchtung", 
    "Gartenteich", "Kinderrutsche", "Baumhaus", "Außenlounge", "Liegefläche", "Freiluftdusche", "Fass-Sauna", 
    "Natursteinmauer", "Kneippbecken", "Indoor-Pool", "Spielgeräte", "Gartenmöbel", "Pergola", "Hängematte", 
    "Kletterpflanzen", "Kräutergarten", "Ziergarten", "Vogeltränke", "Swimmingpool", "Flur", "Treppenhaus", "Dachterrasse", 
    "Haupthalle", "Bar", "Außenkamin", "Fenster", "Energiesystem", "Komposttoilette", "Regenwassernutzung", "solarbetriebene Heizung",
    "Spielbereich", "Solarheizung", "Mahlzeiten im Freien", "Beleuchtung", "Energiemonitoring-Systeme", "Solarpaneele",
    "Dämmung","Holzbalken"
]

DESIGN_STYLE_KEYWORDS = [
    "modern", "rustikal", "minimalistisch", "skandinavisch", "industriell", "klassisch", "traditionell", 
    "zeitgenössisch", "barock", "boho", "luxuriös", "gemütlich", "elegant", "natürlich", "alpin", "ländlich", 
    "antik", "viktorianisch", "kolonialstil", "chalet-stil", "mediterran", "orientalisch", "japanisch", 
    "mid-century modern", "art déco", "futuristisch", "maritim", "loft", "eklektisch", "griechisch", 
    "georgianisch", "kubistisch", "tropisch", "industrial chic", "französisch", "marokkanisch", "scandi-chic", 
    "california style", "landhausstil", "natural chic", "urban modern", "contemporary classic", "retro", 
    "neo-gothisch", "avantgarde", "provencial", "veranda style", "ethnisch", "postmodern", "high-tech", 
    "new-classic", "zen", "country-modern", "kanadisch", "alpenstil", "bohemian", "vintage", "pariser chic", 
    "gartenstil", "blockhausstil", "asiatisch", "coastal chic", "grünraumdesign", "kulturstil", "bali-stil", 
    "italienisch", "australisch", "neoklassisch", "seemannsstil", "alhambra-stil", "organisch", "cottagecore", 
    "hacienda-stil", "norwegisch", "schwedisch", "irisch", "holländisch", "luxury loft", 
    "minimalistisches", "linien", "raumnutzung","elementen","holzelementen", "rustikales", "selbstversorgung",
    "zeitgenössisch", "ausstattungen","konzentrieren","Naturholz"
]

MATERIALS_KEYWORDS = [
    "Holz", "Beton", "Stahl", "Glas", "Ziegel", "Marmor", "Granit", "Kunststoff", "Kupfer", "Aluminium", 
    "Keramik", "Fliesen", "Schiefer", "Naturstein", "Laminat", "PVC", "Lehm", "Bambus", "Eichenholz", 
    "Kiefernholz", "Recyclingmaterialien", "Verbundwerkstoffe", "Carbonfaser", "Fertigbeton", "Kalksandstein", 
    "Faserzement", "Sperrholz", "Edelstahl", "Weichholz", "Hartholz", "Metall", "Kunstharz", "Terrakotta", 
    "Zement", "Dämmstoffe", "Gipskarton", "Porenbeton", "Wellblech", "Furnier", "Gusseisen", "Polycarbonat", 
    "Akustikplatten", "Isolierglas", "Solarzellen", "Quarzit", "Travertin", "Kork", "Basalt", "Schlackestein", 
    "Polymere", "Asphalt", "Dachziegel", "Stroh", "Zementestrich", "Tonziegel", "Eisen", "Textilbeton", 
    "Filz", "Vinyl", "Bitumen", "Styropor", "Glasfaser", "Elastomer", "Papier", "Naturfasern", "Leder", 
    "Biokomposite", "Pressholz", "Epoxidharz", "Hanf", "Jute", "Seide", "Schieferplatten", "Ton", "Blech", 
    "Titan", "Gummimaterialien", "Glaswolle", "Perlite", "Bimsstein", "Polyethylen", "Kunstleder", 
    "Kunstmarmor", "Schweißdraht", "Holzwerkstoffplatten", "Styrodur", "Lichtbeton", "Pigmentierte Farben", 
    "Keramische Fliesen", "Naturmaterialien", "Säulenstein", "Architektenbeton", "Leichtbeton", "Glasmaterialien",
    "Premium-Holz", "Dämmung", "Langlebige", "umweltfreundliche", "Hochwertige", "nachhaltige",
    "Energieeffiziente", "Winterklima"
]

BUDGET_KEYWORDS = [
    "Euro", "Kosten", "Budget", "Preis", "Finanzierung", "Baukosten", "Projektkosten", "Schätzung", 
    "Investition", "Kostenrahmen", "Kostenvoranschlag", "Bausumme", "Endsumme", "Geldmittel", "Ausgaben", 
    "Aufwand", "Eigenkapital", "Gesamtbudget", "Planungskosten", "Baukredit", "Rohbaukosten", "Materialkosten", 
    "Lohnkosten", "Baufinanzierung", "Kreditrahmen", "Bauzinsen", "Gebühren", "Fördermittel", "Subventionen", 
    "Monatsraten", "Baukreditkosten", "Versicherungskosten", "Rücklagen", "Anzahlung", "Teilsumme", 
    "Gesamtaufwand", "Projektfinanzierung", "Bauabgaben", "Zuschuss", "Abschreibungskosten", "Umsatzsteuer", 
    "Fixkosten", "Vertragssumme", "Abrechnungsbetrag", "Voranschlag", "Eigenleistungskosten", "Planungspuffer", 
    "Projektschätzung", "Baurechnung", "Vorauszahlung", "Steuerkosten", "Gebäudewert", "Schuldendienst", 
    "Sonderkosten", "Kalkulation", "Kreditkosten", "Leasingrate", "Bauzuschuss", "Ersparnisse", "Kapitalbedarf", 
    "Finanzierungsplan", "Abzahlungsplan", "Kostenschätzung", "Projektmittel", "Tilgungsplan", "Hypothek", 
    "Baukostenanalyse", "Kreditbewilligung", "Eigenkapitalquote", "Mittelzuweisung", "Projektfonds", 
    "Rückzahlungsbetrag", "Gewährleistungskosten", "Baukostenübersicht", "Abrechnungsplan", "Schuldzins", 
    "Hauswert", "Bauabsicherung", "Tilgungsbeitrag", "Baukreditangebot", "Bauversicherungsbeitrag", 
    "Baubuchhaltung", "Baumittel", "Zinskosten", "Nebenkosten", "Baudarlehen", "Gesamtausgaben", 
    "Zinsaufwand", "Baupreis", "Materialpreis", "Honorarkosten", "Vertragskosten", "Vertragswert"
]

TIMELINE_KEYWORDS = [
    "Monate", "Jahre", "Wochen", "Zeitrahmen", "Fertigstellung", "Abschluss", "Projektzeit", "Bauzeit", 
    "Termine", "Planungsphase", "Zeitplan", "Meilensteine", "Lieferdatum", "Bauabschluss", "Abgabetermin", 
    "Fertigstellungsdatum", "Frist", "Baukalender", "Projektkalender", "Projektfristen", "Starttermin", 
    "Bauprojektzeit", "Projektstart", "Zieltermin", "Abschlusstermin", "Zeiteinschätzung", "Baufristen", 
    "Projektdauer", "Umsetzungszeit", "Auftragsdauer", "Bearbeitungszeit", "Planungstermine", "Ausführungsphase", 
    "Konstruktionszeit", "Baufertigstellung", "Abnahmefrist", "Lieferfristen", "Verzögerungen", "Endzeit", 
    "Baulaufzeit", "Projektstartdatum", "Projektenddatum", "Zeitabschätzung", "Arbeitszeit", "Projektzyklus", 
    "Bauetappen", "Bauphasen", "Bauverzug", "Projektetappen", "Durchlaufzeit", "Terminvorgabe", "Fertigstellungsfristen", 
    "Fertigstellungsphase", "Zeitschiene", "Bauprojektfristen", "Projektfortschritt", "Vorlaufzeit", 
    "Termineinhaltung", "Abschlusstermine", "Zeithorizont", "Startzeitpunkt", "Endzeitpunkt", "Tagesplan", 
    "Wochenplan", "Zeitmanagement", "Zeitpuffer", "Projektverlauf", "Projektzeitachse", "Bauvorlauf", 
    "Projektverzögerung", "Fertigstellungsdauer", "Projektdeadline", "Fristverlängerung", "Bauende", 
    "Projektende", "Bauabnahme", "Zeitfenster", "Projektmeilensteine", "Projektaufgaben", "Bauplanung", 
    "Lieferzeiträume", "Bautermin", "Zeitvorgaben", "Verzugszeit", "Projektplan", "Zeitrahmenplanung"
]

ADDITIONAL_NOTES_KEYWORDS = [
    "funktional", "nachhaltig", "energieeffizient", "familienfreundlich", "barrierefrei", "umweltfreundlich", 
    "sicher", "modular", "flexibel", "wetterbeständig", "energieautark", "anpassungsfähig", "pflegeleicht", 
    "wartungsarm", "hochwertig", "komfortabel", "innovativ", "raumoptimiert", "modernisiert", "intelligent", 
    "wartungsfreundlich", "ökologisch", "stabil", "langfristig", "ästhetisch", "luxuriös", "minimalistisch", 
    "schalldicht", "lichtdurchflutet", "pflegearm", "zeitgemäß", "belüftet", "hitzeresistent", "schallisoliert", 
    "rauchfrei", "grün", "sicherheitsoptimiert", "praktisch", "stilvoll", "familiengerecht", "einfallsreich", 
    "robust", "widerstandsfähig", "technologisch", "inspirierend", "benutzerfreundlich", "kostenbewusst", 
    "platzsparend", "geräuscharm", "intelligent", "belastbar", "energiesparend", "modular", "hochwertig", 
    "nachhaltigkeit", "autark", "modernisierung", "effizient", "individuell", "umweltgerecht", "pflegeleicht", 
    "langlebig", "naturnah", "gesundheitlich", "vorausschauend", "klimagerecht", "resilient", "raumoptimierung", 
    "innovativer ansatz", "freundlich", "geschmackvoll", "erweiterbar", "harmonisch", "zeitlos", "praktikabel", 
    "schadstoffarm", "dynamisch", "zukunftssicher", "resourcenschonend", "multifunktional", "adaptiv", 
    "wohnfreundlich", "ergonomisch", "modernste technik", "smart", "resistent", "wohnlich"
]

In [12]:
# Category name -> keyword list. The extraction function looks the lists up by
# these exact keys, and the mapping is what gets persisted as JSON.
keywords_dict = dict(
    ROOM_KEYWORDS=ROOM_KEYWORDS,
    SPECIAL_FEATURES_KEYWORDS=SPECIAL_FEATURES_KEYWORDS,
    DESIGN_STYLE_KEYWORDS=DESIGN_STYLE_KEYWORDS,
    MATERIALS_KEYWORDS=MATERIALS_KEYWORDS,
    BUDGET_KEYWORDS=BUDGET_KEYWORDS,
    TIMELINE_KEYWORDS=TIMELINE_KEYWORDS,
    ADDITIONAL_NOTES_KEYWORDS=ADDITIONAL_NOTES_KEYWORDS,
)

In [13]:
import os
import json

# Persist keywords_dict as <cwd>/keywords/keywords_dict.json.
keywords_folder = os.path.join(os.getcwd(), "keywords")
os.makedirs(keywords_folder, exist_ok=True)  # no error if it already exists
keywords_file_path = os.path.join(keywords_folder, "keywords_dict.json")

# ensure_ascii=False keeps umlauts readable in the file instead of \u escapes.
serialized_keywords = json.dumps(keywords_dict, ensure_ascii=False, indent=4)
with open(keywords_file_path, "w", encoding="utf-8") as json_file:
    json_file.write(serialized_keywords)

print(f"keywords_dict has been saved in: {keywords_file_path}")

keywords_dict has been saved in: /Users/peachapongpoolpol/Digitisation College/wolf_haus/model/keywords/keywords_dict.json


In [33]:

# Read the keyword mapping back from <cwd>/keywords/keywords_dict.json.
# This rebinds keywords_dict, replacing whatever was defined in memory before.
keywords_file_path = os.path.join(os.getcwd(), "keywords", "keywords_dict.json")

with open(keywords_file_path, "r", encoding="utf-8") as json_file:
    raw_keywords = json_file.read()
keywords_dict = json.loads(raw_keywords)

print("keywords_dict has been successfully loaded.")

keywords_dict has been successfully loaded.


In [15]:
keywords_dict

{'ROOM_KEYWORDS': ['Schlafzimmer',
  'Küche',
  'Wohnzimmer',
  'Badezimmer',
  'Arbeitszimmer',
  'Gästezimmer',
  'Esszimmer',
  'Kinderzimmer',
  'Hauptschlafzimmer',
  'Speisekammer',
  'Fitnessraum',
  'Waschraum',
  'Keller',
  'Dachboden',
  'Garage',
  'Lagerraum',
  'Spielzimmer',
  'Heimkino',
  'Büro',
  'Gartenhaus',
  'Hobbyraum',
  'Wintergarten',
  'Empfangsraum',
  'Bibliothek',
  'Abstellraum',
  'Medienraum',
  'Werkstatt',
  'Technikraum',
  'Galerie',
  'Schrankraum',
  'Kinderbereich',
  'Gäste-WC',
  'Dusche',
  'Umkleideraum',
  'Weinkeller',
  'Entspannungsraum',
  'Lesezimmer',
  'Pflegezimmer',
  'Veranda',
  'Begehbarer Kleiderschrank',
  'Hauptbadezimmer',
  'Spielraum',
  'Offener Wohnbereich',
  'Doppelgarage',
  'Multifunktionsraum',
  'Besprechungsraum',
  'Pförtnerloge',
  'Außenküche',
  'Atelier',
  'Leseecke',
  'Arbeitsfläche',
  'Aufenthaltsraum',
  'Lounge',
  'Veranstaltungsraum',
  'Cafeteria',
  'Pavillon',
  'Behandlungsraum',
  'Eingangsberei

In [254]:
# processed_keywords_dict = {
#     key: [preprocess(text) for text in value_list]
#     for key, value_list in keywords_dict.items()
# }

In [16]:
import pandas as pd
import re

# Function to extract data with adjectives
# German number words (plus inflected forms of "ein") accepted as a room count.
_WORD_NUMBERS = ["ein", "eine", "einen", "zwei", "drei", "vier", "fünf", "sechs", "sieben", "acht", "neun", "zehn"]

# Words that the generic "word in front of a keyword" capture picks up but
# that are not adjectives: articles, modal verbs and the conjunction "und".
_ARTICLES = ["die", "das", "der", "ein", "eine", "einen", "einem", "einer", "den", "dem"]
_MODAL_VERBS = ["sollte", "könnte", "wollte", "dürfte", "möchte", "müssen", "muss", "kann", "können", "sollen", "wollen", "dürfen"]
_FUNCTION_WORDS = frozenset(_ARTICLES + _MODAL_VERBS + ["und"])


def _keyword_alternation(words):
    """Return a regex alternation for *words*: escaped, de-duplicated, longest first.

    Regex alternation takes the first alternative that matches, so a short
    keyword listed before a longer one with the same prefix ("Pool" before
    "Poolhaus") would always win. Sorting by length avoids that.
    """
    unique_words = dict.fromkeys(word for word in words if word)
    ordered = sorted(unique_words, key=len, reverse=True)  # stable for equal lengths
    return "|".join(re.escape(word) for word in ordered)


def _adjective_keyword_phrases(text, words, drop_function_words=False):
    """Return "word keyword" phrases found in *text*, joined with ", ".

    Each keyword occurrence is reported together with the single word directly
    in front of it (if any). With ``drop_function_words`` that leading word is
    discarded when it is an article, a modal verb or "und".
    """
    alternation = _keyword_alternation(words)
    if not alternation:
        # An empty alternation would match at every position of the text.
        return ""
    pattern = rf"(\b[\wäöüß]+\b)?\s*({alternation})"
    phrases = []
    for leading_word, keyword in re.findall(pattern, text, re.IGNORECASE):
        if not keyword:
            continue
        if drop_function_words and leading_word.lower() in _FUNCTION_WORDS:
            leading_word = ""
        phrases.append(f"{leading_word} {keyword}".strip())
    return ", ".join(phrases)


def _extract_rooms(text, room_words):
    """Return "count [adjectives] room" phrases found in *text*, joined with ", "."""
    alternation = _keyword_alternation(room_words)
    if not alternation:
        return ""
    number_pattern = rf"(\b(?:\d+|{'|'.join(_WORD_NUMBERS)})\b)"  # digits or a number word
    rooms_pattern = rf"{number_pattern}((?:\s+\b[\wäöüß]+\b)+)?\s+({alternation})"
    rooms = [
        # Joining only the non-empty parts avoids the double space that a
        # missing adjective used to leave behind ("drei  Schlafzimmer").
        " ".join(part for part in (number, adjectives.strip(), room) if part)
        for number, adjectives, room in re.findall(rooms_pattern, text, re.IGNORECASE)
    ]
    # The greedy adjective group can swallow "... und eine ..." between two
    # rooms; turn that conjunction into a list separator. The word boundary
    # keeps words that merely start with "und" intact.
    return re.sub(r"\s+und\b", ",", ", ".join(rooms))


def extract_details_german_with_adjectives(text, keywords=None):
    """Extract a structured project summary from a German customer transcript.

    Parameters
    ----------
    text : str
        Raw transcript. Its first line becomes the project description.
    keywords : dict, optional
        Mapping with the keys ``"ROOM_KEYWORDS"``, ``"SPECIAL_FEATURES_KEYWORDS"``,
        ``"DESIGN_STYLE_KEYWORDS"`` and ``"MATERIALS_KEYWORDS"``, each holding a
        list of keyword strings. Defaults to the module-level ``keywords_dict``.

    Returns
    -------
    dict
        Keys "Project description", "Rooms", "Special features",
        "Design style", "Materials", "Budget" and "Timeline". Every value is a
        string; fields without a match are empty strings.

    Raises
    ------
    KeyError
        If one of the four keyword categories is missing from *keywords*.
    """
    details = {
        "Project description": "",
        "Rooms": "",
        "Special features": "",
        "Design style": "",
        "Materials": "",
        "Budget": "",
        "Timeline": "",
    }
    if not text:
        return details
    if keywords is None:
        keywords = keywords_dict

    details["Project description"] = text.split('\n')[0].strip()
    details["Rooms"] = _extract_rooms(text, keywords["ROOM_KEYWORDS"])

    # Special features keep their leading word as-is (e.g. "ein Heimkino").
    details["Special features"] = _adjective_keyword_phrases(
        text, keywords["SPECIAL_FEATURES_KEYWORDS"]
    )
    # Design style and materials drop leading articles / modal verbs / "und".
    details["Design style"] = _adjective_keyword_phrases(
        text, keywords["DESIGN_STYLE_KEYWORDS"], drop_function_words=True
    )
    details["Materials"] = _adjective_keyword_phrases(
        text, keywords["MATERIALS_KEYWORDS"], drop_function_words=True
    )

    # Budget: first amount followed by "€" or "Euro". Requiring a leading digit
    # prevents matches made only of whitespace/punctuation.
    budget_match = re.search(r"\d[\d\s.,]*(?:€|Euro)", text, re.IGNORECASE)
    details["Budget"] = budget_match.group(0).strip() if budget_match else ""

    # Timeline: first "<digits> Monat(e/en) | Jahr(e/en) | Woche(n)".
    timeline_pattern = r"(\d+ (?:Monat(?:en|e)?|Jahr(?:en|e)?|Woche(?:n)?))"
    timeline_match = re.search(timeline_pattern, text, re.IGNORECASE)
    details["Timeline"] = timeline_match.group(0) if timeline_match else ""

    return details



In [17]:
texts[3]

'Hallo! Wir planen ein Holz-Familienhaus, das sowohl funktional als auch umweltfreundlich ist. Da wir eine große Familie sind, brauchen wir mindestens fünf Schlafzimmer, einen geräumigen Wohnbereich und eine große Küche für Familientreffen. Ich hätte auch gerne ein Spielzimmer für die Kinder und einen Arbeitsplatz für mich.\n\nEs ist wichtig, dass das Haus nachhaltige Merkmale wie Solarpaneele, Regenwassernutzung und energieeffiziente Dämmung hat. Wir hätten gerne ein modernes Design mit viel Naturholz, um eine warme und einladende Atmosphäre zu schaffen. Unser Budget liegt bei etwa 750.000 Euro, und wir hoffen, dass es in 20 Monaten fertiggestellt werden kann. Könnten Sie etwas Nachhaltiges und Familienorientiertes für uns entwerfen?'

In [26]:
import random
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
from reportlab.lib.styles import getSampleStyleSheet
from datetime import datetime
from reportlab.lib.units import inch

# Random Data Generation
def generate_random_customer_name():
    """Return a random "<first name> <last name>" placeholder string."""
    first_names = ("Anna", "Max", "Laura", "Lukas", "Sophia", "Paul", "Mia", "Jonas", "Lea", "Felix")
    last_names = ("Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker", "Hoffmann", "Schäfer")
    # Draw the first name before the last name (same draw order as the f-string form).
    given = random.choice(first_names)
    family = random.choice(last_names)
    return " ".join((given, family))

def generate_random_address():
    """Return a random placeholder address: "<street> <1-200>, <5-digit code> <city>"."""
    streets = ("Musterstraße", "Hauptstraße", "Gartenweg", "Bachstraße", "Schulweg", "Kirchplatz", "Am Dorf", "Lindenweg")
    cities = ("Berlin", "Hamburg", "München", "Köln", "Frankfurt", "Stuttgart", "Düsseldorf", "Dortmund", "Essen", "Bremen")
    # Draw order: street, house number, postal code, city.
    street = random.choice(streets)
    house_number = random.randint(1, 200)
    postal_code = random.randint(10000, 99999)
    city = random.choice(cities)
    return f"{street} {house_number}, {postal_code} {city}"

def generate_random_phone():
    """Return a masked placeholder phone number "+49-<3 digits>-<4 digits>-XXX"."""
    prefix = random.randint(100, 999)
    middle = random.randint(1000, 9999)
    return f"+49-{prefix}-{middle}-XXX"

def generate_random_client_name():
    """Return a random client name; drawn from the same pool as customer names."""
    client = generate_random_customer_name()
    return client


# Random data
# Drawn once when this cell runs; the report loop further down reads these
# module-level values, so every report it writes shows the same placeholder
# customer/client until this cell is executed again.
customer_name = generate_random_customer_name()
customer_address = generate_random_address()
customer_phone = generate_random_phone()
client_name = generate_random_client_name()


In [27]:
def preprocess_for_bullets(text):
    """Turn a comma/newline separated string into one "• Item" line per entry.

    Empty entries are skipped and the first character of each entry is
    upper-cased.
    """
    lines = [line.strip() for line in text.replace(",", "\n").split("\n")]
    return "\n".join([f"• {line[0].upper() + line[1:]}" for line in lines if line])


# Output folders. Neither pandas nor reportlab creates missing directories,
# so make sure both exist before the first report is written.
output_csv_folder = os.path.join(os.getcwd(), "gen-csv-report")
output_pdf_folder = os.path.join(os.getcwd(), "gen-pdf-report")
os.makedirs(output_csv_folder, exist_ok=True)
os.makedirs(output_pdf_folder, exist_ok=True)

# Logo geometry: scale the 1128x560 source image to 200pt width, keeping
# the aspect ratio.
original_width = 1128
original_height = 560
desired_width = 200
aspect_ratio = original_height / original_width
calculated_height = desired_width * aspect_ratio
logo_path = "logo.png"  # Replace with your logo file (resolved relative to cwd)

# Write one CSV and one PDF report per transcription file.
for file in text_files:
    with open(file, 'r', encoding='utf-8') as transcript:
        text = transcript.read()
    report_dict = extract_details_german_with_adjectives(text)

    # Reports are named after the transcription file (without extension).
    base_name = os.path.splitext(os.path.basename(file))[0]

    ## save CSV ##
    # Two-column frame: one row per extracted field.
    df = pd.DataFrame(list(report_dict.items()), columns=["Category", "Data"])
    csv_file = os.path.join(output_csv_folder, f"{base_name}.csv")
    df.to_csv(csv_file, index=False, encoding="utf-8")

    ## save PDF ##
    details = report_dict

    # Table rows: list-like fields are rendered as bullet lines.
    data = [
        ["Field", "Details"],
        ["Rooms", preprocess_for_bullets(details["Rooms"])],
        ["Special features", preprocess_for_bullets(details["Special features"])],
        ["Design style", preprocess_for_bullets(details["Design style"])],
        ["Materials", preprocess_for_bullets(details["Materials"])],
        ["Budget", details["Budget"]],
        ["Timeline", details["Timeline"]]
    ]

    pdf_file = os.path.join(output_pdf_folder, f"{base_name}.pdf")

    # Define PDF document with explicit margins
    doc = SimpleDocTemplate(
        pdf_file,
        pagesize=A4,
        leftMargin=1 * inch,
        rightMargin=1 * inch,
        topMargin=1 * inch,
        bottomMargin=1 * inch
    )
    styles = getSampleStyleSheet()
    story = []

    # Page width left over between the margins
    usable_width = A4[0] - doc.leftMargin - doc.rightMargin

    # Add Logo
    logo = Image(logo_path, width=desired_width, height=calculated_height)
    story.append(logo)

    # Customer Details with Bullets
    story.append(Paragraph("<b>Customer Details:</b>", styles["Heading2"]))
    story.append(Paragraph(f"• Customer name: {customer_name}", styles["Normal"]))
    story.append(Paragraph(f"• Customer address: {customer_address}", styles["Normal"]))
    story.append(Paragraph(f"• Customer phone: {customer_phone}", styles["Normal"]))
    story.append(Spacer(1, 10))

    # Conversation Script
    story.append(Paragraph("<b>Conversation Script:</b>", styles["Heading2"]))
    story.append(Paragraph(details["Project description"], styles["Normal"]))
    story.append(Spacer(1, 10))

    # Report Summary
    story.append(Paragraph("<b>Report Summary:</b>", styles["Heading2"]))

    # Column widths: 30% for 'Field', 70% for 'Details'
    col_widths = [0.3 * usable_width, 0.7 * usable_width]

    # Add Table to the Story
    table = Table(data, colWidths=col_widths)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),  # Set header text color to black
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ]))
    story.append(table)
    story.append(Spacer(1, 10))

    # Client name and generation timestamp below the table
    current_datetime = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    story.append(Spacer(1, 50))  # Extra vertical gap before the footer lines
    footer_style = styles["Normal"]
    footer_style.alignment = 0  # Left align
    story.append(Paragraph(f"Client Name: {client_name}", footer_style))
    story.append(Paragraph(f"Date and Time: {current_datetime}", footer_style))

    # Build the PDF
    doc.build(story)

In [18]:
# report_dict = extract_details_german_with_adjectives(texts[1])

In [19]:
# report_dict

{'Project description': 'Wir möchten ein modernes luxuriöses Holzhaus für Wochenendtrips. Es sollte drei Schlafzimmer, einen offenen Wohn- und Essbereich und ein Heimkino haben. Zusätzliche Merkmale sollten ein Infinity-Pool, ein Außenkamin und bodentiefe Fenster sein, um die Aussicht zu genießen.',
 'Rooms': 'drei  Schlafzimmer, ein  Heimkino',
 'Special features': 'ein Heimkino, Pool, ein Außenkamin, bodentiefe Fenster',
 'Design style': 'modern, luxuriös, zeitgenössisch, hochwertigen Ausstattungen',
 'Materials': 'luxuriöses Holz, mit hochwertige, mit Premium-Holz,, Glas',
 'Budget': ' 1.000.000 €',
 'Timeline': '18 Monaten'}

In [290]:
# # Data extraction
# data = []
# for text in texts:
#     #cleaned_text = preprocess(text)
#     #details = extract_details_german_with_adjectives(cleaned_text)
#     details = extract_details_german_with_adjectives(text)
#     data.append(details)

# # Convert to DataFrame
# df = pd.DataFrame(data)

# df



In [292]:
#pip install reportlab

In [299]:
import random
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image
from reportlab.lib.styles import getSampleStyleSheet
from datetime import datetime
from reportlab.lib.units import inch

# Random Data Generation
def generate_random_customer_name():
    """Return a random "<first name> <last name>" placeholder string.

    The same function name is also defined in an earlier cell of this
    notebook; whichever cell ran last provides the active definition.
    """
    first_names = ("Anna", "Max", "Laura", "Lukas", "Sophia", "Paul", "Mia", "Jonas", "Lea", "Felix")
    last_names = ("Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker", "Hoffmann", "Schäfer")
    # Draw the first name before the last name (same draw order as the f-string form).
    given = random.choice(first_names)
    family = random.choice(last_names)
    return " ".join((given, family))

def generate_random_address():
    """Return a random placeholder address: "<street> <1-200>, <5-digit code> <city>".

    The same function name is also defined in an earlier cell of this
    notebook; whichever cell ran last provides the active definition.
    """
    streets = ("Musterstraße", "Hauptstraße", "Gartenweg", "Bachstraße", "Schulweg", "Kirchplatz", "Am Dorf", "Lindenweg")
    cities = ("Berlin", "Hamburg", "München", "Köln", "Frankfurt", "Stuttgart", "Düsseldorf", "Dortmund", "Essen", "Bremen")
    # Draw order: street, house number, postal code, city.
    street = random.choice(streets)
    house_number = random.randint(1, 200)
    postal_code = random.randint(10000, 99999)
    city = random.choice(cities)
    return f"{street} {house_number}, {postal_code} {city}"

def generate_random_phone():
    """Return a masked placeholder phone number "+49-<3 digits>-<4 digits>-XXX".

    The same function name is also defined in an earlier cell of this
    notebook; whichever cell ran last provides the active definition.
    """
    prefix = random.randint(100, 999)
    middle = random.randint(1000, 9999)
    return f"+49-{prefix}-{middle}-XXX"

def generate_random_client_name():
    """Return a random client name; drawn from the same pool as customer names.

    The same function name is also defined in an earlier cell of this
    notebook; whichever cell ran last provides the active definition.
    """
    client = generate_random_customer_name()
    return client


# Random data
# Placeholder customer/client values for the single report built below.
customer_name = generate_random_customer_name()
customer_address = generate_random_address()
customer_phone = generate_random_phone()
client_name = generate_random_client_name()

# Example data
# NOTE(review): report_dict is not assigned in this cell. It has to exist
# already in the kernel (the per-file report loop assigns it; the cell that
# set it from a single text is commented out), so this cell fails on a fresh
# kernel if run on its own.
details = report_dict

# Function to preprocess text for bullet points
def preprocess_for_bullets(text):
    """Turn a comma/newline separated string into one "• Item" line per entry.

    Commas are treated like line breaks, surrounding whitespace is stripped,
    empty entries are skipped and the first character of each entry is
    upper-cased.
    """
    bullets = []
    for chunk in text.replace(",", "\n").split("\n"):
        entry = chunk.strip()
        if not entry:
            continue
        bullets.append(f"• {entry[0].upper() + entry[1:]}")
    return "\n".join(bullets)

# Preprocess table fields
# First row is the table header; list-like fields become bullet lines,
# Budget and Timeline are passed through unchanged.
data = [
    ["Field", "Details"],
    ["Rooms", preprocess_for_bullets(details["Rooms"])],
    ["Special features", preprocess_for_bullets(details["Special features"])],
    ["Design style", preprocess_for_bullets(details["Design style"])],
    ["Materials", preprocess_for_bullets(details["Materials"])],
    ["Budget", details["Budget"]],
    ["Timeline", details["Timeline"]]
]

# Create PDF
# Relative path: the file is written to the current working directory and
# overwritten on every run of this cell.
pdf_file = "construction_report.pdf"

# Define PDF document with explicit margins
doc = SimpleDocTemplate(
    pdf_file,
    pagesize=A4,
    leftMargin=1 * inch,
    rightMargin=1 * inch,
    topMargin=1 * inch,
    bottomMargin=1 * inch
)
styles = getSampleStyleSheet()
story = []  # flowables, appended in the order they should appear in the PDF

# Calculate usable width
# Page width minus the left and right margins; used for the table columns.
usable_width = A4[0] - doc.leftMargin - doc.rightMargin

# Logo Dimensions
# Hard-coded source image size, used only to derive the scaled height.
original_width = 1128
original_height = 560
desired_width = 200  # Set the desired width for the logo
aspect_ratio = original_height / original_width  # Calculate aspect ratio
calculated_height = desired_width * aspect_ratio  # Calculate height maintaining aspect ratio

# Add Logo
logo_path = "logo.png"  # Replace with your logo file
logo = Image(logo_path, width=desired_width, height=calculated_height)  # Set width and calculated height
story.append(logo)
#story.append(Spacer(1, 1))

# Customer Details with Bullets
story.append(Paragraph("<b>Customer Details:</b>", styles["Heading2"]))
story.append(Paragraph(f"• Customer name: {customer_name}", styles["Normal"]))
story.append(Paragraph(f"• Customer address: {customer_address}", styles["Normal"]))
story.append(Paragraph(f"• Customer phone: {customer_phone}", styles["Normal"]))
story.append(Spacer(1, 10))

# Conversation Script
# Shows the "Project description" field of the extracted details.
story.append(Paragraph("<b>Conversation Script:</b>", styles["Heading2"]))
story.append(Paragraph(details["Project description"], styles["Normal"]))
story.append(Spacer(1, 10))

# Report Summary
story.append(Paragraph("<b>Report Summary:</b>", styles["Heading2"]))


# Dynamically calculate column widths
col_widths = [0.3 * usable_width, 0.7 * usable_width]  # Adjust column widths (30% for 'Field', 70% for 'Details')

# Add Table to the Story
# Style commands address cells as (column, row); (0, 0) to (-1, 0) is the header row.
table = Table(data, colWidths=col_widths)
table.setStyle(TableStyle([
    ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),  # Set header text color to black
    ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
    ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
    ('VALIGN', (0, 0), (-1, -1), 'TOP'),
]))
story.append(table)
story.append(Spacer(1, 10))

# Client name and generation timestamp, placed after the table
current_datetime = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
story.append(Spacer(1, 50))  # Fixed 50pt gap before the footer lines
# NOTE: this is the shared "Normal" style object from the stylesheet, not a
# copy, so the alignment assignment below modifies styles["Normal"] itself.
footer_style = styles["Normal"]
footer_style.alignment = 0  # Left align
story.append(Paragraph(f"Client Name: {client_name}", footer_style))
story.append(Paragraph(f"Date and Time: {current_datetime}", footer_style))

# Build the PDF
doc.build(story)

print(f"PDF generated: {pdf_file}")

PDF generated: construction_report.pdf


In [35]:
import glob
import json
import os
import random ## For random name and phone numbers
import re
from datetime import datetime
from xml.sax.saxutils import escape

import pandas as pd
import spacy
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image

# Transcripts are expected in "<parent of the working directory>/transcriptions"
parent_folder_path = os.path.dirname(os.getcwd())
transcription_folder_path = os.path.join(parent_folder_path, "transcriptions")

# Collect every .txt file in that folder, ordered lexicographically by path
txt_glob = os.path.join(transcription_folder_path, "*.txt")
text_files = sorted(glob.glob(txt_glob))


#-------------------------#
# spaCy pipeline "de_core_news_sm"; preprocess() runs every text through it
nlp = spacy.load("de_core_news_sm")

# Number words that preprocess() keeps verbatim instead of lemmatizing or dropping
word_numbers = [
    "ein", "eine", "einen",
    "zwei", "drei", "vier", "fünf", "sechs",
    "sieben", "acht", "neun", "zehn",
]

# Preprocess texts
def preprocess(text):
    """Normalize ``text`` into a space-separated string of kept tokens.

    The text is lower-cased and run through the module-level ``nlp`` pipeline.
    Punctuation is dropped; digit tokens and entries of ``word_numbers`` are
    kept verbatim; remaining alphabetic non-stop-word tokens are replaced by
    their lemma. Every other token is discarded.
    """
    kept = []
    for tok in nlp(text.lower()):
        if tok.is_punct:
            continue
        if tok.is_digit or tok.text in word_numbers:
            # numbers (numeric or spelled out) are preserved as written
            kept.append(tok.text)
            continue
        if tok.is_alpha and not tok.is_stop:
            kept.append(tok.lemma_)
    return " ".join(kept)

# Keyword lists are read from "<working directory>/keywords/keywords_dict.json"
keywords_file_path = os.path.join(os.getcwd(), "keywords", "keywords_dict.json")

# Parse the JSON document into keywords_dict
with open(keywords_file_path, mode="r", encoding="utf-8") as json_file:
    keywords_dict = json.loads(json_file.read())
    
    
#-------------------------#
##RULE-BASED MODEL##

# Function to extract data with adjectives
def _join_keyword_hits(text, keywords):
    """Return comma-joined "<preceding word> <keyword>" phrases found in ``text``.

    ``keywords`` are joined into a regex alternation as-is (they are not
    escaped) and matched case-insensitively. The single word directly in front
    of a keyword, when there is one, is kept as its adjective.
    """
    alternation = "|".join(keywords)
    pattern = rf"(\b[\wäöüß]+\b)?\s*({alternation})"
    return ", ".join(
        f"{adjective or ''} {keyword}".strip()
        for adjective, keyword in re.findall(pattern, text, re.IGNORECASE)
        if keyword
    )


def _und_to_comma(phrases):
    """Replace the standalone conjunction " und" with "," so items split cleanly.

    The trailing word boundary keeps words that merely start with "und"
    (e.g. "undekorierte") intact.
    """
    return re.sub(r" und\b", ",", phrases)


def _strip_articles_and_modals(phrases):
    """Remove articles and modal verbs from ``phrases`` and normalize whitespace."""
    articles = ["die", "das", "der", "ein", "eine", "einen", "einem", "einer", "den", "dem"]
    modal_verbs = ["sollte", "könnte", "wollte", "dürfte", "möchte", "müssen", "muss", "kann", "können", "sollen", "wollen", "dürfen"]
    articles_pattern = rf"\b(?:{'|'.join(articles)})\b"
    modal_verbs_pattern = rf"\b(?:{'|'.join(modal_verbs)})\b"

    phrases = re.sub(articles_pattern, "", phrases, flags=re.IGNORECASE)
    phrases = re.sub(modal_verbs_pattern, "", phrases, flags=re.IGNORECASE)
    return re.sub(r"\s+", " ", phrases).strip()


def extract_details_german_with_adjectives(text):
    """Extract report fields from a transcript using keyword/regex rules.

    Keyword lists are read from the module-level ``keywords_dict`` (keys
    ``ROOM_KEYWORDS``, ``SPECIAL_FEATURES_KEYWORDS``, ``DESIGN_STYLE_KEYWORDS``
    and ``MATERIALS_KEYWORDS``). All matching is case-insensitive.

    Args:
        text: The transcript text.

    Returns:
        dict with string values for the keys "Project description" (first line
        of ``text``, stripped), "Rooms", "Special features", "Design style",
        "Materials" (each a comma-separated list of phrases), "Budget" and
        "Timeline" (first match in the text, or "" when nothing matches).
    """
    details = {
        "Project description": "",
        "Rooms": "",
        "Special features": "",
        "Design style": "",
        "Materials": "",
        "Budget": "",
        "Timeline": "",
    }

    # The first line of the transcript doubles as the project description
    details["Project description"] = text.split('\n')[0].strip()

    #----------------------#
    # Rooms: "<number> [adjectives ...] <room keyword>"
    word_numbers = ["ein", "eine", "einen", "zwei", "drei", "vier", "fünf", "sechs", "sieben", "acht", "neun", "zehn"]
    number_pattern = rf"(\b(?:\d+|{'|'.join(word_numbers)})\b)"  # numeric or spelled-out count
    room_keywords = "|".join(keywords_dict["ROOM_KEYWORDS"])
    rooms_pattern = rf"{number_pattern}((?:\s+\b[\wäöüß]+\b)+)?\s+({room_keywords})"
    rooms_matches = re.findall(rooms_pattern, text, re.IGNORECASE)

    # Join only the non-empty parts so a room without adjectives does not get
    # a double space between the count and the room name.
    details["Rooms"] = _und_to_comma(", ".join(
        " ".join(part for part in (num, adj.strip(), room) if part)
        for num, adj, room in rooms_matches
    ))

    #-------------------------#
    # Special features, design style and materials share the same
    # "<optional preceding word> <keyword>" rule.
    details["Special features"] = _join_keyword_hits(text, keywords_dict["SPECIAL_FEATURES_KEYWORDS"])
    details["Design style"] = _join_keyword_hits(text, keywords_dict["DESIGN_STYLE_KEYWORDS"])
    details["Materials"] = _join_keyword_hits(text, keywords_dict["MATERIALS_KEYWORDS"])

    #-------------------------#
    # Budget: the amount must start with a digit. Without that requirement a
    # bare " Euro" (or the start of " europäisch") would be reported as the
    # budget, and real matches would carry leading whitespace/punctuation.
    budget_pattern = r"\d[\d\s.,]*(?:€|Euro)"
    budget_match = re.search(budget_pattern, text, re.IGNORECASE)
    details["Budget"] = budget_match.group(0) if budget_match else ""

    #-------------------------#
    # Timeline: "<digits> <Monat(e/en) | Jahr(e/en) | Woche(n)>"
    timeline_pattern = r"(\d+ (?:Monat(?:en|e)?|Jahr(?:en|e)?|Woche(?:n)?))"
    timeline_match = re.search(timeline_pattern, text, re.IGNORECASE)
    details["Timeline"] = timeline_match.group(0) if timeline_match else ""

    #-------------------------#
    # Clean up design style and materials: split on "und", then drop articles
    # and modal verbs that were captured as the "adjective" word.
    details["Design style"] = _strip_articles_and_modals(_und_to_comma(details["Design style"]))
    details["Materials"] = _strip_articles_and_modals(_und_to_comma(details["Materials"]))

    return details

##-------##
# Random Data Generation
def generate_random_customer_name():
    """Return a random "<first name> <last name>" string.

    The first name is drawn before the last name, each with ``random.choice``
    from a fixed list.
    """
    first_names = [
        "Anna", "Max", "Laura", "Lukas", "Sophia",
        "Paul", "Mia", "Jonas", "Lea", "Felix",
    ]
    last_names = [
        "Müller", "Schmidt", "Schneider", "Fischer", "Weber",
        "Meyer", "Wagner", "Becker", "Hoffmann", "Schäfer",
    ]
    given = random.choice(first_names)
    family = random.choice(last_names)
    return given + " " + family

def generate_random_address():
    """Return a random address string "<street> <number>, <5-digit code> <city>".

    Street and city come from fixed lists; the house number is drawn from
    1-200 and the code from 10000-99999. The four random draws happen in the
    order street, house number, code, city.
    """
    streets = [
        "Musterstraße", "Hauptstraße", "Gartenweg", "Bachstraße",
        "Schulweg", "Kirchplatz", "Am Dorf", "Lindenweg",
    ]
    cities = [
        "Berlin", "Hamburg", "München", "Köln", "Frankfurt",
        "Stuttgart", "Düsseldorf", "Dortmund", "Essen", "Bremen",
    ]
    street = random.choice(streets)
    house_number = random.randint(1, 200)
    postal_code = random.randint(10000, 99999)
    city = random.choice(cities)
    return "{} {}, {} {}".format(street, house_number, postal_code, city)

def generate_random_phone():
    """Return a random placeholder phone number string.

    The result has the form ``+49-<3 digits>-<4 digits>-XXX``: both numeric
    groups are drawn with ``random.randint`` and the ``XXX`` suffix is literal.
    """
    first_group = random.randint(100, 999)
    second_group = random.randint(1000, 9999)
    return "+49-" + str(first_group) + "-" + str(second_group) + "-XXX"

def generate_random_client_name():
    """Return a random client name.

    Client names come from the same generator as customer names.
    """
    client = generate_random_customer_name()
    return client


# Randomly generated placeholder identities. They are drawn once here, so every
# report produced further down in this cell shows the same values.
(
    customer_name,
    customer_address,
    customer_phone,
    client_name,
) = (
    generate_random_customer_name(),
    generate_random_address(),
    generate_random_phone(),
    generate_random_client_name(),
)

##--------##




## Extract data and generate csv and pdf-report
def preprocess_for_bullets(text):
    """Turn a comma- or newline-separated string into a bulleted, multi-line string.

    Every non-empty item is stripped, gets its first character upper-cased and
    is prefixed with "• "; the items are joined with newlines.
    """
    lines = [line.strip() for line in text.replace(",", "\n").split("\n")]
    return "\n".join(f"• {line[0].upper() + line[1:]}" for line in lines if line)


# Output folders (relative to the working directory). Create them up front so
# a fresh checkout does not fail on the first to_csv()/build() call.
output_csv_folder = os.path.join(os.getcwd(), "gen-csv-report")
output_pdf_folder = os.path.join(os.getcwd(), "gen-pdf-report")
os.makedirs(output_csv_folder, exist_ok=True)
os.makedirs(output_pdf_folder, exist_ok=True)

# Logo: scale to the desired width while keeping the source image's aspect ratio
original_width = 1128
original_height = 560
desired_width = 200
aspect_ratio = original_height / original_width
calculated_height = desired_width * aspect_ratio
logo_path = "logo.png"  # Replace with your logo file

# One CSV and one PDF report per transcript, named after the transcript file
for file in text_files:
    # Read the transcript and close the file handle right away
    with open(file, 'r', encoding='utf-8') as transcript_file:
        text = transcript_file.read()
    report_dict = extract_details_german_with_adjectives(text)

    # Base file name (without extension), shared by the CSV and the PDF output
    base_name = os.path.splitext(os.path.basename(file))[0]

    ## Save CSV ##
    # Two-column frame: one row per extracted field
    df = pd.DataFrame(list(report_dict.items()), columns=["Category", "Data"])
    csv_file = os.path.join(output_csv_folder, f"{base_name}.csv")
    df.to_csv(csv_file, index=False, encoding="utf-8")

    ## Save PDF ##
    details = report_dict

    # Rows of the summary table: a header row followed by one row per field.
    # Multi-valued fields are rendered as bullet lists.
    data = [
        ["Field", "Details"],
        ["Rooms", preprocess_for_bullets(details["Rooms"])],
        ["Special features", preprocess_for_bullets(details["Special features"])],
        ["Design style", preprocess_for_bullets(details["Design style"])],
        ["Materials", preprocess_for_bullets(details["Materials"])],
        ["Budget", details["Budget"]],
        ["Timeline", details["Timeline"]]
    ]

    pdf_file = os.path.join(output_pdf_folder, f"{base_name}.pdf")

    # A4 document with one-inch margins on every side
    doc = SimpleDocTemplate(
        pdf_file,
        pagesize=A4,
        leftMargin=1 * inch,
        rightMargin=1 * inch,
        topMargin=1 * inch,
        bottomMargin=1 * inch
    )
    styles = getSampleStyleSheet()
    story = []

    # Horizontal space left between the margins; used to size the table columns
    usable_width = A4[0] - doc.leftMargin - doc.rightMargin

    # Logo
    logo = Image(logo_path, width=desired_width, height=calculated_height)
    story.append(logo)

    # Customer details as a bulleted list
    story.append(Paragraph("<b>Customer Details:</b>", styles["Heading2"]))
    story.append(Paragraph(f"• Customer name: {customer_name}", styles["Normal"]))
    story.append(Paragraph(f"• Customer address: {customer_address}", styles["Normal"]))
    story.append(Paragraph(f"• Customer phone: {customer_phone}", styles["Normal"]))
    story.append(Spacer(1, 10))

    # Conversation script: transcript text is free text, so escape '&', '<'
    # and '>' to keep Paragraph from interpreting them as markup.
    story.append(Paragraph("<b>Conversation Script:</b>", styles["Heading2"]))
    story.append(Paragraph(escape(details["Project description"]), styles["Normal"]))
    story.append(Spacer(1, 10))

    # Report summary table
    story.append(Paragraph("<b>Report Summary:</b>", styles["Heading2"]))

    # 30% of the usable width for 'Field', 70% for 'Details'
    col_widths = [0.3 * usable_width, 0.7 * usable_width]

    table = Table(data, colWidths=col_widths)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),   # header row background
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.black),        # header row text colour
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ]))
    story.append(table)
    story.append(Spacer(1, 10))

    # Footer lines: client name and generation timestamp, left-aligned.
    # The spacer only adds a fixed 50-point gap after the table.
    current_datetime = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    story.append(Spacer(1, 50))
    footer_style = styles["Normal"]
    footer_style.alignment = 0  # Left align
    story.append(Paragraph(f"Client Name: {client_name}", footer_style))
    story.append(Paragraph(f"Date and Time: {current_datetime}", footer_style))

    # Render the story into the PDF file
    doc.build(story)
    

