# **1. Libaries and files**

In [33]:
#Importing libraries
import os
import glob
import re
import json
from bs4 import BeautifulSoup, Tag, NavigableString
import xml.etree.ElementTree as ET
from xml.dom import minidom
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
import pandas as pd
from lxml import etree

In [34]:
#Change accordingly
file_path_index="C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/tei/Index/index_person_place_org.xml"

# **2. Creating csv with index**

In [35]:
#Accessing index
tree = etree.parse(file_path_index)
root = tree.getroot()

In [36]:
# Define the TEI namespace
ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

## **2.1. Index person**

In [37]:
#Partly created with the help of u:ai
# Extract <person> elements
persons = root.xpath('//tei:listPerson/tei:person', namespaces=ns)
print(f"Number of <person> elements found: {len(persons)}")

# Extract data
data = []
for person in persons:
    person_data = {}
    # Extract xml:id
    person_data['ID'] = person.get('{http://www.w3.org/XML/1998/namespace}id')
    
    # Extract forename
    forename = person.find('tei:persName/tei:forename', namespaces=ns)
    person_data['forename'] = forename.text if forename is not None else None
    
    # Extract surname
    surname = person.find('tei:persName/tei:surname', namespaces=ns)
    person_data['surname'] = surname.text if surname is not None else None

    # Extract surname maiden
    surname_maiden = person.find('tei:persName/tei:surname[@type="maiden"]', namespaces=ns)
    person_data['surname maiden']= surname_maiden.text if surname_maiden is not None else None
    
    # Extract occupations (can be multiple)
    occupations = person.findall('tei:occupation', namespaces=ns)
    occupation_data = []
    for occ in occupations:
        # Get the main text of the occupation
        occ_text = occ.text.strip() if occ.text else ""
        
        # Check for nested <orgName> elements
        org_name = occ.find('tei:orgName', namespaces=ns)
        if org_name is not None:
            org_name_text = org_name.text.strip() if org_name.text else ""
            # Combine the occupation text with the organization name
            occ_text += f" {org_name_text}"
        # Add the processed occupation to the list
        occupation_data.append(occ_text)
        # Join all occupations into a single string
        person_data['occupations'] = "; ".join(occupation_data)

    # Extract affiliations (can be multiple)
    affiliations = person.findall('tei:affiliation', namespaces=ns)

    affiliations_data=[]
    for aff in affiliations:
        # Get the main text of the occupation
        aff_text = aff.text.strip() if aff.text else ""
        
        # Check for nested <orgName> elements
        org_name = aff.find('tei:orgName', namespaces=ns)
        if org_name is not None:
            org_name_text = org_name.text.strip() if org_name.text else ""
            # Combine the affiliation text with the organization name
            aff_text += f" {org_name_text}"
        # Add the processed occupation to the list
        affiliations_data.append(aff_text)
        # Join all occupations into a single string
        person_data['affiliations'] = "; ".join(affiliations_data)
    
    # Extract death date and description
    death = person.find('tei:death', namespaces=ns)
    person_data['death_date'] = death.get('when') if death is not None else None
    
    # Extract idnos
    #Wikidata
    wikidata=person.find('tei:idno[@subtype="WIKIDATA"]', namespaces=ns)
    person_data['Wikidata']=wikidata.text if wikidata is not None else None

    #WGW
    wgw=person.find('tei:idno[@subtype="WGW"]', namespaces=ns)
    person_data['WGW']=wgw.text if wgw is not None else None

    #GND
    gnd=person.find('tei:idno[@subtype="GND"]', namespaces=ns)
    person_data['GND']=gnd.text if gnd is not None else None

    #Wikipedia
    wikipedia=person.find('tei:idno[@subtype="Wikipedia"]', namespaces=ns)
    person_data['Wikipedia']=wikipedia.text if wikipedia is not None else None

    #ÖBL
    öbl=person.find('tei:idno[@subtype="ÖBL"]', namespaces=ns)
    person_data['ÖBL']=öbl.text if öbl is not None else None

    #PMB
    pmb=person.find('tei:idno[@subtype="PMB"]', namespaces=ns)
    person_data['PMB']=pmb.text if pmb is not None else None

    #VIAF
    viaf=person.find('tei:idno[@subtype="VIAF"]', namespaces=ns)
    person_data['VIAF']=viaf.text if viaf is not None else None
    
    # Append the person's data to the list
    data.append(person_data)

# Convert to pandas DataFrame
df_person = pd.DataFrame(data)

#Fill empty rows (needed for json later on)
df_person= df_person.fillna("")

#Duplicate ID and create website-name in new column
df_person['Website']=df_person.loc[:,'ID']+f".html"

# Save to a CSV file
df_person.to_csv("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/csv/person_index.csv", index=False)
df_person.to_excel("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/Excel/person_index.xlsx", index=False)

# Display the DataFrame
print(df_person)

Number of <person> elements found: 74
               ID  forename                 surname surname maiden  \
0   LudwigLobmeyr    Ludwig                 Lobmeyr                  
1   JosephLobmeyr    Joseph                 Lobmeyr                  
2    FranzLobmeyr     Franz                 Lobmeyr                  
3    LouiseKralik    Louise  Kralik von Meyrswalden        Lobmeyr   
4     HannsKralik     Hanns  Kralik von Meyrswalden                  
..            ...       ...                     ...            ...   
69   ErnstDohnany     Ernst            von Dohnányi                  
70      Beethoven    Ludwig           van Beethoven                  
71         Mozart  Wolfgang                  Mozart                  
72          Haydn    Joseph                   Haydn                  
73        LeoXIII       Leo                    XIII                  

                       occupations  \
0             k. k. Hofglashändler   
1             k. k. Hofglashändler   
2   k. 

## **2.2. Index place**

In [38]:
# Extract <place> elements
places = root.xpath('//tei:listPlace/tei:place', namespaces=ns)
print(f"Number of <place> elements found: {len(places)}")

# Extract data
data_place = []
for place in places:
    place_data = {}
    # Extract xml:id
    place_data['ID'] = place.get('{http://www.w3.org/XML/1998/namespace}id')
    
    # Extract placeName
    placename = place.find('tei:placeName', namespaces=ns)
    place_data['name'] = placename.text if placename is not None else None

    #Extract type placeName
    placename_ref=placename.get('type') if placename is not None else None
    place_data['type'] = f"{placename_ref}" if placename is not None else None
    
    # Extract geo
    geo = place.find('tei:location/tei:geo', namespaces=ns)
    place_data['geo'] = geo.text if geo is not None else None
    
    # Extract idnos
    #Wikipedia
    wikipedia = place.find('tei:idno[@subtype="Wikipedia"]', namespaces=ns)
    place_data['Wikipedia']=wikipedia.text if wikipedia is not None else None

    #Wikidata
    wikidata=place.find('tei:idno[@subtype="WIKIDATA"]', namespaces=ns)
    place_data['WIKIDATA']=wikidata.text if wikidata is not None else None

    #Geonames
    geonames=place.find('tei:idno[@subtype="geonames"]', namespaces=ns)
    place_data['Geonames']=geonames.text if geonames is not None else None

    #WGW
    wgw=place.find('tei:idno[@subtype="WGW"]', namespaces=ns)
    place_data['WGW']=wgw.text if wgw is not None else None

    #GND
    gnd=place.find('tei:idno[@subtype="GND"]', namespaces=ns)
    place_data['GND']=gnd.text if gnd is not None else None
    
    # Append the data to the list
    data_place.append(place_data)

# Convert to pandas DataFrame
df_place = pd.DataFrame(data_place)

#Fill empty rows (needed for json later on)
df_place = df_place.fillna("")

#Duplicate ID and create website-name in new column
df_place['Website']=df_place.loc[:,'ID']+f".html"

# Save to a CSV file
df_place.to_csv("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/csv/place_index.csv", index=False)
df_place.to_excel("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/Excel/place_index.xlsx", index=False)

# Display the DataFrame
print(df_place)

Number of <place> elements found: 28
                         ID  \
0                      Wien   
1           Weimarerplatz_3   
2         DöblingerFriedhof   
3           WähringerKirche   
4        Rosensteingasse_32   
5   HernalserHauptstraße_81   
6           EhrengrabKralik   
7               Stephansdom   
8          StMarxerFriedhof   
9                    Weimar   
10          Zentralfriedhof   
11          GoldeggGasse_19   
12       Favoritenstraße_27   
13              Vorderbrühl   
14        Elisabethstraße_1   
15        Augustinerkircher   
16        KärntnerStraße_21   
17               Rennweg_91   
18          ErdbergerKirche   
19            KircheMödling   
20            Eleonorenhain   
21                 Annathal   
22   Zentralfriedhof_Halle1   
23        WeimarerStraße_89   
24            Bendlgasse_24   
25                   Berlin   
26               Seccession   
27                     Linz   

                                                 name      type

## **2.3. Index organisation**

In [39]:
# Extract <org> elements
orgs = root.xpath('//tei:listOrg/tei:org', namespaces=ns)
print(f"Number of <org> elements found: {len(orgs)}")

# Extract data
data_org = []
for org in orgs:
    org_data = {}
    # Extract xml:id
    org_data['ID'] = org.get('{http://www.w3.org/XML/1998/namespace}id')
    
    # Extract orgName
    orgname = org.find('tei:orgName', namespaces=ns)
    org_data['name'] = orgname.text if orgname is not None else f""

    #Extract type orgName
    orgname_ref=orgname.get('type') if orgname is not None else None
    org_data['type'] = f"{orgname_ref}" if orgname is not None else None
    
    #Extract idnos
    #Wikipedia
    wikipedia = org.find('tei:idno[@subtype="Wikipedia"]', namespaces=ns)
    org_data['Wikipedia']=wikipedia.text if wikipedia is not None else None

    #Wikidata
    wikidata=org.find('tei:idno[@subtype="WIKIDATA"]', namespaces=ns)
    org_data['WIKIDATA']=wikidata.text if wikidata is not None else None

    #WGW
    wgw=org.find('tei:idno[@subtype="WGW"]', namespaces=ns)
    org_data['WGW']=wgw.text if wgw is not None else None

    #GND
    gnd=org.find('tei:idno[@subtype="GND"]', namespaces=ns)
    org_data['GND']=gnd.text if gnd is not None else None
    
    # Append the data to the list
    data_org.append(org_data)

# Convert to pandas DataFrame
df_org = pd.DataFrame(data_org)

#Fill empty rows (needed for json later on)
df_org = df_org.fillna("")

#Duplicate ID and create website-name in new column
df_org['Website']=df_org.loc[:,'ID']+f".html"

# Save to a CSV file
df_org.to_csv("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/csv/org_index.csv", index=False)
df_org.to_excel("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/Excel/org_index.xlsx", index=False)

# Display the DataFrame
print(df_org)

Number of <org> elements found: 22
                             ID  \
0                     JLLobmeyr   
1                        Gerold   
2                    Herrenhaus   
3                        AkBild   
4        GenossenschaftKünstler   
5              Schillerstiftung   
6                 TheyerHartmut   
7   StädtischeLeichenbestattung   
8                    DruckBileg   
9    GeneralinspektionEisenbahn   
10                Statthalterei   
11           Enterprisefunebres   
12                   Bärenhöhle   
13                     Nordbahn   
14         EisenbahnMinisterium   
15              BestattungPayer   
16                     Lischkar   
17                    Vaterland   
18                  FreiePresse   
19                   Extrablatt   
20                   MeyrsNeffe   
21         WienerKonservatorium   

                                                 name           type  \
0                                    J. & L. Lobmeyr     manufactory   
1              

## **2.4. Index relationships**

In [31]:
#Created with the help of u:ai
# Extract <relation> elements
relations = root.xpath('//tei:listRelation/tei:relation', namespaces=ns)

# Extract data
relation_data = []
for relation in relations:
    relation_entry = {}
    # Extract relation name
    relation_entry['name'] = relation.get('name')
    
    # Extract active attribute
    relation_entry['active'] = relation.get('active')
    
    # Extract passive attribute
    relation_entry['passive'] = relation.get('passive')
    
    # Extract mutual attribute
    relation_entry['mutual'] = relation.get('mutual')
    
    # Append the relation entry to the list
    relation_data.append(relation_entry)

# Convert to pandas DataFrame
df_relations = pd.DataFrame(relation_data)

# Function to process each cell
def process_names(cell):
    if pd.isna(cell):  # Check if the cell is None or NaN
        return cell  # Return as is (or return an empty string if preferred)
    
    # Step 1: Insert a comma between each #VornameNachname
    cell = cell.replace(' #', ';#')  # Add a comma before each `#` (except the first one)
    
    # Step 2: Remove the '#' symbol
    cell = cell.replace('#', '')
    
    # Step 3: Split each name at capital letters and join with spaces
    processed_names = [' '.join(re.findall(r'[A-Z][a-z]*', name)) for name in cell.split(',')]
    
    # Step 4: Join the processed names back into a single string with commas
    return ', '.join(processed_names)

df_relations['active'] = df_relations['active'].apply(process_names)
df_relations['passive'] = df_relations['passive'].apply(process_names)
df_relations['mutual'] = df_relations['mutual'].apply(process_names)

# Display the updated DataFrame
print(df_relations)

# Save to a CSV file (optional)
df_relations.to_csv("C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/csv/relation_index.csv", index=False)


# Display the DataFrame
print(df_relations)

           name           active  \
0        parent      August Rath   
1        parent  Aloisia Lobmeyr   
2        parent    Louise Kralik   
3        parent      Maia Kralik   
4        parent   Richard Kralik   
5        parent  Aloisia Pichler   
6        parent   Eduard Pichler   
7   grandparent    Louise Kralik   
8   grandparent      Maia Kralik   
9   grandparent   Richard Kralik   
10      sibling             None   
11      sibling             None   
12      sibling             None   
13      sibling             None   
14      sibling             None   
15      sibling             None   
16        uncle   Ludwig Lobmeyr   
17        uncle    Ludwig Kralik   
18       spouse             None   
19       spouse             None   
20       spouse             None   
21       spouse             None   
22      partner             None   

                                              passive  \
0   Marie Sch Kreuzenau August Rathjun Paul Rath S...   
1   Joseph Lobmeyr Lu

# **3. Converting csv to json**

## **3.1. json-person**

In [163]:
#CODE FOR JSON WITH EMPTY LINES
# Function to clean and preprocess data
#Created with u:ai
def preprocess_value(value):
    """
    Cleans and preprocesses a value:
    - Strips leading/trailing whitespace
    - Replaces empty strings or invalid data with None
    """
    if pd.isna(value) or str(value).strip() == '':
        return f""
    return str(value).strip()

# Apply preprocessing to all cells in the DataFrame
df_person = df_person.applymap(preprocess_value)

# Function to remove keys with None values
def remove_null_values(record):
    """
    Removes keys with None values from a dictionary.
    """
    return {key: value for key, value in record.items() if value is not None}

# Convert the DataFrame to a list of dictionaries and remove null values
person_records = [remove_null_values(record) for record in df_person.to_dict(orient='records')]

# Wrap the list of person records in a dictionary with a "data" key
person_json_data = {
    "data": person_records
}

# Write the JSON to a file
output_file_path = 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/person_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(person_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
print(json.dumps(person_json_data, indent=4, ensure_ascii=False))

{
    "data": [
        {
            "ID": "LudwigLobmeyr",
            "forename": "Ludwig",
            "surname": "Lobmeyr",
            "surname maiden": "",
            "occupations": "k. k. Hofglashändler",
            "affiliations": "Österreichisches Herrenhaus;  k. k.Akademie der bildenden Künste;  Genossenschaft der bildenden Künstler Wiens;  Deutsche Schillerstiftung",
            "death_date": "1917-03-25",
            "Wikidata": "Q55847850",
            "WGW": "",
            "GND": "",
            "Wikipedia": "",
            "ÖBL": "",
            "PMB": "",
            "VIAF": "",
            "Website": "LudwigLobmeyr.html"
        },
        {
            "ID": "JosephLobmeyr",
            "forename": "Joseph",
            "surname": "Lobmeyr",
            "surname maiden": "",
            "occupations": "k. k. Hofglashändler",
            "affiliations": "",
            "death_date": "",
            "Wikidata": "",
            "WGW": "",
            "GND": "",
     

  df_person = df_person.applymap(preprocess_value)


In [61]:
#CODE FOR JSON WITH NO EMPTY LINES
# Function to clean and preprocess data
#Created with u:ai
def preprocess_value(value):
    """
    Cleans and preprocesses a value:
    - Strips leading/trailing whitespace
    - Replaces empty strings or invalid data with None
    """
    if pd.isna(value) or str(value).strip() == '':
        return None
    return str(value).strip()

# Apply preprocessing to all cells in the DataFrame
df_person = df_person.applymap(preprocess_value)

# Function to parse complex fields (e.g., affiliations)
def parse_affiliations(affiliations):
    """
    Parses the 'affiliations' field into a structured list of dictionaries.
    """
    if not affiliations:
        return None
    # Split affiliations by tab or newline
    entries = re.split(r'\t|\n', affiliations)
    parsed_entries = []
    for entry in entries:
        # Extract key-value pairs from the entry
        match = re.match(r'type:\s*(.*?),\s*subtype:\s*(.*?),\s*orgName:\s*(.*?),\s*ref:\s*(.*)', entry)
        if match:
            parsed_entries.append({
                "type": match.group(1).strip(),
                "subtype": match.group(2).strip(),
                "orgName": match.group(3).strip(),
                "ref": match.group(4).strip()
            })
    return parsed_entries if parsed_entries else None

# Apply the parsing function to the 'affiliations' column
if 'affiliations' in df_person.columns:
    df_person['affiliations'] = df_person['affiliations'].apply(parse_affiliations)

# Function to remove keys with None values
def remove_null_values(record):
    """
    Removes keys with None values from a dictionary.
    """
    return {key: value for key, value in record.items() if value is not None}

# Convert the DataFrame to a list of dictionaries and remove null values
person_records = [remove_null_values(record) for record in df_person.to_dict(orient='records')]

# Wrap the list of person records in a dictionary with a "data" key
person_json_data = {
    "data": person_records
}

# Write the JSON to a file
output_file_path = 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/person_index_clean.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(person_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
print(json.dumps(person_json_data, indent=4, ensure_ascii=False))

{
    "data": [
        {
            "ID": "LudwigLobmeyr",
            "forename": "Ludwig",
            "surname": "Lobmeyr",
            "occupations": "k. k. Hofglashändler",
            "death_date": "1917-03-25",
            "idnos": "ID (WIKIDATA): Q55847850"
        },
        {
            "ID": "JosephLobmeyr",
            "forename": "Joseph",
            "surname": "Lobmeyr",
            "occupations": "k. k. Hofglashändler"
        },
        {
            "ID": "FranzLobmeyr",
            "forename": "Franz",
            "surname": "Lobmeyr",
            "occupations": "k. k. Linienschiffs-Lieutenant"
        },
        {
            "ID": "LouiseKralik",
            "forename": "Louise",
            "surname": "Kralik von Meyrswalden",
            "surname maiden": "Lobmayr",
            "death_date": "1905-10-03",
            "idnos": "ID (WIKIDATA): Q133874636"
        },
        {
            "ID": "HannsKralik",
            "forename": "Hanns",
            "surname"

  df_person = df_person.applymap(preprocess_value)


## **2.2. Place-json**

In [181]:
#CODE FOR JSON WITH EMPTY LINES
# Function to clean and preprocess data
#Created with u:ai
def preprocess_value(value):
    """
    Cleans and preprocesses a value:
    - Strips leading/trailing whitespace
    - Replaces empty strings or invalid data with None
    """
    if pd.isna(value) or str(value).strip() == '':
        return f""
    return str(value).strip()

#Replace english nouns with german translation
df_place['type']=df_place['type'].str.replace('street', 'Straße', regex=False)
df_place['type']=df_place['type'].str.replace('church', 'Kirche', regex=False)
df_place['type']=df_place['type'].str.replace('city', 'Stadt', regex=False)
df_place['type']=df_place['type'].str.replace('village', 'Dorf', regex=False)
df_place['type']=df_place['type'].str.replace('museum', 'Museum', regex=False)
df_place['type']=df_place['type'].str.replace('cemetery', 'Friedhof', regex=False)

# Apply preprocessing to all cells in the DataFrame
df_place = df_place.applymap(preprocess_value)

# Function to remove keys with None values
def remove_null_values(record):
    """
    Removes keys with None values from a dictionary.
    """
    return {key: value for key, value in record.items() if value is not None}

# Convert the DataFrame to a list of dictionaries and remove null values
place_records = [remove_null_values(record) for record in df_place.to_dict(orient='records')]

# Wrap the list of person records in a dictionary with a "data" key
place_json_data = {
    "data": place_records
}

# Write the JSON to a file
output_file_path = 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/place_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(place_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
print(json.dumps(place_json_data, indent=4, ensure_ascii=False))

{
    "data": [
        {
            "ID": "Wien",
            "name": "Wien",
            "type": "Stadt",
            "geo": "48.20849, 16.37208",
            "Wikipedia": "",
            "WIKIDATA": "Q1741",
            "Geonames": "https://www.geonames.org/2761369/vienna.html",
            "WGW": "",
            "GND": "",
            "Website": "Wien.html"
        },
        {
            "ID": "Weimarerplatz_3",
            "name": "Weimarerplatz 3",
            "type": "Straße",
            "geo": "48.233902622021546, 16.344070193512067",
            "Wikipedia": "https://de.wikipedia.org/wiki/Richard-Kralik-Platz",
            "WIKIDATA": "",
            "Geonames": "",
            "WGW": "26530",
            "GND": "",
            "Website": "Weimarerplatz_3.html"
        },
        {
            "ID": "DöblingerFriedhof",
            "name": "Döblinger Friedhof",
            "type": "Friedhof",
            "geo": "48.238719731402156, 16.328673269061063",
            "Wikiped

  df_place = df_place.applymap(preprocess_value)


## **2.3. Org-json**

In [87]:
#CODE FOR JSON WITH EMPTY LINES
# Function to clean and preprocess data
#Created with u:ai
def preprocess_value(value):
    """
    Cleans and preprocesses a value:
    - Strips leading/trailing whitespace
    - Replaces empty strings or invalid data with None
    """
    if pd.isna(value) or str(value).strip() == '':
        return f""
    return str(value).strip()

# Apply preprocessing to all cells in the DataFrame
df_org = df_org.applymap(preprocess_value)

# Function to remove keys with None values
def remove_null_values(record):
    """
    Removes keys with None values from a dictionary.
    """
    return {key: value for key, value in record.items() if value is not None}

#Replace english words with german translation
df_org['type']=df_org['type'].str.replace('manufactory', 'Manufaktur', regex=False)
df_org['type']=df_org['type'].str.replace('printer', 'Druckerei', regex=False)
df_org['type']=df_org['type'].str.replace('representativ', 'Vertretungsorgan', regex=False)
df_org['type']=df_org['type'].str.replace('cultural', 'Kulturelle Einrichtung', regex=False)
df_org['type']=df_org['type'].str.replace('paper', 'Papierwaren', regex=False)
df_org['type']=df_org['type'].str.replace('funeral', 'Bestattungsunternehmen', regex=False)
df_org['type']=df_org['type'].str.replace('transport', 'Verkehrswesen', regex=False)
df_org['type']=df_org['type'].str.replace('political', 'Politische Organisation', regex=False)
df_org['type']=df_org['type'].str.replace('press', 'Presse', regex=False)

# Convert the DataFrame to a list of dictionaries and remove null values
org_records = [remove_null_values(record) for record in df_org.to_dict(orient='records')]

# Wrap the list of person records in a dictionary with a "data" key
org_json_data = {
    "data": org_records
}

# Write the JSON to a file
output_file_path = 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/org_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(org_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
print(json.dumps(org_json_data, indent=4, ensure_ascii=False))

{
    "data": [
        {
            "ID": "JLLobmeyr",
            "name": "J. & L. Lobmeyr",
            "type": "Manufaktur",
            "idnos": "ID (WGW): 17959; ID (WIKIDATA): Q876883",
            "Website": "JLLobmeyr.html"
        },
        {
            "ID": "Gerold",
            "name": "Gerold & Co",
            "type": "Druckerei",
            "idnos": "ID (WGW): 19514",
            "Website": "Gerold.html"
        },
        {
            "ID": "Herrenhaus",
            "name": "Herrenhaus",
            "type": "Vertretungsorgan",
            "idnos": "ID (WGW): 12008",
            "Website": "Herrenhaus.html"
        },
        {
            "ID": "AkBild",
            "name": "Akademie der bildenden Künste",
            "type": "Kulturelle Einrichtung",
            "idnos": "ID (WGW): 17555; ID (GND): 2003761-2; ID (WIKIDATA): Q414219",
            "Website": "AkBild.html"
        },
        {
            "ID": "GenossenschaftKünstler",
            "name": "Genossen

  df_org = df_org.applymap(preprocess_value)


## **2.4. Relation json**

In [29]:
#CODE FOR JSON WITH EMPTY LINES
# Function to clean and preprocess data
#Created with u:ai
def preprocess_value(value):
    """
    Cleans and preprocesses a value:
    - Strips leading/trailing whitespace
    - Replaces empty strings or invalid data with None
    """
    if pd.isna(value) or str(value).strip() == '':
        return None
    return str(value).strip()

# Apply preprocessing to all cells in the DataFrame
df_relation = df_relations.applymap(preprocess_value)

# Function to remove keys with None values
def remove_null_values(record):
    """
    Removes keys with None values from a dictionary.
    """
    return {key: value for key, value in record.items() if value is not None}

# Convert the DataFrame to a list of dictionaries and remove null values
relation_records = [remove_null_values(record) for record in df_relation.to_dict(orient='records')]

# Wrap the list of person records in a dictionary with a "data" key
relation_json_data = {
    "data": relation_records
}

# Write the JSON to a file
output_file_path = 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/relation_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(relation_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
print(json.dumps(relation_json_data, indent=4, ensure_ascii=False))

{
    "data": [
        {
            "name": "parent",
            "active": "August Rath",
            "passive": "Marie Sch Kreuzenau, August Rathjun, Paul Rath, Stefan Rath"
        },
        {
            "name": "parent",
            "active": "Aloisia Lobmeyr",
            "passive": "Joseph Lobmeyr, Ludwig Lobmeyr, Franz Lobmeyr, Louise Kralik, Mathilde Rath"
        },
        {
            "name": "parent",
            "active": "Louise Kralik",
            "passive": "Richard Kralik, Mathilde Kralik, Aloisia Pichler"
        },
        {
            "name": "parent",
            "active": "Maia Kralik",
            "passive": "Dietrich Kralik, Heinrich Kralik, Rodrich Kralik"
        },
        {
            "name": "parent",
            "active": "Richard Kralik",
            "passive": "Dietrich Kralik, Heinrich Kralik, Rodrich Kralik"
        },
        {
            "name": "parent",
            "active": "Aloisia Pichler",
            "passive": "Ilse Pichler, Edith Pi

  df_relation = df_relations.applymap(preprocess_value)


# **3. Metadaten (letters, etc.) to json**

## **3.1. Opening files**

In [124]:
#Path to directory
path_briefe="C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/metadaten/metadaten_briefe.xlsx"
path_totenzettel="C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/metadaten/metadaten_totenzettel.xlsx"
path_manuskripte="C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/metadaten/metadaten_manuskripte.xlsx"

In [125]:
#Excel to pd
df_briefe=pd.read_excel(path_briefe)
df_totenzettel=pd.read_excel(path_totenzettel)
df_manuskripte=pd.read_excel(path_manuskripte)

## **3.2. Briefe to json**

In [120]:
#Originally created with u:ai, but adapted and changed manually
#Convert df to a list of dictionaries
briefe_records=df_briefe.to_dict(orient='records')

#Wrap list in data key
briefe_json_data = {
    "data": briefe_records
}

#Write JSON to file
output_file_path='C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/briefe_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(briefe_json_data, json_file, indent=4, ensure_ascii=False)

# Display the JSON structure (optional, for debugging)
#print(json.dumps(briefe_json_data, indent=4, ensure_ascii=False))

## **3.3. Totenzettel to json**

In [127]:
#Originally created with u:ai, but adapted and changed manually
#Convert df to a list of dictionaries
toten_records=df_totenzettel.to_dict(orient='records')

#Wrap list in data key
toten_json_data = {
    "data": toten_records
}

#Write JSON to file
output_file_path='C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/toten_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(toten_json_data, json_file, indent=4, ensure_ascii=False)

## **3.4. Manuskript to json**

In [126]:
#Originally created with u:ai, but adapted and changed manually
#Convert df to a list of dictionaries
manuskripte_records=df_manuskripte.to_dict(orient='records')

#Wrap list in data key
manuskripte_json_data = {
    "data": manuskripte_records
}

#Write JSON to file
output_file_path='C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/data/json/manuskript_index.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(manuskripte_json_data, json_file, indent=4, ensure_ascii=False)

# **4. Individual webpages**

## **4.1. Webpages person**

In [58]:
data = df_person.copy()

# Create output directory for the generated HTML files
output_dir = "C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/personen"
os.makedirs(output_dir, exist_ok=True)

In [59]:
# HTML template for the webpage
html_template = """<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>Edition Mathilde Kralik - {title}</title>
        <link rel="stylesheet" type="text/css" href="../../css/menu_bar.css">
        <link rel="stylesheet" type="text/css" href="../../css/eintrag.css">
        <link rel="preconnect" href="https://fonts.googleapis.com">
        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin="anonymous">
        <link href="https://fonts.googleapis.com/css2?family=Merriweather:ital,opsz,wght@0,18..144,300..900&amp;1,18..144,300..900&amp;family=Montserrat:ital,wght@0,100..900&amp;1,100..900&amp;display=swap" rel="stylesheet">
    </head>
    <body>
        <!-- Navigation bar -->
                 <nav class="topnav" aria-label="Main Navigation">
                     <a href="../../index.html">Menü</a>
                     
                     <!-- Submenu for documents -->
                     <div class="subnav">
                         <div class="nav-toggle">Dokumente</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Briefe.html">Briefe</a>
                             <a href="../Manuskript.html">Manuskripte</a>
                             <a href="../Totenzettel.html">Totenzettel</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Entities -->
                     <div class="subnav">
                         <div class="nav-toggle">Verzeichnis</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Personen.html">Personen</a>
                             <a href="../Orte.html">Orte</a>
                             <a href="../Org.html">Organisationen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Visualisations -->
                     <div class="subnav">
                         <div class="nav-toggle">Visualisierungen</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Vis_Orte.html">Orte</a>
                             <a href="../Vis_Personen.html">Personennetzwerk</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Mathilde Kralik -->
                     <div class="subnav">
                         <div class="nav-toggle">Mathilde Kralik</div>
                         <div class="subnav-content" aria-label="Mathilde Kralik Submenu">
                             <a href="../Ressourcen.html">Ressourcen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Edition -->
                     <div class="subnav">
                         <div class="nav-toggle">Über die Edition</div>
                         <div class="subnav-content" aria-label="Edition Submenu">
                             <a href="../Projekt.html">Projekt</a>
                             <a href="../Transkription.html">Transkription</a>
                             <a href="https://github.com/baueranna/mathilde_kralik.git" target="_blank" rel="noopener noreferrer">Github</a>
                         </div>
                     </div>
                 </nav>
        <main>
            <h1>{title}</h1>
            <table>
                {table_rows}
            </table>
        </main>
        <script src="../../js/menu_bar.js"></script>
    </body>
</html>
"""

In [60]:

# Function to generate table rows dynamically based on non-empty data
def generate_table_rows(row):
    table_rows = ""
    if row.get("forename"):
        table_rows += f"<tr><th>Vorname</th><td>{row['forename']}</td></tr>\n"
    if row.get("surname"):
        table_rows += f"<tr><th>Nachname</th><td>{row['surname']}</td></tr>\n"
    if row.get("surname maiden"):
        table_rows += f"<tr><th>Geburtsname</th><td>{row['surname maiden']}</td></tr>\n"
    if row.get("death_date"):
        table_rows += f"<tr><th>Sterbedatum</th><td>{row['death_date']}</td></tr>\n"
    if row.get("occupations"):
        table_rows += f"<tr><th>Beruf</th><td>{row['occupations']}</td></tr>\n"
    if row.get("affiliations"):
        table_rows += f"<tr><th>Mitgliedschaft</th><td>{row['affiliations']}</td></tr>\n"
    if row.get("Wikidata"):
        table_rows += f"<tr><th>WIKIDATA</th><td><a href='https://www.wikidata.org/wiki/{row['Wikidata']}' class='info'>{row['Wikidata']}</a></td></tr>\n"
    if row.get("WGW"):
        table_rows += f"<tr><th>WGW</th><td><a href='https://www.geschichtewiki.wien.gv.at/Special:URIResolver/?curid={row['WGW']}' class='info'>{row['WGW']}</a></td></tr>\n"
    return table_rows
    if row.get("GND"):
        table_rows += f"<tr><th>GND</th><td><a href='https://d-nb.info/gnd/{row['GND']}' class='info'>{row['GND']}</a></td></tr>\n"
    return table_rows
    if row.get("Wikipedia"):
        table_rows += f"<tr><th>Wikipedia</th><td><a href='{row['Wikipedia']}' class='info'>{row['forename']} {row['surname']}</a></td></tr>\n"
    return table_rows
    if row.get("VIAF"):
        table_rows += f"<tr><th>VIAF</th><td><a href='https://viaf.org/de/viaf/{row['VIAF']}' class='info'>{row['VIAF']}</a></td></tr>\n"
    return table_rows
    if row.get("ÖBL"):
        table_rows += f"<tr><th>ÖBL</th><td><a href='{row['ÖBL']}' class='info'>{row['forename']} {row['surname']}</a></td></tr>\n"
    return table_rows
    if row.get("PMB"):
        table_rows += f"<tr><th>PMB</th><td><a href='https://pmb.acdh.oeaw.ac.at/apis/entities/entity/person/{row['PMB']}' class='info'>{row['PMB']}</a></td></tr>\n"
    return table_rows

# Iterate through each row in the CSV and generate an HTML file
for index, row in data.iterrows():
    # Generate the table rows dynamically
    table_rows = generate_table_rows(row)
    
    # Generate the HTML content
    html_content = html_template.format(
        title=f"{row.get('forename', '')} {row.get('surname', '')}",
        table_rows=table_rows
    )
    
    # Save the HTML file
    website = row.get("Website", "default.html")
    output_file = os.path.join(output_dir, website)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

print(f"HTML files have been generated in the '{output_dir}' directory.")

HTML files have been generated in the 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/personen' directory.


## **4.2. Webpage places**

In [61]:
data2 = df_place.copy()

#Replace english nouns with german translation
data2['type']=data2['type'].str.replace('street', 'Straße', regex=False)
data2['type']=data2['type'].str.replace('church', 'Kirche', regex=False)
data2['type']=data2['type'].str.replace('city', 'Stadt', regex=False)
data2['type']=data2['type'].str.replace('village', 'Dorf', regex=False)
data2['type']=data2['type'].str.replace('museum', 'Museum', regex=False)
data2['type']=data2['type'].str.replace('cemetery', 'Friedhof', regex=False)

# Create output directory for the generated HTML files
output_dir2 = "C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/orte"
os.makedirs(output_dir2, exist_ok=True)

In [62]:
# HTML template for the webpage
html_template = """<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>Edition Mathilde Kralik - {title}</title>
        <link rel="stylesheet" type="text/css" href="../../css/menu_bar.css">
        <link rel="stylesheet" type="text/css" href="../../css/eintrag.css">
        <link rel="preconnect" href="https://fonts.googleapis.com">
        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin="anonymous">
        <link href="https://fonts.googleapis.com/css2?family=Merriweather:ital,opsz,wght@0,18..144,300..900&amp;1,18..144,300..900&amp;family=Montserrat:ital,wght@0,100..900&amp;1,100..900&amp;display=swap" rel="stylesheet">
    </head>
    <body>
       <!-- Navigation bar -->
                 <nav class="topnav" aria-label="Main Navigation">
                     <a href="../../index.html">Menü</a>
                     
                     <!-- Submenu for documents -->
                     <div class="subnav">
                         <div class="nav-toggle">Dokumente</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Briefe.html">Briefe</a>
                             <a href="../Manuskript.html">Manuskripte</a>
                             <a href="../Totenzettel.html">Totenzettel</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Entities -->
                     <div class="subnav">
                         <div class="nav-toggle">Verzeichnis</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Personen.html">Personen</a>
                             <a href="../Orte.html">Orte</a>
                             <a href="../Org.html">Organisationen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Visualisations -->
                     <div class="subnav">
                         <div class="nav-toggle">Visualisierungen</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Vis_Orte.html">Orte</a>
                             <a href="../Vis_Personen.html">Personennetzwerk</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Mathilde Kralik -->
                     <div class="subnav">
                         <div class="nav-toggle">Mathilde Kralik</div>
                         <div class="subnav-content" aria-label="Mathilde Kralik Submenu">
                             <a href="../Ressourcen.html">Ressourcen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Edition -->
                     <div class="subnav">
                         <div class="nav-toggle">Über die Edition</div>
                         <div class="subnav-content" aria-label="Edition Submenu">
                             <a href="../Projekt.html">Projekt</a>
                             <a href="../Transkription.html">Transkription</a>
                             <a href="https://github.com/baueranna/mathilde_kralik.git" target="_blank" rel="noopener noreferrer">Github</a>
                         </div>
                     </div>
                 </nav>
        <main>
            <h1>{title}</h1>
            <table>
                {table_rows}
            </table>
        </main>
        <script src="../../js/menu_bar.js"></script>
    </body>
</html>
"""

In [63]:
# Function to generate table rows dynamically based on non-empty data
def generate_table_rows(row):
    table_rows = ""
    if row.get("name"):
        table_rows += f"<tr><th>Name</th><td>{row['name']}</td></tr>\n"
    if row.get("type"):
        table_rows += f"<tr><th>Art</th><td>{row['type']}</td></tr>\n"
    if row.get("geo"):
        table_rows += f"<tr><th>Geodaten</th><td>{row['geo']}</td></tr>\n"
    if row.get("WIKIDATA"):
        table_rows += f"<tr><th>WIKIDATA</th><td><a href='https://www.wikidata.org/wiki/{row['WIKIDATA']}' class='info'>{row['WIKIDATA']}</a></td></tr>\n"
    if row.get("WGW"):
        table_rows += f"<tr><th>WGW</th><td><a href='https://www.geschichtewiki.wien.gv.at/Special:URIResolver/?curid={row['WGW']}' class='info'>{row['WGW']}</a></td></tr>\n"
    return table_rows
    if row.get("GND"):
        table_rows += f"<tr><th>GND</th><td><a href='https://d-nb.info/gnd/{row['GND']}' class='info'>{row['GND']}</a></td></tr>\n"
    return table_rows
    if row.get("Wikipedia"):
        table_rows += f"<tr><th>Wikipedia</th><td><a href='{row['Wikipedia']}' class='info'>{row['Wikipedia']}</a></td></tr>\n"
    return table_rows
    if row.get("Geonames"):
        table_rows += f"<tr><th>Geonames</th><td><a href='{row['Geonames']}' class='info'>{row['Geonames']}</a></td></tr>\n"
    return table_rows

# Iterate through each row in the CSV and generate an HTML file
for index, row in data2.iterrows():
    # Generate the table rows dynamically
    table_rows = generate_table_rows(row)
    
    # Generate the HTML content
    html_content = html_template.format(
        title=f"{row.get('name', '')}",
        table_rows=table_rows
    )
    
    # Save the HTML file
    website = row.get("Website", "default.html")
    output_file = os.path.join(output_dir2, website)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

print(f"HTML files have been generated in the '{output_dir2}' directory.")

HTML files have been generated in the 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/orte' directory.


## **4.3. Website org**

In [64]:
data3 = df_org.copy()

#Replace english words with german translation
data3['type']=data3['type'].str.replace('manufactory', 'Manufaktur', regex=False)
data3['type']=data3['type'].str.replace('printer', 'Druckerei', regex=False)
data3['type']=data3['type'].str.replace('representativ', 'Vertretungsorgan', regex=False)
data3['type']=data3['type'].str.replace('cultural', 'Kulturelle Einrichtung', regex=False)
data3['type']=data3['type'].str.replace('paper', 'Papierwaren', regex=False)
data3['type']=data3['type'].str.replace('funeral', 'Bestattungsunternehmen', regex=False)
data3['type']=data3['type'].str.replace('transport', 'Verkehrswesen', regex=False)
data3['type']=data3['type'].str.replace('political', 'Politische Organisation', regex=False)
data3['type']=data3['type'].str.replace('press', 'Presse', regex=False)

# Create output directory for the generated HTML files
output_dir3 = "C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/org"
os.makedirs(output_dir3, exist_ok=True)

In [65]:
# HTML template for the webpage
html_template = """<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>Edition Mathilde Kralik - {title}</title>
        <link rel="stylesheet" type="text/css" href="../../css/menu_bar.css">
        <link rel="stylesheet" type="text/css" href="../../css/eintrag.css">
        <link rel="preconnect" href="https://fonts.googleapis.com">
        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin="anonymous">
        <link href="https://fonts.googleapis.com/css2?family=Merriweather:ital,opsz,wght@0,18..144,300..900&amp;1,18..144,300..900&amp;family=Montserrat:ital,wght@0,100..900&amp;1,100..900&amp;display=swap" rel="stylesheet">
    </head>
    <body>
        <!-- Navigation bar -->
                 <nav class="topnav" aria-label="Main Navigation">
                     <a href="../../index.html">Menü</a>
                     
                     <!-- Submenu for documents -->
                     <div class="subnav">
                         <div class="nav-toggle">Dokumente</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Briefe.html">Briefe</a>
                             <a href="../Manuskript.html">Manuskripte</a>
                             <a href="../Totenzettel.html">Totenzettel</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Entities -->
                     <div class="subnav">
                         <div class="nav-toggle">Verzeichnis</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Personen.html">Personen</a>
                             <a href="../Orte.html">Orte</a>
                             <a href="../Org.html">Organisationen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Visualisations -->
                     <div class="subnav">
                         <div class="nav-toggle">Visualisierungen</div>
                         <div class="subnav-content" aria-label="Submenu">
                             <a href="../Vis_Orte.html">Orte</a>
                             <a href="../Vis_Personen.html">Personennetzwerk</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Mathilde Kralik -->
                     <div class="subnav">
                         <div class="nav-toggle">Mathilde Kralik</div>
                         <div class="subnav-content" aria-label="Mathilde Kralik Submenu">
                             <a href="../Ressourcen.html">Ressourcen</a>
                         </div>
                     </div>
                     
                     <!-- Submenu for Edition -->
                     <div class="subnav">
                         <div class="nav-toggle">Über die Edition</div>
                         <div class="subnav-content" aria-label="Edition Submenu">
                             <a href="../Projekt.html">Projekt</a>
                             <a href="../Transkription.html">Transkription</a>
                             <a href="https://github.com/baueranna/mathilde_kralik.git" target="_blank" rel="noopener noreferrer">Github</a>
                         </div>
                     </div>
                 </nav>
        <main>
            <h1>{title}</h1>
            <table>
                {table_rows}
            </table>
        </main>
        <script src="../../js/menu_bar.js"></script>
    </body>
</html>
"""

In [66]:
# Function to generate table rows dynamically based on non-empty data
def generate_table_rows(row):
    table_rows = ""
    if row.get("name"):
        table_rows += f"<tr><th>Name</th><td>{row['name']}</td></tr>\n"
    if row.get("type"):
        table_rows += f"<tr><th>Art</th><td>{row['type']}</td></tr>\n"
    if row.get("WIKIDATA"):
        table_rows += f"<tr><th>WIKIDATA</th><td><a href='https://www.wikidata.org/wiki/{row['WIKIDATA']}' class='info'>{row['WIKIDATA']}</a></td></tr>\n"
    if row.get("WGW"):
        table_rows += f"<tr><th>WGW</th><td><a href='https://www.geschichtewiki.wien.gv.at/Special:URIResolver/?curid={row['WGW']}' class='info'>{row['WGW']}</a></td></tr>\n"
    return table_rows
    if row.get("GND"):
        table_rows += f"<tr><th>GND</th><td><a href='https://d-nb.info/gnd/{row['GND']}' class='info'>{row['GND']}</a></td></tr>\n"
    return table_rows
    if row.get("Wikipedia"):
        table_rows += f"<tr><th>Wikipedia</th><td><a href='{row['Wikipedia']}' class='info'>{row['Wikipedia']}</a></td></tr>\n"
    return table_rows

# Iterate through each row in the CSV and generate an HTML file
for index, row in data3.iterrows():
    # Generate the table rows dynamically
    table_rows = generate_table_rows(row)
    
    # Generate the HTML content
    html_content = html_template.format(
        title=f"{row.get('name', '')}",
        table_rows=table_rows
    )
    
    # Save the HTML file
    website = row.get("Website", "default.html")
    output_file = os.path.join(output_dir3, website)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

print(f"HTML files have been generated in the '{output_dir3}' directory.")

HTML files have been generated in the 'C:/Users/annab/OneDrive/Desktop/Kralik/mathilde_kralik/html/org' directory.
