# Datenbasis-Analyse für die Masterarbeit

Dieses Notebook dient der exemplarischen Darstellung von zwei ausgewählten Logos und deren zugehörigen Metadaten. Die hier präsentierten Daten sind Teil der Grundlage für das Training des Diffusionsmodells.

In [23]:
import base64
import html
import os
from io import BytesIO

import pandas as pd
from IPython.display import display, HTML
from PIL import Image

# --- Configuration ---
# Paths of the metadata CSV files (before / after), the image directory,
# and the ids of the logos that are shown as examples.
METADATA_BEFORE_PATH = '../output/amazing_logos_v4/data/amazing_logos_v4_cleanup/metadata.csv'
METADATA_AFTER_PATH = '../output/amazing_logos_v4/data/meta_postprep/metadata_filtered_by_maps.csv'
IMAGE_DIR = '../output/amazing_logos_v4/images/balanced_sample_2k_512x512'
IDS_TO_DISPLAY = ['amazing_logo_v4000185', 'amazing_logo_v4054788']

# --- Load data ---
# Both frames are read together; if either file is missing, both names are
# set to None so that code checking `is not None` can skip its work.
try:
    df_before = pd.read_csv(METADATA_BEFORE_PATH)
    df_after = pd.read_csv(METADATA_AFTER_PATH)
except FileNotFoundError as load_error:
    df_before = df_after = None
    print(f"Fehler beim Laden der Dateien: {load_error}")
else:
    print("Metadaten (Vorher und Nachher) erfolgreich geladen.")

Metadaten (Vorher und Nachher) erfolgreich geladen.


In [24]:
# --- Normalise the tags in the "after" DataFrame ---
if df_after is not None:
    if 'tags' not in df_after.columns:
        print("Die Spalte 'tags' wurde im 'Nachher'-DataFrame nicht gefunden.")
    else:
        def format_tags(tags_str):
            """Return the comma-separated tags re-joined with ', '.

            Missing values (NaN) become an empty string. Every tag has its
            surrounding whitespace stripped, and entries that are empty after
            stripping (e.g. from doubled commas) are dropped.
            """
            if pd.isna(tags_str):
                return ''
            stripped = (piece.strip() for piece in str(tags_str).split(','))
            return ', '.join(piece for piece in stripped if piece)

        # Overwrite the column with the normalised tag strings
        df_after['tags'] = df_after['tags'].apply(format_tags)
        print("Die 'tags'-Spalte im 'Nachher'-DataFrame wurde erfolgreich formatiert.")

Die 'tags'-Spalte im 'Nachher'-DataFrame wurde erfolgreich formatiert.


In [25]:
# Show the cleaned-up metadata frame. The bare name `df` is not defined in this
# notebook (it only existed as leftover kernel state), so reference `df_after`.
df_after

Unnamed: 0,id,category_main,category,description,tags,company
0,amazing_logo_v4000185,manufacturing_transport,textiles_manufacturing,Drop Oval United states Water,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Santa Fe By Design
1,amazing_logo_v4000313,food_beverage,restaurant_dining,Bird Fork United states,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Bluebird Cafe
2,amazing_logo_v4000469,retail_hospitality,home_improvement,A I Italy N,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Collezione Nai
3,amazing_logo_v4000481,manufacturing_transport,automotive_transport,A Arrow France Line Triangle,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Renault Alpine
4,amazing_logo_v4000523,entertainment_sports_media,arts_culture,Circle Germany Line Rotation Spiral Swirl,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Rotation
...,...,...,...,...,...,...
1805,amazing_logo_v4396621,retail_hospitality,home_improvement,citadel apartment rook commercial castle build...,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Citadel Construction 3
1806,amazing_logo_v4396812,real_estate_construction,real_estate_development,house Architecture development 3D minimal,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Phillip Smith Developers
1807,amazing_logo_v4397131,health,medical_specialty,vitality healthy nature green,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for Sander Apotheke
1808,amazing_logo_v4397134,food_beverage,brewery_alcohol,Promo,"successful_vibe, minimalist, thoughtprovoking,...",Simple elegant logo for TCD


In [30]:
# --- Filter the data and compare before/after ---
def _meta_row(label_html, value_html):
    """Return one two-cell HTML table row; both arguments must already be HTML-safe."""
    return (
        "<tr><td style='padding: 4px; border: 1px solid #ddd; background-color: #f2f2f2;'>"
        f"<b>{label_html}:</b></td>"
        f"<td style='padding: 4px; border: 1px solid #ddd;'>{value_html}</td></tr>"
    )


if df_before is not None and df_after is not None:
    # Keep only the selected logos and index by id so rows can be looked up with .loc
    filtered_df_before = df_before[df_before['id'].isin(IDS_TO_DISPLAY)].set_index('id')
    filtered_df_after = df_after[df_after['id'].isin(IDS_TO_DISPLAY)].set_index('id')

    # German display labels for the columns of the "after" frame
    column_labels_after = {
        'company': 'Firma',
        'description': 'Beschreibung',
        'category': 'Kategorie',
        'category_main': 'Hauptkategorie',
        'tags': 'Tags'
    }

    table_open = "<table style='border-collapse: collapse; width: 100%; font-size: 18px;'>"

    # Fragments of the complete output, joined once at the end
    html_parts = ["<div style='font-family: Arial, sans-serif; font-size: 18px;'>"]

    for logo_id in IDS_TO_DISPLAY:
        # Fetch the row for this id from both frames; skip ids missing in either
        try:
            row_before = filtered_df_before.loc[logo_id]
            row_after = filtered_df_after.loc[logo_id]
        except KeyError:
            print(f"Logo-ID {logo_id} nicht in beiden DataFrames gefunden. Wird übersprungen.")
            continue

        # --- "Before" panel: the raw caption ---
        # Escape the CSV text so characters such as '<' or '&' cannot break the markup
        caption_html = f"<i>{html.escape(str(row_before['text']))}</i>"
        meta_before_html = (
            "<div style='margin-right: 10px; flex: 1;'>"
            "<h3>Vor der Bereinigung</h3>"
            + table_open
            + _meta_row("Caption", caption_html)
            + "</table></div>"
        )

        # --- "After" panel: one table row per remaining column ---
        # 'id' is the index after set_index, so it never appears among the items
        after_rows = []
        for col_name, col_value in row_after.items():
            label = column_labels_after.get(col_name, col_name.capitalize())

            # NaN values are rendered as an empty cell
            value_html = '' if pd.isna(col_value) else html.escape(str(col_value))
            if col_name == 'description':
                value_html = f"<i>{value_html}</i>"

            after_rows.append(_meta_row(html.escape(label), value_html))

        meta_after_html = (
            "<div style='flex: 1;'>"
            "<h3>Nach der Bereinigung</h3>"
            + table_open
            + "".join(after_rows)
            + "</table></div>"
        )

        # --- Put both panels for this logo side by side ---
        html_parts.append("<div style='margin-bottom: 40px; border-top: 2px solid #eee; padding-top: 20px;'>")
        html_parts.append("<div style='display: flex; justify-content: space-between; width: 100%; gap: 20px;'>")
        html_parts.append(meta_before_html)
        html_parts.append(meta_after_html)
        html_parts.append("</div></div>")

    html_parts.append("</div>")
    html_str = "".join(html_parts)
    display(HTML(html_str))
else:
    print("Einer der DataFrames konnte nicht geladen werden. Vergleich nicht möglich.")

0,1
Caption:,"Simple elegant logo for Santa Fe By Design, Drop Oval United states Water, Manufacturing, successful vibe, minimalist, thought-provoking, abstract, recognizable, relatable, sharp, vector art, even edges, black and white"

0,1
Hauptkategorie:,manufacturing_transport
Kategorie:,textiles_manufacturing
Beschreibung:,Drop Oval United states Water
Tags:,"successful_vibe, minimalist, thoughtprovoking, abstract, recognizable, relatable, sharp, vector_art, even_edges, black_and_white"
Firma:,Simple elegant logo for Santa Fe By Design

0,1
Caption:,"Simple elegant logo for Aziz Firaat, Read Book Tick Checkmark Todo List, Website, successful vibe, minimalist, thought-provoking, abstract, recognizable, relatable, sharp, vector art, even edges"

0,1
Hauptkategorie:,tech
Kategorie:,web_digital
Beschreibung:,Read Book Tick Checkmark Todo List
Tags:,"successful_vibe, minimalist, thoughtprovoking, abstract, recognizable, relatable, sharp, vector_art, even_edges"
Firma:,Simple elegant logo for Aziz Firaat
