<a href="https://colab.research.google.com/github/hsandaver/hsandaver/blob/main/Manifest_Enricher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries.
# NOTE: the leading `!` is IPython shell-escape syntax, so this cell is meant
# to be executed inside a notebook, not as a plain Python script.
!pip install requests
!pip install networkx
!pip install plotly
!pip install ipywidgets

# Enable ipywidgets in Colab.
# NOTE: `google.colab` is imported unconditionally, so the cell needs an
# environment that provides that package.
from google.colab import output
output.enable_custom_widget_manager()

# Import necessary libraries
import json
import requests
import networkx as nx
import plotly.graph_objs as go
from ipywidgets import widgets, VBox, HBox, Layout
from IPython.display import display, clear_output
import plotly.io as pio

# Set Plotly's default renderer to 'colab' for figures shown in this session.
pio.renderers.default = 'colab'

def _uri_textarea(description, placeholder):
    """Build an empty multi-line text box (50% wide, 200px high) for URI entry."""
    return widgets.Textarea(
        value='',
        placeholder=placeholder,
        description=description,
        layout=Layout(width='50%', height='200px'),
    )


# Input widgets for IIIF Manifest URIs and Linked Data Entity URIs
iiif_manifest_uris_input = _uri_textarea(
    'IIIF Manifests:',
    'Enter IIIF Manifest URIs (one per line)',
)
linked_data_uris_input = _uri_textarea(
    'Linked Data URIs:',
    'Enter Linked Data Entity URIs (one per line)',
)

# Button to start processing
process_button = widgets.Button(
    description='Process',
    button_style='success',
    layout=Layout(width='200px'),
)

# Output area
output_area = widgets.Output()

# Display the UI: both text areas side by side, then the button, then the
# output area underneath.
display(
    VBox([
        HBox([iiif_manifest_uris_input, linked_data_uris_input]),
        process_button,
        output_area,
    ])
)

# Define functions

def fetch_linked_data_entities(uris, timeout=30):
    """Download linked data entities as JSON-LD, skipping any that fail.

    Each URI is requested with ``Accept: application/ld+json``. A URI whose
    request fails, whose response is not HTTP 200, or whose body is not valid
    JSON is reported with ``print`` and skipped, so one bad URI does not
    abort the rest of the batch.

    Args:
        uris: Iterable of entity URIs to fetch.
        timeout: Seconds to wait on each request. Without a timeout a single
            unresponsive server would block the caller indefinitely.

    Returns:
        A list with the decoded JSON body of every successful response, in
        the order the URIs were given.
    """
    linked_data_entities = []
    for uri in uris:
        try:
            response = requests.get(
                uri,
                headers={'Accept': 'application/ld+json'},
                timeout=timeout,
            )
            if response.status_code != 200:
                print(
                    f"Failed to fetch linked data entity from {uri} "
                    f"(HTTP {response.status_code})"
                )
                continue
            linked_data_entities.append(response.json())
        except (requests.RequestException, ValueError) as e:
            # RequestException covers connection/timeout/URL errors;
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching linked data entity from {uri}: {e}")
    return linked_data_entities

def fetch_iiif_manifests(uris, timeout=30):
    """Download IIIF manifests as JSON, skipping any that fail.

    A URI whose request fails, whose response is not HTTP 200, or whose body
    is not valid JSON is reported with ``print`` and skipped, so one bad URI
    does not abort the rest of the batch.

    Args:
        uris: Iterable of manifest URIs to fetch.
        timeout: Seconds to wait on each request. Without a timeout a single
            unresponsive server would block the caller indefinitely.

    Returns:
        A list with the decoded JSON body of every successful response, in
        the order the URIs were given.
    """
    iiif_manifests = []
    for uri in uris:
        try:
            response = requests.get(uri, timeout=timeout)
            if response.status_code != 200:
                print(
                    f"Failed to fetch IIIF manifest from {uri} "
                    f"(HTTP {response.status_code})"
                )
                continue
            iiif_manifests.append(response.json())
        except (requests.RequestException, ValueError) as e:
            # RequestException covers connection/timeout/URL errors;
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching IIIF manifest from {uri}: {e}")
    return iiif_manifests

def _localized_text(entity, field, default):
    """Return the 'en' entry of a language map, else its first entry, else *default*."""
    labels = entity.get(field)
    if isinstance(labels, str):
        # Tolerate a bare string where a language map was expected.
        return labels
    if not isinstance(labels, dict):
        return default
    return labels.get('en', next(iter(labels.values()), default))


def _date_prefix(entity, field):
    """Return the first 10 characters of a date entry's timestamp, or 'Unknown'."""
    entries = entity.get(field)
    # Use the first element of a non-empty list; an empty list, a missing
    # field or any other non-dict shape falls through to 'Unknown' below.
    entry = entries[0] if isinstance(entries, list) and entries else entries
    if not isinstance(entry, dict):
        return 'Unknown'
    stamp = entry.get('time:inXSDDateTimeStamp')
    value = stamp.get('@value') if isinstance(stamp, dict) else stamp
    # Slicing to 10 characters keeps the leading part of the timestamp
    # (presumably the YYYY-MM-DD date of an ISO-style value).
    return value[:10] if isinstance(value, str) else 'Unknown'


def extract_artist_info(linked_data_entity):
    """Summarise a linked data entity as a flat dict of artist details.

    Missing or unexpectedly shaped fields fall back to defaults instead of
    raising, so one odd record does not stop the whole run.

    Args:
        linked_data_entity: Decoded JSON object describing the artist. The
            fields read are ``prefLabel``, ``altLabel``, ``dateOfBirth``,
            ``dateOfDeath``, ``description`` and ``id``.

    Returns:
        A dict with the keys ``preferred_name``, ``alternative_names`` (list),
        ``date_of_birth``, ``date_of_death``, ``description`` and ``id``.
        Text fields default to ``'Unknown'`` (``'No description available.'``
        for the description) and ``alternative_names`` to ``[]``.
    """
    artist_info = {}
    artist_info['preferred_name'] = _localized_text(
        linked_data_entity, 'prefLabel', 'Unknown'
    )

    # Flatten every language's alternative labels into one list; each
    # language may hold either a single label or a list of labels.
    alt_labels = linked_data_entity.get('altLabel')
    if isinstance(alt_labels, dict):
        label_groups = list(alt_labels.values())
    elif alt_labels:
        label_groups = [alt_labels]
    else:
        label_groups = []
    alternative_names = []
    for labels in label_groups:
        if isinstance(labels, list):
            alternative_names.extend(labels)
        else:
            alternative_names.append(labels)
    artist_info['alternative_names'] = alternative_names

    artist_info['date_of_birth'] = _date_prefix(linked_data_entity, 'dateOfBirth')
    artist_info['date_of_death'] = _date_prefix(linked_data_entity, 'dateOfDeath')

    artist_info['description'] = _localized_text(
        linked_data_entity, 'description', 'No description available.'
    )

    artist_info['id'] = linked_data_entity.get('id', 'Unknown')

    return artist_info

def remove_duplicate_metadata(metadata_list):
    """Drop metadata entries whose label/value pair has already been seen.

    The first occurrence of each pair is kept and input order is preserved.
    Pairs are compared through a canonical JSON serialisation rather than a
    tuple so that labels and values that are lists or dicts (as multilingual
    IIIF metadata can be) are handled instead of raising ``TypeError:
    unhashable type``.

    Args:
        metadata_list: Iterable of dicts carrying ``label`` and ``value``.
            A missing key is treated as ``None``.

    Returns:
        A new list containing the unique entries (the same dict objects).
    """
    seen = set()
    unique_metadata = []
    for item in metadata_list:
        # sort_keys makes equal dicts serialise identically regardless of key
        # order; default=repr keeps non-JSON values from raising.
        key = json.dumps(
            [item.get('label'), item.get('value')],
            sort_keys=True,
            default=repr,
        )
        if key not in seen:
            seen.add(key)
            unique_metadata.append(item)
    return unique_metadata

def enrich_iiif_manifests(iiif_manifests, artists_info):
    """Append artist details to every manifest that credits a known artist.

    A manifest credits an artist when one of its metadata entries is labelled
    'creator', 'artist' or 'author' (case-insensitive) and that entry's value
    contains the artist's preferred name or one of the alternative names
    (case-insensitive substring match). Every matching artist contributes one
    set of "Artist ..." entries; duplicates are then removed.

    Labels and values may be plain strings or nested lists/dicts (language
    maps, ``@value`` objects); all strings found inside are considered.
    Blank artist names are ignored because an empty string is a substring of
    every value and would match every manifest.

    Args:
        iiif_manifests: List of decoded manifest dicts. Matching manifests
            are modified in place.
        artists_info: List of dicts as produced by ``extract_artist_info``.

    Returns:
        A list holding the same manifest objects, in the same order.
    """
    creator_labels = {'creator', 'artist', 'author'}

    def texts(field):
        """Collect every string inside a str / dict / list metadata field."""
        if isinstance(field, str):
            return [field]
        if isinstance(field, dict):
            if '@value' in field:
                return texts(field['@value'])
            field = list(field.values())
        if isinstance(field, (list, tuple)):
            collected = []
            for part in field:
                collected.extend(texts(part))
            return collected
        return []

    enriched_manifests = []
    for manifest in iiif_manifests:
        # Lower-cased values of all creator-like entries. Gathered once per
        # manifest: the entries appended below never carry a creator label,
        # so they cannot change this list.
        creator_values = []
        for item in manifest.get('metadata') or []:
            if not isinstance(item, dict):
                continue
            labels = texts(item.get('label'))
            if any(label.strip().lower() in creator_labels for label in labels):
                creator_values.extend(text.lower() for text in texts(item.get('value')))

        for artist_info in artists_info:
            alternative_names = [
                name for name in artist_info['alternative_names']
                if isinstance(name, str)
            ]
            candidate_names = [artist_info['preferred_name'], *alternative_names]
            names = [
                name.lower() for name in candidate_names
                if isinstance(name, str) and name.strip()
            ]
            if not any(name in value for name in names for value in creator_values):
                continue

            # Enrich the manifest with this artist's details
            manifest['metadata'].extend([
                {'label': 'Artist Preferred Name', 'value': artist_info['preferred_name']},
                {'label': 'Artist Alternative Names', 'value': ', '.join(alternative_names)},
                {'label': 'Artist Date of Birth', 'value': artist_info['date_of_birth']},
                {'label': 'Artist Date of Death', 'value': artist_info['date_of_death']},
                {'label': 'Artist Description', 'value': artist_info['description']},
                {'label': 'Artist Linked Data ID', 'value': artist_info['id']},
            ])
            # Remove duplicates (e.g. when the same artist was supplied twice)
            manifest['metadata'] = remove_duplicate_metadata(manifest['metadata'])
        enriched_manifests.append(manifest)
    return enriched_manifests

def create_relationship_graph(iiif_manifests, artists_info):
    """Build and display an interactive artist/artwork network.

    Artists become blue nodes and manifests orange nodes. An edge links a
    manifest to the first artist whose preferred or alternative name occurs
    (case-insensitive substring) in one of the manifest's 'creator',
    'artist' or 'author' metadata values.

    Labels and values may be plain strings or nested lists/dicts (language
    maps, ``@value`` objects). A manifest's node name is the first non-blank
    string found in its ``label`` (``'Unknown'`` if there is none), so a
    dict-valued label no longer fails as an unhashable node key. Blank artist
    names are ignored because an empty string would match every value.

    Args:
        iiif_manifests: List of decoded manifest dicts.
        artists_info: List of dicts as produced by ``extract_artist_info``.

    Returns:
        None. The figure is shown with the 'colab' Plotly renderer.
    """
    creator_labels = {'creator', 'artist', 'author'}

    def texts(field):
        """Collect every string inside a str / dict / list metadata field."""
        if isinstance(field, str):
            return [field]
        if isinstance(field, dict):
            if '@value' in field:
                return texts(field['@value'])
            field = list(field.values())
        if isinstance(field, (list, tuple)):
            collected = []
            for part in field:
                collected.extend(texts(part))
            return collected
        return []

    G = nx.Graph()

    # Add artist nodes, remembering each artist's lower-cased, non-blank names
    artist_names = []
    for artist_info in artists_info:
        preferred = artist_info['preferred_name']
        G.add_node(preferred, type='artist', description=artist_info['description'])
        candidates = [preferred] + list(artist_info['alternative_names'])
        artist_names.append((
            preferred,
            [name.lower() for name in candidates if isinstance(name, str) and name.strip()],
        ))

    # Add artwork nodes and edges
    for manifest in iiif_manifests:
        manifest_label = next(
            (text for text in texts(manifest.get('label')) if text.strip()),
            'Unknown',
        )
        G.add_node(manifest_label, type='artwork')
        # Check if the manifest has artist metadata
        for item in manifest.get('metadata') or []:
            if not isinstance(item, dict):
                continue
            labels = texts(item.get('label'))
            if not any(label.strip().lower() in creator_labels for label in labels):
                continue
            values = [text.lower() for text in texts(item.get('value'))]
            for preferred, names in artist_names:
                if any(name in value for name in names for value in values):
                    G.add_edge(preferred, manifest_label)
                    break  # First matching artist wins for this metadata entry

    # Compute 2-D node positions with the Kamada-Kawai layout
    pos = nx.kamada_kawai_layout(G)

    # Create Plotly graph: one line trace for all edges. The None entries
    # separate consecutive edges so they are not joined into one polyline.
    edge_x = []
    edge_y = []
    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=1, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    # One marker trace for all nodes; artists also show their description
    # in the hover text.
    node_x = []
    node_y = []
    node_text = []
    node_color = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_type = G.nodes[node].get('type', '')
        if node_type == 'artist':
            node_color.append('blue')
            hover_text = f"{node}<br>{G.nodes[node].get('description', '')}"
        else:
            node_color.append('orange')
            hover_text = node
        node_text.append(hover_text)
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=list(G.nodes()),
        textposition='top center',
        hoverinfo='text',
        hovertext=node_text,
        marker=dict(
            color=node_color,
            size=20,
            line_width=2
        )
    )
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title='Artist and Artwork Relationships',
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(
                showgrid=False,
                showticklabels=False,
                zeroline=False
            ),
            yaxis=dict(
                showgrid=False,
                showticklabels=False,
                zeroline=False
            )
        )
    )
    # Display the figure explicitly with the 'colab' renderer
    fig.show(renderer='colab')

def on_process_button_clicked(b):
    """Run the whole fetch / enrich / save / plot pipeline for the entered URIs.

    Reads both text areas, and if each holds at least one URI, fetches the
    linked data entities and IIIF manifests, enriches the manifests, writes
    each one to ``enriched_manifest_<n>.json`` in the working directory and
    finally draws the relationship graph. All progress messages are printed
    into ``output_area``, which is cleared first.

    Args:
        b: Argument supplied by the button's click event (unused).
    """

    def entered_uris(textarea):
        # One URI per line; surrounding whitespace and blank lines are dropped.
        raw_lines = textarea.value.strip().split('\n')
        return [line.strip() for line in raw_lines if line.strip()]

    with output_area:
        clear_output()

        # Get URIs from input widgets
        iiif_manifest_uris = entered_uris(iiif_manifest_uris_input)
        linked_data_uris = entered_uris(linked_data_uris_input)

        if not (iiif_manifest_uris and linked_data_uris):
            print("Please enter at least one IIIF manifest URI and one linked data entity URI.")
            return

        # Fetch data
        print("Fetching linked data entities...")
        linked_data_entities = fetch_linked_data_entities(linked_data_uris)
        print("Fetching IIIF manifests...")
        iiif_manifests = fetch_iiif_manifests(iiif_manifest_uris)

        # Extract artist info
        print("Extracting artist information...")
        artists_info = []
        for entity in linked_data_entities:
            artists_info.append(extract_artist_info(entity))

        # Enrich IIIF manifests
        print("Enriching IIIF manifests...")
        enriched_manifests = enrich_iiif_manifests(iiif_manifests, artists_info)

        # Save enriched manifests, numbered from 1 in fetch order
        for idx, manifest in enumerate(enriched_manifests):
            filename = f'enriched_manifest_{idx+1}.json'
            with open(filename, 'w', encoding='utf-8') as handle:
                json.dump(manifest, handle, ensure_ascii=False, indent=2)
            print(f"Enriched manifest saved to {filename}")

        # Create visualization
        print("Creating visualization...")
        create_relationship_graph(enriched_manifests, artists_info)

# Link the button to the callback function: register
# on_process_button_clicked as the click handler of process_button.
process_button.on_click(on_process_button_clicked)
