In [1]:
## env setup
%env OPENAI_API_KEY = "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"
# %pip install openai 
# %pip install networkx
# %pip install matplotlib
# %pip install pymongo

env: OPENAI_API_KEY="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"


In [3]:
# Uncomment the following line if running in a Jupyter notebook
# %pip install pyvis matplotlib ipywidgets networkx openai pymongo

import networkx as nx
from openai import OpenAI
import json
import os
from itertools import combinations
from networkx.readwrite import json_graph
from pymongo import MongoClient
from datetime import datetime  # For timestamps
from pyvis.network import Network  # For visualization
import matplotlib.colors as mcolors
from matplotlib import cm
from string import Template  # For templating JavaScript code
import hashlib

# -------------------- Configuration -------------------- #

# Adjustable weights for connectivity scores
visual_weight = 0.5    # Weight for visual connectivity score
narrative_weight = 0.5 # Weight for narrative connectivity score

# Ensure that the sum of weights is not zero to avoid division by zero
if visual_weight + narrative_weight == 0:
    raise ValueError("The sum of visual_weight and narrative_weight must be greater than zero.")

# -------------------- Initialize Clients -------------------- #

# Initialize the OpenAI client
# Ensure that your OpenAI API key is set in the environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OpenAI API key not found in environment variables.")
client = OpenAI(api_key=openai_api_key)

# MongoDB Connection
try:
    mongo_client = MongoClient("mongodb+srv://btonyip:Kbd0PvJSa6yL0Ui7@spatialcluster.tbvav.mongodb.net/spatial?retryWrites=true&w=majority&appName=SpatialCluster")
    db = mongo_client['spatial']
    collection = db['Artworks']
except Exception as e:
    print(f"Error connecting to MongoDB: {e}")
    exit(1)

# -------------------- Data Fetching -------------------- #

def fetch_artworks_from_db(limit=22):
    """
    Fetches artworks from the MongoDB collection.
    
    Parameters:
        limit (int): Number of artworks to fetch.
    
    Returns:
        list: A list of artwork dictionaries.
    """
    try:
        artworks_data = collection.find({}).limit(limit)  # Limit to first 'limit' items
    except Exception as e:
        print(f"Error fetching data from MongoDB: {e}")
        return []
    
    artworks = []
    for artwork in artworks_data:
        artworks.append({
            "name": artwork.get("title", "Untitled"),
            "artist": artwork.get("artist_name", "Unknown Artist"),
            "description": f"description: {artwork.get('description', '')} "
                           f"historical significance: {artwork.get('historical_significance', '')} "
                           f"style significance: {artwork.get('style_significance', '')} "
                           f"exhibition utilisation: {artwork.get('exhibition_utilisation', '')}",
            "imageurl": artwork.get("image", "")
        })
    return artworks

# Fetch artworks from the database
artworks = fetch_artworks_from_db()

# -------------------- Graph Initialization -------------------- #

graph_file = 'artworks_graph.json'
if os.path.exists(graph_file):
    with open(graph_file, 'r') as f:
        data = json.load(f)
        G = json_graph.node_link_graph(data)
    print("Graph loaded from", graph_file)
else:
    G = nx.Graph()
    print("Initialized a new graph.")

# Add artworks as nodes
for artwork in artworks:
    if not G.has_node(artwork['name']):
        G.add_node(
            artwork['name'],
            artist=artwork['artist'],
            description=artwork['description'],
            imageurl=artwork['imageurl']
        )
        print(f"Added node: {artwork['name']}")
    else:
        print(f"Node already exists: {artwork['name']}")

# -------------------- Connectivity Scores Computation -------------------- #

rubrics = "Give me the connectivity score of these two artworks"

# Generate all unique pairs of artworks
artwork_pairs = list(combinations(artworks, 2))

# Function to compute connectivity scores using OpenAI API
def compute_connectivity_scores(artwork_a, artwork_b):
    """
    Computes the visual and narrative connectivity scores between two artworks.
    
    Parameters:
        artwork_a (dict): Artwork A data.
        artwork_b (dict): Artwork B data.
    
    Returns:
        dict: Connectivity scores and reasoning.
    """
    messages = [
        {
            "role": "system",
            "content": "You are an art expert who evaluates the connectivity between two artworks based on their visual and narrative aspects."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": rubrics},
                {"type": "text", "text": f"Artwork A: {artwork_a['name']} by {artwork_a['artist']}"},
                {"type": "text", "text": "Description:"},
                {"type": "text", "text": artwork_a["description"]},
                {"type": "image_url", "image_url": {"url": artwork_a["imageurl"]}},
                {"type": "text", "text": f"Artwork B: {artwork_b['name']} by {artwork_b['artist']}"},
                {"type": "text", "text": "Description:"},
                {"type": "text", "text": artwork_b["description"]},
                {"type": "image_url", "image_url": {"url": artwork_b["imageurl"]}},
            ]
        }
    ]
    
    # Define the response format using JSON schema
    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "connectivity_score_schema",
            "schema": {
                "type": "object",
                "properties": {
                    "visual_connectivity_score": {
                        "type": "number",
                        "description": "The visual connectivity score between the two artworks"
                    },
                    "visual_reasoning": {
                        "type": "string",
                        "description": "Reasoning for the visual connectivity score"
                    },
                    "narrative_connectivity_score": {
                        "type": "number",
                        "description": "The narrative connectivity score between the two artworks"
                    },
                    "narrative_reasoning": {
                        "type": "string",
                        "description": "Reasoning for the narrative connectivity score"
                    }
                },
                "required": [
                    "visual_connectivity_score",
                    "visual_reasoning",
                    "narrative_connectivity_score",
                    "narrative_reasoning"
                ],
                "additionalProperties": False
            }
        }
    }
    
    try:
        # Create the chat completion
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=messages,
            response_format=response_format
        )
        # Parse the JSON response
        response_content = completion.choices[0].message.content
        connectivity_data = json.loads(response_content)
        return connectivity_data
    except Exception as e:
        print(f"Error computing connectivity scores: {e}")
        return None

# Compute and add edges with connectivity scores
for artwork_a, artwork_b in artwork_pairs:
    if not G.has_edge(artwork_a['name'], artwork_b['name']):
        print(f"Processing pair: {artwork_a['name']} - {artwork_b['name']}")
        connectivity_data = compute_connectivity_scores(artwork_a, artwork_b)
        if connectivity_data:
            G.add_edge(
                artwork_a['name'],
                artwork_b['name'],
                visual_connectivity_score=connectivity_data['visual_connectivity_score'],
                visual_reasoning=connectivity_data['visual_reasoning'],
                narrative_connectivity_score=connectivity_data['narrative_connectivity_score'],
                narrative_reasoning=connectivity_data['narrative_reasoning']
            )
            print(f"Added edge: {artwork_a['name']} - {artwork_b['name']}")
        else:
            print(f"Failed to add edge for pair: {artwork_a['name']} - {artwork_b['name']}")

# -------------------- Save and Insert Graph -------------------- #

# Save the graph to JSON
data = json_graph.node_link_data(G)  # Node-link format to serialize

with open(graph_file, 'w') as f:
    json.dump(data, f, indent=4)
    print(f"Graph saved to {graph_file}")

# Insert the graph JSON into MongoDB
graph_collection = db['ArtworksGraph']  # Define a new collection for the graph

# Prepare the document to insert
graph_document = {
    "graph": data,
    "created_at": datetime.utcnow()
}

# Insert the document into the ArtworksGraph collection
try:
    graph_hash = hashlib.sha256(json.dumps(data).encode()).hexdigest()
    if graph_collection.find_one({"hash": graph_hash}):
        print("Graph already exists in the database.")
    else:
        graph_document["hash"] = graph_hash
        result = graph_collection.insert_one(graph_document)

except Exception as e:
    print(f"Error inserting graph into MongoDB: {e}")

# -------------------- Pyvis Visualization with Sliders -------------------- #

# Initialize Pyvis network without the 'physics' parameter
net = Network(height='750px', width='100%', notebook=False, directed=False)

# Apply Force Atlas 2 layout with customized physics settings
net.force_atlas_2based(
    gravity=-30,
    central_gravity=0.0005,
    spring_length=20,
    spring_strength=0.3,
    damping=0.4,
    overlap=0
)

# Convert NetworkX graph to Pyvis graph
net.from_nx(G)

# Customize node appearance
for node in net.nodes:
    node_id = node['id']
    node['title'] = f"<b>{node_id}</b><br>Artist: {G.nodes[node_id]['artist']}<br>Description: {G.nodes[node_id]['description']}"
    node['label'] = node_id  # Use artwork name as label
    node['value'] = 10  # Adjust node size
    node['color'] = 'lightgreen'  # Node color

# Compute min and max for normalization and slider ranges
if G.edges():
    min_visual = min([G[u][v]['visual_connectivity_score'] for u, v in G.edges()])
    max_visual = max([G[u][v]['visual_connectivity_score'] for u, v in G.edges()])
    min_narrative = min([G[u][v]['narrative_connectivity_score'] for u, v in G.edges()])
    max_narrative = max([G[u][v]['narrative_connectivity_score'] for u, v in G.edges()])
else:
    min_visual = max_visual = min_narrative = max_narrative = 0

# Ensure the sliders have a valid range
if min_visual == max_visual:
    max_visual += 1  # Avoid zero range
if min_narrative == max_narrative:
    max_narrative += 1  # Avoid zero range

# Normalize function
def normalize(value, min_val, max_val):
    if max_val - min_val == 0:
        return 0.5  # Avoid division by zero; neutral value
    return (value - min_val) / (max_val - min_val)

# Define a colormap
cmap = cm.get_cmap('coolwarm')  # Choose any colormap you prefer

# Customize edge appearance based on connectivity scores
for edge in net.edges:
    u = edge['from']
    v = edge['to']
    visual_score = G[u][v].get('visual_connectivity_score', 0)
    narrative_score = G[u][v].get('narrative_connectivity_score', 0)
    
    # Normalize the scores
    norm_visual = normalize(visual_score, min_visual, max_visual)
    norm_narrative = normalize(narrative_score, min_narrative, max_narrative)
    
    # Compute weighted overall score
    overall_score = (visual_weight * norm_visual) + (narrative_weight * norm_narrative)
    
    # Adjust edge width based on overall_score
    edge['width'] = 1 + overall_score * 5  # Base width plus scaled connectivity
    
    # Set edge color using colormap
    rgba_color = cmap(overall_score)
    hex_color = mcolors.to_hex(rgba_color)
    edge['color'] = hex_color
    
    # Add title for hover information
    edge['title'] = f"Visual Score: {visual_score}<br>Narrative Score: {narrative_score}"
    
    # Include the scores in edge data for filtering
    edge['data'] = {
        'visual_connectivity_score': visual_score,
        'narrative_connectivity_score': narrative_score
    }
    
    # Set initial edge length inversely proportional to overall_score
    # Higher overall_score => shorter length
    edge['length'] = 300 * (1 - overall_score) + 100  # Length between 100 and 400
    
    # Debug: Print edge attributes
    print(f"Edge from {u} to {v}: Length={edge['length']}, Color={edge['color']}, Width={edge['width']}")

# Generate the HTML string
html_string = net.generate_html()

# Insert the sliders after the <body> tag
slider_html = f'''
<!-- Sliders for interactivity -->
<div style="margin: 10px;">
  <label for="visualThreshold">Visual Connectivity Threshold:</label>
  <input type="range" id="visualThreshold" name="visualThreshold" min="{min_visual}" max="{max_visual}" value="{min_visual}" step="0.1" style="width: 300px;">
  <span id="visualThresholdValue">{min_visual}</span>
</div>

<div style="margin: 10px;">
  <label for="narrativeThreshold">Narrative Connectivity Threshold:</label>
  <input type="range" id="narrativeThreshold" name="narrativeThreshold" min="{min_narrative}" max="{max_narrative}" value="{min_narrative}" step="0.1" style="width: 300px;">
  <span id="narrativeThresholdValue">{min_narrative}</span>
</div>

<!-- New edge influence slider -->
<div style="margin: 10px;">
  <label for="edgeInfluence">Edge Influence Factor:</label>
  <input type="range" id="edgeInfluence" name="edgeInfluence" min="0.5" max="10" value="1" step="0.1" style="width: 300px;">
  <span id="edgeInfluenceValue">1.0</span>
</div>
'''

body_index = html_string.find('<body>')
if body_index == -1:
    print("Failed to find <body> tag in HTML.")
else:
    body_end_index = body_index + len('<body>')
    html_string = html_string[:body_end_index] + slider_html + html_string[body_end_index:]

# Insert 'var allEdges = new vis.DataSet(edges.get());' after edges initialization
edges_init_index = html_string.find('var edges = new vis.DataSet([')
if edges_init_index == -1:
    print("Failed to find edges initialization in HTML.")
else:
    edges_init_end = html_string.find(']);', edges_init_index) + len(']);')
    # Insert the allEdges dataset
    html_string = html_string[:edges_init_end] + '\nvar allEdges = new vis.DataSet(edges.get());\n' + html_string[edges_init_end:]

# Insert the JavaScript code before </body> tag
js_code = Template('''
<script type="text/javascript">
// Load D3.js for color interpolation
// Ensure D3.js is loaded
if (typeof d3 === 'undefined') {
    var script = document.createElement('script');
    script.src = "https://d3js.org/d3.v6.min.js";
    document.head.appendChild(script);
}

// Get references to the sliders
var visualThresholdSlider = document.getElementById('visualThreshold');
var narrativeThresholdSlider = document.getElementById('narrativeThreshold');
var edgeInfluenceSlider = document.getElementById('edgeInfluence'); // New slider

// Store all edges as an array
var allEdges = new vis.DataSet(edges.get());

// Store the original weights
var originalVisualWeight = $visual_weight;
var originalNarrativeWeight = $narrative_weight;

// Function to update the displayed value next to the slider
function updateSliderValue(sliderId, valueId) {
    var slider = document.getElementById(sliderId);
    var display = document.getElementById(valueId);
    display.innerText = parseFloat(slider.value).toFixed(1);
}

// Initialize the slider values
updateSliderValue('visualThreshold', 'visualThresholdValue');
updateSliderValue('narrativeThreshold', 'narrativeThresholdValue');
updateSliderValue('edgeInfluence', 'edgeInfluenceValue');

// Event listeners for sliders
visualThresholdSlider.addEventListener('input', function() {
    updateSliderValue('visualThreshold', 'visualThresholdValue');
    updateEdges();
});

narrativeThresholdSlider.addEventListener('input', function() {
    updateSliderValue('narrativeThreshold', 'narrativeThresholdValue');
    updateEdges();
});

edgeInfluenceSlider.addEventListener('input', function() { // New event listener
    updateSliderValue('edgeInfluence', 'edgeInfluenceValue');
    updateEdges();
});

function updateEdges() {
    var visualThreshold = parseFloat(visualThresholdSlider.value);
    var narrativeThreshold = parseFloat(narrativeThresholdSlider.value);
    var edgeInfluence = parseFloat(edgeInfluenceSlider.value); // Get edge influence factor

    // Recalculate weights based on edge influence factor
    var visualWeight = originalVisualWeight * edgeInfluence;
    var narrativeWeight = originalNarrativeWeight * edgeInfluence;

    // Normalize weights to ensure their sum is not zero
    // Removed normalization to allow edgeInfluence to directly affect overall_score
    // var totalWeight = visualWeight + narrativeWeight;
    // if (totalWeight === 0) {
    //     visualWeight = 0.5;
    //     narrativeWeight = 0.5;
    // } else {
    //     visualWeight /= totalWeight;
    //     narrativeWeight /= totalWeight;
    // }

    // Filter edges based on threshold values
    var filteredEdges = allEdges.get({
        filter: function (edge) {
            return (edge.data.visual_connectivity_score >= visualThreshold) &&
                   (edge.data.narrative_connectivity_score >= narrativeThreshold);
        }
    });

    // Update edge attributes based on new weights
    filteredEdges.forEach(function(edge) {
        // Normalize the scores
        var norm_visual = ($max_visual - $min_visual) !== 0 ? 
            (edge.data.visual_connectivity_score - $min_visual) / ($max_visual - $min_visual) : 0.5;
        var norm_narrative = ($max_narrative - $min_narrative) !== 0 ? 
            (edge.data.narrative_connectivity_score - $min_narrative) / ($max_narrative - $min_narrative) : 0.5;

        // Compute weighted overall score
        var overall_score = (visualWeight * norm_visual) + (narrativeWeight * norm_narrative);

        // Cap overall_score to a maximum of 1.0 to avoid negative lengths
        overall_score = Math.min(overall_score, 1.0);

        // Update edge width
        var newWidth = 1 + overall_score * 5;
        edges.update({id: edge.id, width: newWidth});

        // Update edge color
        if (typeof d3 !== 'undefined') {
            var newColor = d3.interpolateCool(overall_score); // Using 'cool' colormap
            edges.update({id: edge.id, color: {color: newColor}});
        }

        // Update edge length to cluster higher connectivity scores
        var newLength = 300 * (1 - overall_score) + 100; // Length between 100 and 400
        edges.update({id: edge.id, length: newLength});
    });

    // Reset and update edges in the DataSet
    edges.clear();
    edges.add(filteredEdges);

    // Restart physics simulation
    network.stabilize();
}
</script>
''')

# Formatting the JavaScript with actual Python variables
js_code_formatted = js_code.substitute(
    visual_weight=visual_weight,
    narrative_weight=narrative_weight,
    min_visual=min_visual,
    max_visual=max_visual,
    min_narrative=min_narrative,
    max_narrative=max_narrative
)

# Insert the formatted JavaScript before </body>
body_end_index = html_string.find('</body>')
if body_end_index == -1:
    print("Failed to find </body> tag in HTML.")
else:
    html_string = html_string[:body_end_index] + js_code_formatted + html_string[body_end_index:]

# Insert D3.js before </head> if not already inserted
d3_script = '''
<script src="https://d3js.org/d3.v6.min.js"></script>
'''

head_end_index = html_string.find('</head>')
if head_end_index == -1:
    print("Failed to find </head> tag in HTML.")
else:
    # Check if D3.js is already included to avoid duplication
    if 'd3.v6.min.js' not in html_string:
        html_string = html_string[:head_end_index] + d3_script + html_string[head_end_index:]
    else:
        print("D3.js is already included in the HTML.")

# Save the modified HTML to a file
output_file = 'artworks_graph_with_sliders.html'
with open(output_file, 'w') as f:
    f.write(html_string)
    print(f"Interactive graph with sliders saved to {output_file}")


Graph loaded from artworks_graph.json
Node already exists: Blue
Node already exists: Card of the Pilipino
Node already exists: He was out there all alone riding the monsoon waves like a champ
Node already exists: Again and again we are somewhere
Node already exists: Untitled
Node already exists: Soap Blocked
Node already exists: The Flower Field 
Node already exists: The Internalised Self
Node already exists: The Dustpan from TOOLS/WORK
Node already exists: Untitled (MT_116)
Node already exists: Der Sekundentraum
Node already exists: Time: Dust
Node already exists: Domesticated Politics
Node already exists: Bicycle Tyre Rolling Event from Yangon series: 1. “Bank of Innya Lake” 2. “Bogyoke’s Bronze Statue” 3. “Independence Monument” 4. “Kan Daw Gyi Park”
Node already exists: Untitled
Node already exists: Untitled
Node already exists: Untitled
Node already exists: Mon Boulet
Node already exists: Peripheral Ritual I, II, III
Node already exists: Untitled
Node already exists: Tapestry of J

  cmap = cm.get_cmap('coolwarm')  # Choose any colormap you prefer
