# Deciphering Still Life Artworks with Linked Open Data - Code

This notebook contains all the scripts to do the analysis described in the CHR2024 paper.

Python version = Python 3.12.4

## Libraries

In [8]:
import urllib.parse
import rdflib
from rdflib import URIRef, BNode, Literal, Graph, Namespace, ConjunctiveGraph
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import namespace
import pandas

from rdflib import Graph, Namespace, RDFS
from pymantic import sparql
import string
import re

## Ancillary Functions

In [6]:
def to_camel_case(input_string):
    """
    Convert a string to CamelCase.

    Args:
    - input_string (str): The input string.

    Returns:
    - str: The CamelCase version of the input string.
    """
    # Remove punctuation from the input string
    input_string = input_string.translate(str.maketrans('', '', string.punctuation))

    # Split the input string into words using space and underscore as delimiters
    words = re.split(r'[_\s]+', input_string)

    # Capitalize the first letter of each word (except the first word)
    camel_words = [words[0].lower()] + [word.capitalize() for word in words[1:]]

    # Join the words together to form the CamelCase string
    camel_case_string = ''.join(camel_words)

    return camel_case_string

## Loading HyperReal in RDFLIB

In [4]:
# Create an instance of the Graph
hr = Graph()

# Parse the RDF data from the specified URL in Turtle format
hr.parse("https://raw.githubusercontent.com/br0ast/simulationontology/main/KG/kg.ttl", format="ttl")

# Define namespaces for easier access
sim_on = "https://w3id.org/simulation/ontology/"

sim_n = Namespace(sim_on)  # Replace with the actual URI
hr.bind("sim", sim_n)

hrdata = "https://w3id.org/simulation/data/"
hrd = Namespace(hrdata)
hr.bind("hr", hrd)

### Extract disambiguations form HR

In [14]:
setoftypes = set()

# Iterate through objects with the predicate sim_n.hasSimulacrum
for o in hr.objects(None, sim_n.hasSimulacrum, None):
    # Retrieve labels for each object
    for lab in hr.objects(o, RDFS.label, None):
        # Check if the label contains "("
        if "(" in str(lab):
            # Add the label to the set
            setoftypes.add(str(lab))

# Create a set to store types
setoftypes2 = set()

# Iterate over elements in setoftypes
for el in setoftypes:
    # Extract the type from the element
    typ = el.split("(")[1].split(")")[0]
    setoftypes2.add(typ)

# Remove types with spaces in the set
setoftypes2 = {el for el in setoftypes2 if " " not in el}

def combinewithtype(string):
    """
    Combine a string with each type from setoftypes2.

    Args:
    - string (str): The input string.

    Returns:
    - list: A list of strings where each element is the input string combined with a type.
    """
    # Create a list to store combined strings
    listoftypes = []

    # Iterate over types in setoftypes2
    for typ in setoftypes2:
        # Create a new string by combining the input string and the title-cased type
        new_string = string + to_camel_case(typ).title()        
        # Append the new string to the list
        listoftypes.append(new_string)
    # Return the list of combined strings
    return listoftypes

# Common Names for Wikidata entity

We extract both the english label of Wikidata entities and the "common taxonomy name" label, as some fruits/plants/flowers have their scientific name as the label, which does not match with HyperReal.

We first import a csv with the "depictedLabel" (scientific name) and "common" (common taxonomy name) as columns. This csv was made by filtering the stillart.csv file in excel by keeping these two colums and removing the duplicates.

In [2]:
common = pandas.read_csv("common_names.csv", delimiter=";")

In [3]:
common.head()

Unnamed: 0,depictedLabel,common
0,Grey Partridge,Grey Partridge
1,Eurasian Eagle-owl,eurasian eagle owl
2,Eurasian Eagle-owl,Eurasian Eagle-owl
3,Eurasian Eagle-owl,Northern Eagle Owl
4,house cat,Cat


## First Match Wikidata HyperReal

In [9]:
dict_change = dict()
dict_both = dict()
nope = set()
for i in range(len(common["depictedLabel"])):
    new =common["common"][i]
    orig =common["depictedLabel"][i]
    if orig.lower() != new.lower():
        camel_orig = to_camel_case(orig)
        camel_new = to_camel_case(new)
        camel_orig_uri = URIRef(hrdata+camel_orig)
        camel_new_uri = URIRef(hrdata+camel_new)
        camel_orig_uri_flower = URIRef(hrdata+camel_orig+"Flower")
        camel_new_uri_flower = URIRef(hrdata+camel_new+"Flower")

        if (None, sim_n.hasSimulacrum, camel_orig_uri) in hr and (None, sim_n.hasSimulacrum, camel_new_uri) in hr:
            if camel_orig not in dict_both:
                dict_both[camel_orig] = set()
            dict_both[camel_orig].add(camel_new)
            if (None, sim_n.hasSimulacrum, camel_orig_uri_flower) in hr:
                dict_both[camel_orig].add(camel_orig+"Flower")
            if (None, sim_n.hasSimulacrum, camel_new_uri_flower) in hr:
                dict_both[camel_orig].add(camel_new+"Flower")
        elif (None, sim_n.hasSimulacrum, camel_orig_uri) in hr and (None, sim_n.hasSimulacrum, camel_new_uri) not in hr:
            if (None, sim_n.hasSimulacrum, camel_orig_uri_flower) in hr:
                if camel_orig not in dict_both:
                    dict_both[camel_orig] = set()
                dict_both[camel_orig].add(camel_orig+"Flower")
            if (None, sim_n.hasSimulacrum, camel_new_uri_flower) in hr:
                if camel_orig not in dict_both:
                    dict_both[camel_orig] = set()
                dict_both[camel_orig].add(camel_new+"Flower")
        elif (None, sim_n.hasSimulacrum, camel_orig_uri) not in hr and (None, sim_n.hasSimulacrum, camel_new_uri) in hr:
            if (None, sim_n.hasSimulacrum, camel_orig_uri_flower) in hr:
                if camel_orig not in dict_change:
                    dict_change[camel_orig] = set()
                dict_change[camel_orig].add(camel_orig+"Flower")
            if camel_orig not in dict_change:
                dict_change[camel_orig] = set()
            dict_change[camel_orig].add(camel_new)
            if (None, sim_n.hasSimulacrum, camel_new_uri_flower) in hr:
                if camel_orig not in dict_change:
                    dict_change[camel_orig] = set()
                dict_change[camel_orig].add(camel_new+"Flower")
        elif (None, sim_n.hasSimulacrum, camel_orig_uri) not in hr and (None, sim_n.hasSimulacrum, camel_new_uri) not in hr:
            if (None, sim_n.hasSimulacrum, camel_orig_uri_flower) in hr:
                if camel_orig not in dict_change:
                    dict_change[camel_orig] = set()
                dict_change[camel_orig].add(camel_orig+"Flower")
            if (None, sim_n.hasSimulacrum, camel_new_uri_flower) in hr:
                if camel_orig not in dict_change:
                    dict_change[camel_orig] = set()
                dict_change[camel_orig].add(camel_new+"Flower")
            nope.add(camel_orig_uri)
        
# dict change has the entities that do not match with HyperReal normally but match using the common taxonomy name
# dict both has the entities that match in both their common name and normal label

## Loading the still life art query csv result file

To get this file you must use this query on wikidata: [https://w.wiki/AduD](https://w.wiki/AduD)

The query was last run in May 2024, the results might be different if you redo it. If you want to replicate exactly the results of the paper, use the csv below

In [10]:
still_art_query = pandas.read_csv("stillart.csv") # if you rerun the query, update the file with the new csv downloaded from wikidata

## Loading previous mapping from HyperReal to Wikidata

In [11]:
wkh = pandas.read_csv("wikihyper.csv", encoding="ISO-8859-1", delimiter=";")
wkhn = pandas.read_csv("wikihypernew.csv", encoding="ISO-8859-1", delimiter=";")
wkhf =pandas.concat([wkh, wkhn])

In [12]:
wkhf = wkhf.reset_index()

## Adding new matches with semi-automatic decisions

In [16]:
not_there = []
already_done = set()
dict_change_2 = dict()
types = set()
for i in range(len(still_art_query["depicted"])):
    if still_art_query["depicted"][i] not in already_done:
        depi = still_art_query["depictedLabel"][i]
        depi = to_camel_case(depi)
        there = False
        if still_art_query["depicted"][i] not in list(wkhf["wikidata"]):
            depinewlist = combinewithtype(depi)
            uridepinewlist = [URIRef(hrdata+el) for el in depinewlist]
            for depinew in uridepinewlist:
                if (None, sim_n.hasSimulacrum, depinew) in hr:
                    type_of = depinew.split(depi)[1]
                    types.add(type_of)
                    print(depinew + " proposed")
                    answer = input("Accept?")
                    if answer == "y":
                        dict_change_2[depi] = depinew.split(hrdata)[1]
                        there = True
        else:
            there = True
        if there is False:
            not_there.append(still_art_query["depictedLabel"][i])
            already_done.add(still_art_query["depicted"][i])
        else:
            already_done.add(still_art_query["depicted"][i])

https://w3id.org/simulation/data/figTree proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/figTree proposed


Accept? n


https://w3id.org/simulation/data/figFruit proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/figFruit proposed


Accept? y


https://w3id.org/simulation/data/forkImplement proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/forkImplement proposed


Accept? y


https://w3id.org/simulation/data/appleTree proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/appleTree proposed


Accept? n


https://w3id.org/simulation/data/appleFruit proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/appleFruit proposed


Accept? y


https://w3id.org/simulation/data/narcissusFlower proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/narcissusFlower proposed


Accept? y


https://w3id.org/simulation/data/narcissusPlant proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/narcissusPlant proposed


Accept? n


https://w3id.org/simulation/data/bayTopography proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/bayTopography proposed


Accept? n


https://w3id.org/simulation/data/plainTopography proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/plainTopography proposed


Accept? n


https://w3id.org/simulation/data/stoolFurniture proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/stoolFurniture proposed


Accept? y


https://w3id.org/simulation/data/chestContainer proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/chestContainer proposed


Accept? y


https://w3id.org/simulation/data/pipeMusical proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/pipeMusical proposed


Accept? n


https://w3id.org/simulation/data/pipeSmoking proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/pipeSmoking proposed


Accept? y


https://w3id.org/simulation/data/letterEpistle proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/letterEpistle proposed


Accept? y


https://w3id.org/simulation/data/matchFire proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/matchFire proposed


Accept? y


https://w3id.org/simulation/data/sealAnimal proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/sealAnimal proposed


Accept? n


https://w3id.org/simulation/data/sealStamp proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/sealStamp proposed


Accept? y


https://w3id.org/simulation/data/bayTopography proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/bayTopography proposed


Accept? n


https://w3id.org/simulation/data/hornMusical proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/hornMusical proposed


Accept? y


https://w3id.org/simulation/data/batAnimal proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/batAnimal proposed


Accept? y


https://w3id.org/simulation/data/broomPlant proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/broomPlant proposed


Accept? y


https://w3id.org/simulation/data/broomSweeping proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/broomSweeping proposed


Accept? y


https://w3id.org/simulation/data/perchFish proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/perchFish proposed


Accept? y


https://w3id.org/simulation/data/pikeFish proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/pikeFish proposed


Accept? y


https://w3id.org/simulation/data/pikeFish proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/pikeFish proposed


Accept? y


https://w3id.org/simulation/data/mustardPlant proposed does not look like a valid URI, trying to serialize this will break.


https://w3id.org/simulation/data/mustardPlant proposed


Accept? y


In [17]:
# Additional change

dict_change_2["bayLeaf"] = "bayLaurel"

In [19]:
dict_change_2b = dict()
for el in dict_change_2:
    newset = set()
    newset.add(dict_change_2[el])
    dict_change_2b[el] = newset

In [29]:
dict_change.update(dict_change_2b)

## Final matching of potential interpretation wikidata-hyperreal

**IMPORTANT**: for the next step it is necessary that HyperReal is also loaded locally in a GraphDB or Blazegraph server



In [20]:
wk_hyper = dict()
for i in range(len(wkhf["wikidata"])):
    w = list(wkhf["wikidata"])[i]
    if not pandas.isnull(wkhf["hyperreal"][i]):
        if w not in wk_hyper:
            wk_hyper[w] = ""
        wk_hyper[w] = wkhf["hyperreal"][i]
    

In [22]:
server_address = "http://LMKWDCH-NB-2300:7200/repositories/HyperReal" # change this to the blazegraph/graphdb url
server = sparql.SPARQLServer(server_address)

In [30]:
ad = set()
depi_simu = {"normal":dict(), "prevented":dict(), "healed":dict()}
for i in range(len(still_art_query["depicted"])):
    depi_w = still_art_query["depicted"][i]
    depi_l = still_art_query["depictedLabel"][i]
    depi_lc = to_camel_case(depi_l)
    if depi_lc in depi_simu["normal"]:
        depi_simu["normal"][depi_lc]["artworks"].add(still_art_query["painting"][i])
    if depi_lc in depi_simu["prevented"]:
        depi_simu["prevented"][depi_lc]["artworks"].add(still_art_query["painting"][i])
    if depi_lc in depi_simu["healed"]:
        depi_simu["healed"][depi_lc]["artworks"].add(still_art_query["painting"][i])
    if still_art_query["depicted"][i] not in ad:
        if depi_w in list(wkhf["wikidata"]) and depi_w in wk_hyper:
            simu = wk_hyper[depi_w]
            result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
select ?simulation ?rc ?context where {
    <'''+simu+'''> sim:isSimulacrumOf ?simulation .
    ?simulation sim:hasRealityCounterpart ?rc ;
                sim:hasContext ?context }''')
            for res in result["results"]["bindings"]:
                if depi_lc not in depi_simu["normal"]:
                    depi_simu["normal"][depi_lc] = {"artworks":set()}
                if res["context"]["value"] not in depi_simu["normal"][depi_lc]:
                    depi_simu["normal"][depi_lc][res["context"]["value"]] = set()
                depi_simu["normal"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                depi_simu["normal"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
            result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
select ?simulation ?rc ?context where {
    <'''+simu+'''> sim:isSimulacrumOf ?simulation .
    ?simulation sim:preventedRealityCounterpart ?rc ;
                sim:hasContext ?context }''')
            for res in result["results"]["bindings"]:
                if depi_lc not in depi_simu["prevented"]:
                    depi_simu["prevented"][depi_lc] = {"artworks":set()}
                if res["context"]["value"] not in depi_simu["prevented"][depi_lc]:
                    depi_simu["prevented"][depi_lc][res["context"]["value"]] = set()
                depi_simu["prevented"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                depi_simu["prevented"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
            result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
select ?simulation ?rc ?context where {
    <'''+simu+'''> sim:isSimulacrumOf ?simulation .
    ?simulation sim:healedRealityCounterpart ?rc ;
                sim:hasContext ?context }''')
            for res in result["results"]["bindings"]:
                if depi_lc not in depi_simu["healed"]:
                    depi_simu["healed"][depi_lc] = {"artworks":set()}
                if res["context"]["value"] not in depi_simu["healed"][depi_lc]:
                    depi_simu["healed"][depi_lc][res["context"]["value"]] = set()
                depi_simu["healed"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                depi_simu["healed"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
            ad.add(still_art_query["depicted"][i])
        if depi_lc in dict_both:
            simus = [depi_lc, list(dict_both[depi_lc])[0]]
            #print(simus)
            simus = [hrdata+sss for sss in simus]
            for simu in simus:
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
select ?simulation ?rc ?context where {
    <'''+simu+'''> sim:isSimulacrumOf ?simulation .
    ?simulation sim:hasRealityCounterpart ?rc ;
                sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["normal"]:
                        depi_simu["normal"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["normal"][depi_lc]:
                        depi_simu["normal"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["normal"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["normal"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    select ?simulation ?rc ?context where {
        <'''+simu+'''> sim:isSimulacrumOf ?simulation .
        ?simulation sim:preventedRealityCounterpart ?rc ;
                    sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["prevented"]:
                        depi_simu["prevented"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["prevented"][depi_lc]:
                        depi_simu["prevented"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["prevented"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["prevented"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    select ?simulation ?rc ?context where {
        <'''+simu+'''> sim:isSimulacrumOf ?simulation .
        ?simulation sim:healedRealityCounterpart ?rc ;
                    sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["healed"]:
                        depi_simu["healed"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["healed"][depi_lc]:
                        depi_simu["healed"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["healed"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["healed"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
            ad.add(still_art_query["depicted"][i])
        elif depi_lc in dict_change:
            simus = dict_change[depi_lc]
            simus = [hrdata+sss for sss in simus]
            for simu in simus:
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
select ?simulation ?rc ?context where {
    <'''+simu+'''> sim:isSimulacrumOf ?simulation .
    ?simulation sim:hasRealityCounterpart ?rc ;
                sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["normal"]:
                        depi_simu["normal"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["normal"][depi_lc]:
                        depi_simu["normal"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["normal"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["normal"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    select ?simulation ?rc ?context where {
        <'''+simu+'''> sim:isSimulacrumOf ?simulation .
        ?simulation sim:preventedRealityCounterpart ?rc ;
                    sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["prevented"]:
                        depi_simu["prevented"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["prevented"][depi_lc]:
                        depi_simu["prevented"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["prevented"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["prevented"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
                result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    select ?simulation ?rc ?context where {
        <'''+simu+'''> sim:isSimulacrumOf ?simulation .
        ?simulation sim:healedRealityCounterpart ?rc ;
                    sim:hasContext ?context }''')
                for res in result["results"]["bindings"]:
                    if depi_lc not in depi_simu["healed"]:
                        depi_simu["healed"][depi_lc] = {"artworks":set()}
                    if res["context"]["value"] not in depi_simu["healed"][depi_lc]:
                        depi_simu["healed"][depi_lc][res["context"]["value"]] = set()
                    depi_simu["healed"][depi_lc]["artworks"].add(still_art_query["painting"][i])
                    depi_simu["healed"][depi_lc][res["context"]["value"]].add(res["rc"]["value"])
            ad.add(still_art_query["depicted"][i])            
        else:
            ad.add(still_art_query["depicted"][i])

## IF YOU SKIPPED THE STEPS AND JUST WANT THE WIKIDATA MATCH FOR THE ANALYSIS RUN THIS

In [None]:
import pickle
file = open("still_art_int.p",'rb')
depi_simu = pickle.load(file)
file.close()

## FILTERING THE WIKIDATA QUERY TO ONLY INCLUDE PAINTINGS WITH AT LEAST 1 MATCH IN HYPERREAL

In [31]:
art_tot = set()
for t in depi_simu:
    for symb in depi_simu[t]:
        for art in depi_simu[t][symb]["artworks"]:
            art_tot.add(art)
print(len(art_tot))
print(len(set(still_art_query["painting"])))

3533
4997


In [32]:
still_art_filt = still_art_query[still_art_query['painting'].isin(art_tot)]

In [36]:
still_art_filt =still_art_filt.reset_index()

In [38]:
still_art_filt.head()

Unnamed: 0,index,painting,paintingLabel,depicted,depictedLabel,common,inception,countryLabel,iso
0,0,http://www.wikidata.org/entity/Q153517,The Basket of Bread,http://www.wikidata.org/entity/Q7802,bread,,1926-01-01T00:00:00Z,Spain,ESP
1,1,http://www.wikidata.org/entity/Q2270291,Basket of Fruit,http://www.wikidata.org/entity/Q201097,basket,,1600-01-01T00:00:00Z,Italy,ITA
2,3,http://www.wikidata.org/entity/Q152509,Luncheon on the Grass,http://www.wikidata.org/entity/Q843173,Pyrrhula,,1863-01-01T00:00:00Z,France,FRA
3,4,http://www.wikidata.org/entity/Q152509,Luncheon on the Grass,http://www.wikidata.org/entity/Q6578319,malleolus,,1863-01-01T00:00:00Z,France,FRA
4,5,http://www.wikidata.org/entity/Q152509,Luncheon on the Grass,http://www.wikidata.org/entity/Q355304,watercourse,,1863-01-01T00:00:00Z,France,FRA


## ODOR DATASET

I have downloaded the instances_all.json file from the github: https://github.com/mathiaszinnen/odor-dataset/tree/main/data

In [44]:
import json
# We match the COCO annotations to the csv, to link every artwork with the detections of the computer vision algorithm
f = open('instances_all.json')

smelly_ann = json.load(f)
smelly_df = pd.read_csv("https://raw.githubusercontent.com/mathiaszinnen/odor-dataset/main/data/meta.csv")
cat_id_label = dict()
for cat in smelly_ann["categories"]:
    cat_id_label[cat["id"]] = {"label":cat["name"], "supercategory":[cat["supercategory"]]}

new_column = []
for url in smelly_df["File Name"]:
    img_id = None
    labels = []
    for img in smelly_ann["images"]:
        if url == img["file_name"]:
            img_id = img["id"]
            break
    if img_id:
        for annotation in smelly_ann["annotations"]:
            if annotation["image_id"] == img_id:
                cat_id = annotation["category_id"]
                label = cat_id_label[cat_id]["label"]
                labels.append(label)
        labels = list(set(labels))
        labels_string = ", ".join(labels)
        new_column.append(labels_string)   
    else:
        new_column.append("")

smelly_df["detections"] = new_column


In [46]:
supercat_hr = {"bird": ["animal", "bird"], "clothing":["clothing"], "fish": ["animal", "fish"],
               "flower":["flower"], "fruit":["fruit"], "insect":["insect"], "lamp":["lamp"],
               "lighting":["lighting"], "vegetable":["plant"]} ### For the disambiguation

### MATCH ODOR HYPERREAL

In [47]:
smelly_hr = dict()
for el in cat_id_label:
    el_uri = URIRef(hrdata+to_camel_case(cat_id_label[el]["label"]))
    if (None, sim_n.hasSimulacrum, el_uri) in hr:
        if cat_id_label[el]["label"] not in smelly_hr:
            smelly_hr[cat_id_label[el]["label"]] = []
        smelly_hr[cat_id_label[el]["label"]].append(el_uri)
    for category in cat_id_label[el]["supercategory"]:
        if category in supercat_hr:
            newcategories = supercat_hr[category]
            for newcat in newcategories:
                newel = cat_id_label[el]["label"] + " " + newcat
                neweluri = URIRef(hrdata+to_camel_case(newel))
                if (None, sim_n.hasSimulacrum, neweluri) in hr:
                    if cat_id_label[el]["label"] not in smelly_hr:
                        smelly_hr[cat_id_label[el]["label"]] = []  
                    smelly_hr[cat_id_label[el]["label"]].append(neweluri)
smelly_hr["snake"].append(URIRef(hrdata+"serpent"))

In [49]:
keyword = 'still life'
columns_to_check = ['Title', 'Iconography', 'Description',"Keywords"]  # List of columns to check

filtered_smelly_df = smelly_df[smelly_df[columns_to_check].apply(lambda row: row.str.contains(keyword, na=False)).any(axis=1)]

In [51]:
filtered_smelly_df =filtered_smelly_df.reset_index()

In [None]:
# This will generate warning but only because .query is shared by rdflib and pymantiq, you can ignore them
ad_smelly = set()
depi_smelly_simu = {"normal":dict(), "prevented":dict(), "healed":dict()}
for i in range(len(filtered_smelly_df["File Name"])):
    detections = filtered_smelly_df["detections"][i].split(", ")
    for det in detections:
        if det in depi_smelly_simu["normal"]:
            depi_smelly_simu["normal"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
        if det in depi_smelly_simu["prevented"]:
            depi_smelly_simu["prevented"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
        if det in depi_smelly_simu["healed"]:
            depi_smelly_simu["healed"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
        if det not in ad_smelly:
            if det in smelly_hr:
                for simu in smelly_hr[det]:
                    result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
        PREFIX sim: <https://w3id.org/simulation/ontology/>
        select ?simulation ?rc ?context where {
            <'''+simu+'''> sim:isSimulacrumOf ?simulation .
            ?simulation sim:hasRealityCounterpart ?rc ;
                        sim:hasContext ?context }''')
                    for res in result["results"]["bindings"]:
                        if det not in depi_smelly_simu["normal"]:
                            depi_smelly_simu["normal"][det] = {"artworks":set()}
                        if res["context"]["value"] not in depi_smelly_simu["normal"][det]:
                            depi_smelly_simu["normal"][det][res["context"]["value"]] = set()
                        depi_smelly_simu["normal"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
                        depi_smelly_simu["normal"][det][res["context"]["value"]].add(res["rc"]["value"])
                    result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
        PREFIX sim: <https://w3id.org/simulation/ontology/>
        select ?simulation ?rc ?context where {
            <'''+simu+'''> sim:isSimulacrumOf ?simulation .
            ?simulation sim:hasRealityCounterpart ?rc ;
                        sim:hasContext ?context }''')
                    for res in result["results"]["bindings"]:
                        if det not in depi_smelly_simu["prevented"]:
                            depi_smelly_simu["prevented"][det] = {"artworks":set()}
                        if res["context"]["value"] not in depi_smelly_simu["prevented"][det]:
                            depi_smelly_simu["prevented"][det][res["context"]["value"]] = set()
                        depi_smelly_simu["prevented"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
                        depi_smelly_simu["prevented"][det][res["context"]["value"]].add(res["rc"]["value"])
                    result = server.query('''PREFIX kb: <https://w3id.org/simulation/data/>
        PREFIX sim: <https://w3id.org/simulation/ontology/>
        select ?simulation ?rc ?context where {
            <'''+simu+'''> sim:isSimulacrumOf ?simulation .
            ?simulation sim:hasRealityCounterpart ?rc ;
                        sim:hasContext ?context }''')
                    for res in result["results"]["bindings"]:
                        if det not in depi_smelly_simu["healed"]:
                            depi_smelly_simu["healed"][det] = {"artworks":set()}
                        if res["context"]["value"] not in depi_smelly_simu["healed"][det]:
                            depi_smelly_simu["healed"][det][res["context"]["value"]] = set()
                        depi_smelly_simu["healed"][det]["artworks"].add(filtered_smelly_df["File Name"][i])
                        depi_smelly_simu["healed"][det][res["context"]["value"]].add(res["rc"]["value"])
                    ad_smelly.add(det)

### IF YOU WANT TO SKIP THE PREVIOUS STEPS! IMPORT THIS:


In [55]:
import pickle
file = open("smelly_int.p",'rb')
depi_smelly_simu = pickle.load(file)
file.close()

### CONTINUE AFTER SKIP

In [56]:
smelly_symb_art = set()
for i in range(len(filtered_smelly_df["File Name"])):
    art = filtered_smelly_df["File Name"][i]
    for det in filtered_smelly_df["detections"][i].split(", "):
        if det in smelly_hr:
            smelly_symb_art.add(art)
            break
print(len(smelly_symb_art))

478


## RQ1

### Percentage of Christian Contexts in Wikidata

In [41]:
contexts = set()
for t in depi_simu:
    for symb in depi_simu[t]:
        ctxs = list(depi_simu[t][symb].keys())
        ctxs.remove("artworks")
        for el in ctxs:
            contexts.add(el)
ctx_count = dict()
for cont in contexts:
    ctx_count[cont]= set()
    for t in depi_simu:
        for symb in depi_simu[t]:
            if cont in depi_simu[t][symb]:
                for art in depi_simu[t][symb]["artworks"]:
                    ctx_count[cont].add(art)
ctx_count_numb = dict()
for el in ctx_count:
    ctx_count_numb[el] =len(ctx_count[el])
ctx_count_numb_perc = dict()
for el in ctx_count_numb:
    ctx_count_numb_perc[el] = round(ctx_count_numb[el]*100/3533, 3)
sorted_ctx_count_numb = sorted(ctx_count_numb.items(), key=lambda item: item[1], reverse=True)
sorted_ctx_count_numb_perc = sorted(ctx_count_numb_perc.items(), key=lambda item: item[1], reverse=True)

In [42]:
sorted_ctx_count_numb_perc[:10]

[('https://w3id.org/simulation/data/generalOrUnknown', 99.123),
 ('https://w3id.org/simulation/data/christian', 84.914),
 ('https://w3id.org/simulation/data/heraldic', 67.62),
 ('https://w3id.org/simulation/data/grecoRoman', 63.431),
 ('https://w3id.org/simulation/data/greek', 53.948),
 ('https://w3id.org/simulation/data/chinese', 47.665),
 ('https://w3id.org/simulation/data/jewish', 43.787),
 ('https://w3id.org/simulation/data/roman', 37.617),
 ('https://w3id.org/simulation/data/egyptian', 36.343),
 ('https://w3id.org/simulation/data/buddhist', 35.126)]

### Percentage of Christian Context in ODOR

In [57]:
contexts = set()
for t in depi_smelly_simu:
    for symb in depi_smelly_simu[t]:
        ctxs = list(depi_smelly_simu[t][symb].keys())
        ctxs.remove("artworks")
        for el in ctxs:
            contexts.add(el)
ctx_count = dict()
for cont in contexts:
    ctx_count[cont]= set()
    for t in depi_smelly_simu:
        for symb in depi_smelly_simu[t]:
            if cont in depi_smelly_simu[t][symb]:
                for art in depi_smelly_simu[t][symb]["artworks"]:
                    ctx_count[cont].add(art)
ctx_count_numb = dict()
for el in ctx_count:
    ctx_count_numb[el] =len(ctx_count[el])
ctx_count_numb_perc = dict()
for el in ctx_count_numb:
    ctx_count_numb_perc[el] = round(ctx_count_numb[el]*100/478, 3)
sorted_ctx_count_numb = sorted(ctx_count_numb.items(), key=lambda item: item[1], reverse=True)
sorted_ctx_count_numb_perc = sorted(ctx_count_numb_perc.items(), key=lambda item: item[1], reverse=True)

In [58]:
sorted_ctx_count_numb_perc[:10]

[('https://w3id.org/simulation/data/generalOrUnknown', 100.0),
 ('https://w3id.org/simulation/data/christian', 93.305),
 ('https://w3id.org/simulation/data/heraldic', 81.172),
 ('https://w3id.org/simulation/data/grecoRoman', 75.732),
 ('https://w3id.org/simulation/data/greek', 75.105),
 ('https://w3id.org/simulation/data/jewish', 70.921),
 ('https://w3id.org/simulation/data/chinese', 69.874),
 ('https://w3id.org/simulation/data/egyptian', 65.272),
 ('https://w3id.org/simulation/data/japanese', 56.276),
 ('https://w3id.org/simulation/data/celtic', 51.255)]

### Percentage of Christian Context SAMPLE

If you want to obtain a different sample, you must query IICONGRAPH with this query:

In [None]:
query_for_sample = '''
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX icon: <https://w3id.org/icon/ontology/>
select ?art (GROUP_CONCAT(distinct ?ctx; SEPARATOR=" @ ") as ?ctxs) where { 
	?art icon:iconographicallyDepicts ?simulation .
    ?simulation sim:hasContext ?ctx .
} GROUP BY ?art ORDER BY RAND() LIMIT 3533'''

In [59]:
sample3533 = pd.read_csv("sample3533.tsv", delimiter="\t")
ctx_count_sample = dict()
for i in range(len(sample3533["?art"])):
    ctxs = sample3533["?ctxs"][i].split(" @ ")
    ctxs = [el for el in ctxs if len(el) > 3]
    for ct in ctxs:
        if ct not in ctx_count_sample:
            ctx_count_sample[ct] = set()
        ctx_count_sample[ct].add(sample3533["?art"][i])

In [60]:
ctx_count_sample_numb = dict()
for el in ctx_count_sample:
    ctx_count_sample_numb[el] =len(ctx_count_sample[el])
ctx_count_sample_numb_perc = dict()
for el in ctx_count_sample_numb:
    ctx_count_sample_numb_perc[el] = round(ctx_count_sample_numb[el]*100/3533, 3)
sorted_ctx_count_sample_numb = sorted(ctx_count_sample_numb.items(), key=lambda item: item[1], reverse=True)
sorted_ctx_count_sample_numb_perc = sorted(ctx_count_sample_numb_perc.items(), key=lambda item: item[1], reverse=True)


In [61]:
sorted_ctx_count_sample_numb_perc[:10]

[('https://w3id.org/simulation/data/generalOrUnknown', 85.31),
 ('https://w3id.org/simulation/data/christian', 43.504),
 ('https://w3id.org/simulation/data/heraldic', 32.607),
 ('https://w3id.org/simulation/data/grecoRoman', 22.276),
 ('https://w3id.org/simulation/data/jewish', 21.596),
 ('https://w3id.org/simulation/data/chinese', 20.549),
 ('https://w3id.org/simulation/data/greek', 19.304),
 ('https://w3id.org/simulation/data/egyptian', 18.653),
 ('https://w3id.org/simulation/data/hindu', 18.256),
 ('https://w3id.org/simulation/data/buddhist', 16.7)]

## RQ2

### Timesplit and correlation Wikidata

In [39]:
import pandas as pd
from datetime import datetime

# Step 3: Define the date ranges and filter the DataFrame
still_art_filt['inception'] = still_art_filt['inception'].apply(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%SZ') if pd.notna(x) and "wikidata" not in str(x) else None)

# Step 3: Define the date ranges

date_before_1600 = datetime(1600, 1, 1)
date_1600 = datetime(1600, 1, 1)
date_1700 = datetime(1700, 1, 1)
date_1800 = datetime(1800, 1, 1)
date_1900 = datetime(1900, 1, 1)

# Step 4: Filter the DataFrame
before_1600 = still_art_filt[still_art_filt['inception'] < date_before_1600]
between_1600_and_1699 = still_art_filt[(still_art_filt['inception'] >= date_1600) & (still_art_filt['inception'] < date_1700)]
between_1700_and_1799 = still_art_filt[(still_art_filt['inception'] >= date_1700) & (still_art_filt['inception'] < date_1800)]
between_1800_and_1899 = still_art_filt[(still_art_filt['inception'] >= date_1800) & (still_art_filt['inception'] < date_1900)]
after_1900 = still_art_filt[still_art_filt['inception'] >= date_1900]

In [62]:
art_before_1600 = list(before_1600["painting"])
art_between_1600_and_1699 = list(between_1600_and_1699["painting"])
art_between_1700_and_1799 = list(between_1700_and_1799["painting"])
art_between_1800_and_1899 = list(between_1800_and_1899["painting"])
art_after_1900 = list(after_1900["painting"])
periods = [art_before_1600,
art_between_1600_and_1699,
art_between_1700_and_1799,
art_between_1800_and_1899,
art_after_1900]
symb_meanings_16th_century = dict()
symb_meanings_17th_century = dict()
symb_meanings_18th_century = dict()
symb_meanings_19th_century = dict()
symb_meanings_20_21th_century = dict()
dizz_sm = [symb_meanings_16th_century,
symb_meanings_17th_century,
symb_meanings_18th_century,
symb_meanings_19th_century,
symb_meanings_20_21th_century]
for i in range(len(dizz_sm)):    
    for t in depi_simu:
        for depi in depi_simu[t]:
            for ctx in depi_simu[t][depi]:
                if ctx != "artworks":
                    for meaning in depi_simu[t][depi][ctx]:
                        meaning_short = meaning.split(hrdata)[1]
                        if meaning_short not in dizz_sm[i]:
                            dizz_sm[i][meaning_short] = set()
                        for art in depi_simu[t][depi]["artworks"]:
                            if art in periods[i]:
                                dizz_sm[i][meaning_short].add(art)

In [63]:
smb18 = dict()
for k in symb_meanings_16th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_16th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_16th_century[k])
for k in symb_meanings_17th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_17th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_17th_century[k])
for k in symb_meanings_18th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_18th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_18th_century[k])


sma18 = dict()
a18 = [symb_meanings_19th_century, symb_meanings_20_21th_century]
for diz in a18: 
    for k in diz:
        if k not in sma18:
            sma18[k] = diz[k]
        else:
            sma18[k] = sma18[k].union(diz[k])

In [64]:
totartb18 = 2073
totarta18 = 1213

In [65]:
smb18p = dict()
sma18p = dict()
for el in smb18:
    smb18p[el] = round(len(smb18[el])*100/totartb18, 5)
for el in sma18:
    sma18p[el] = round(len(sma18[el])*100/totarta18, 5)

In [67]:
# Example data

# Mean difference
mean_diff = {key: sma18p[key] - smb18p[key] for key in smb18p}

# Euclidean Distance
euclidean_distance = sum((sma18p[key] - smb18p[key])**2 for key in smb18p)**0.5

# Manhattan Distance
manhattan_distance = sum(abs(sma18p[key] - smb18p[key]) for key in smb18p)

# Cosine Similarity
dot_product = sum(sma18p[key] * smb18p[key] for key in smb18p)
magnitude_sma18p = sum(value**2 for value in sma18p.values())**0.5
magnitude_smb18p = sum(value**2 for value in smb18p.values())**0.5
cosine_similarity = dot_product / (magnitude_sma18p * magnitude_smb18p)

# Correlation Coefficient
import numpy as np
smb18p_values = np.array(list(smb18p.values()))
sma18p_values = np.array(list(sma18p.values()))
correlation_coefficient = np.corrcoef(smb18p_values, sma18p_values)[0, 1]

#mean_diff
correlation_coefficient
## In the paper we report the correlation_coefficient

0.8460370204873723

#### Correlation Flower Language Wikidata

In [98]:
art_before_1600 = list(before_1600["painting"])
art_between_1600_and_1699 = list(between_1600_and_1699["painting"])
art_between_1700_and_1799 = list(between_1700_and_1799["painting"])
art_between_1800_and_1899 = list(between_1800_and_1899["painting"])
art_after_1900 = list(after_1900["painting"])
periods = [art_before_1600,
art_between_1600_and_1699,
art_between_1700_and_1799,
art_between_1800_and_1899,
art_after_1900]
symb_meanings_16th_century = dict()
symb_meanings_17th_century = dict()
symb_meanings_18th_century = dict()
symb_meanings_19th_century = dict()
symb_meanings_20_21th_century = dict()
dizz_sm = [symb_meanings_16th_century,
symb_meanings_17th_century,
symb_meanings_18th_century,
symb_meanings_19th_century,
symb_meanings_20_21th_century]
for i in range(len(dizz_sm)):    
    for t in depi_simu:
        for depi in depi_simu[t]:
            for ctx in depi_simu[t][depi]:
                if "flowerLanguage" in ctx:
                    for meaning in depi_simu[t][depi][ctx]:
                        meaning_short = meaning.split(hrdata)[1]
                        if meaning_short not in dizz_sm[i]:
                            dizz_sm[i][meaning_short] = set()
                        for art in depi_simu[t][depi]["artworks"]:
                            if art in periods[i]:
                                dizz_sm[i][meaning_short].add(art)

In [99]:
smb18 = dict()
for k in symb_meanings_16th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_16th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_16th_century[k])
for k in symb_meanings_17th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_17th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_17th_century[k])
for k in symb_meanings_18th_century:
    if k not in smb18:
        smb18[k] = symb_meanings_18th_century[k]
    else:
        smb18[k] = smb18[k].union(symb_meanings_18th_century[k])


sma18 = dict()
a18 = [symb_meanings_19th_century, symb_meanings_20_21th_century]
for diz in a18: 
    for k in diz:
        if k not in sma18:
            sma18[k] = diz[k]
        else:
            sma18[k] = sma18[k].union(diz[k])

In [100]:
total_art_fl_b = set()
for el in smb18:
    for art in smb18[el]:
        total_art_fl_b.add(art)
len(total_art_fl_b)

655

In [101]:
total_art_fl_a = set()
for el in sma18:
    for art in sma18[el]:
        total_art_fl_a.add(art)
len(total_art_fl_a)

332

In [102]:
smb18p = dict()
sma18p = dict()
for el in smb18:
    smb18p[el] = round(len(smb18[el])*100/655, 5)
for el in sma18:
    sma18p[el] = round(len(sma18[el])*100/332, 5)

In [103]:
# Example data

# Mean difference
mean_diff = {key: sma18p[key] - smb18p[key] for key in smb18p}

# Euclidean Distance
euclidean_distance = sum((sma18p[key] - smb18p[key])**2 for key in smb18p)**0.5

# Manhattan Distance
manhattan_distance = sum(abs(sma18p[key] - smb18p[key]) for key in smb18p)

# Cosine Similarity
dot_product = sum(sma18p[key] * smb18p[key] for key in smb18p)
magnitude_sma18p = sum(value**2 for value in sma18p.values())**0.5
magnitude_smb18p = sum(value**2 for value in smb18p.values())**0.5
cosine_similarity = dot_product / (magnitude_sma18p * magnitude_smb18p)

# Correlation Coefficient
import numpy as np
smb18p_values = np.array(list(smb18p.values()))
sma18p_values = np.array(list(sma18p.values()))
correlation_coefficient = np.corrcoef(smb18p_values, sma18p_values)[0, 1]

#mean_diff
correlation_coefficient

0.6043809588229936

### Timesplit and correlation ODOR

In [71]:
filtered_smelly_df['Earliest Date'] = pd.to_numeric(filtered_smelly_df['Earliest Date'], errors='coerce').astype('Int64')

In [74]:
smelly_after_1800 = filtered_smelly_df[filtered_smelly_df['Earliest Date'] > 1799]
smelly_before_1800 = filtered_smelly_df[filtered_smelly_df['Earliest Date'] <= 1799]
smelly_before_1800 = smelly_before_1800.reset_index()
smelly_after_1800 = smelly_after_1800.reset_index()

In [75]:
smelly_after_1800.head()

Unnamed: 0,level_0,index,File Name,Artist,Title,Iconography,Earliest Date,Latest Date,Genre,Material,Photo Archive,Image Credits,Details URL,Additional Information,Iconclass code,License,Description,Keywords,Language,detections
0,0,123,d32c384a-256c-db74-5d6b-032d586fe79a.jpg,Otto B. de Kat,Stilleven met kannen en een schaal met fruit,Pomander,1950,1949.0,painting,,https://rkd.nl/en/,https://images.rkd.nl/rkd/thumb/650x650/d32c38...,https://rkd.nl/explore/images/63054,,,,,"still life,ewer (vessel),coffeepot,pipe,dish (...",en,"fruit, carafe, pipe, drinking vessel, fig"
1,2,127,05b7974f-88a9-f82b-307f-6624952ff6c1.jpg,Otto B. de Kat,De koffiekan,Pomander,1972,1972.0,painting,,https://rkd.nl/en/,https://images.rkd.nl/rkd/thumb/650x650/05b797...,https://rkd.nl/explore/images/111515,,,,,"still life,coffeepot,dish (vessel for food)",en,coffeepot
2,4,135,ee6f2a4c-24b6-85fc-1878-921b951d754e.jpg,Otto B. de Kat,Stilleven met koffiepot,Pomander,1982,1982.0,painting,,https://rkd.nl/en/,https://images.rkd.nl/rkd/thumb/650x650/ee6f2a...,https://rkd.nl/explore/images/65510,,,,,"still life,coffeepot",en,"coffeepot, drinking vessel"
3,5,137,ec7b4dd4-b60c-6e63-102a-3f329cf68da5.jpg,Otto B. de Kat,Stilleven met koffiepot en Spaanse schotel,Pomander,1982,1982.0,painting,,https://rkd.nl/en/,https://images.rkd.nl/rkd/thumb/650x650/ec7b4d...,https://rkd.nl/explore/images/65505,,,,,"still life,coffeepot",en,"flower, pot, coffeepot, lemon"
4,6,140,2461b83e-5aeb-3bfd-c5d0-fba832154f4a.jpg,Lou Meyboom,"Still life with jug, vase, onion and thistle",Pomander,1896,1896.0,drawing,,https://rkd.nl/en/,https://images.rkd.nl/rkd/thumb/650x650/2461b8...,https://rkd.nl/explore/images/214004,,,,,"kitchen piece (still life),onion,coffeepot,vas...",en,"coffeepot, onion"


In [77]:
art_smelly_before_1800 = set(smelly_before_1800["File Name"])
art_smelly_after_1800 = set(smelly_after_1800["File Name"])

In [78]:
len(art_smelly_after_1800)

50

In [79]:
symb_art_smelly = set()
for t in depi_smelly_simu:
    for symb in depi_smelly_simu[t]:
        for art in depi_smelly_simu[t][symb]["artworks"]:
            symb_art_smelly.add(art)
len(symb_art_smelly)

478

In [80]:
art_smelly_before_1800 = set([art for art in art_smelly_before_1800 if art in symb_art_smelly])
len(art_smelly_before_1800)

439

In [81]:
art_smelly_after_1800 = set([art for art in art_smelly_after_1800 if art in symb_art_smelly])
len(art_smelly_after_1800)

38

In [82]:
ssmb18p = dict()
ssma18p = dict()
for t in depi_smelly_simu:
    for symb in depi_smelly_simu[t]:
        for ctx in depi_smelly_simu[t][symb]:
            if ctx != "artworks":
                for symb_meaning in depi_smelly_simu[t][symb][ctx]:
                    if symb_meaning not in ssmb18p:
                        ssmb18p[symb_meaning] = set()
                    if symb_meaning not in ssma18p:
                        ssma18p[symb_meaning] = set()
                    for art in depi_smelly_simu[t][symb]["artworks"]:
                        if art in art_smelly_before_1800:
                            ssmb18p[symb_meaning].add(art)
                        elif art in art_smelly_after_1800:
                            ssma18p[symb_meaning].add(art)
for el in ssmb18p:
    ssmb18p[el] = len(ssmb18p[el])*100/439
for el in ssma18p:
    ssma18p[el] = len(ssma18p[el])*100/38

In [83]:
# Example data

# Mean difference
mean_diff = {key: ssma18p[key] - ssmb18p[key] for key in ssmb18p}

# Euclidean Distance
euclidean_distance = sum((ssma18p[key] - ssmb18p[key])**2 for key in ssmb18p)**0.5

# Manhattan Distance
manhattan_distance = sum(abs(ssma18p[key] - ssmb18p[key]) for key in ssmb18p)

# Cosine Similarity
dot_product = sum(ssma18p[key] * ssmb18p[key] for key in ssmb18p)
magnitude_sma18p = sum(value**2 for value in ssma18p.values())**0.5
magnitude_smb18p = sum(value**2 for value in ssmb18p.values())**0.5
cosine_similarity = dot_product / (magnitude_sma18p * magnitude_smb18p)

# Correlation Coefficient
import numpy as np
smb18p_values = np.array(list(ssmb18p.values()))
sma18p_values = np.array(list(ssma18p.values()))
correlation_coefficient = np.corrcoef(smb18p_values, sma18p_values)[0, 1]

#mean_diff
correlation_coefficient


0.820024497113556

#### Correlation ODOR Flower Language

In [91]:
ssmb18p = dict()
ssma18p = dict()
for t in depi_smelly_simu:
    for symb in depi_smelly_simu[t]:
        for ctx in depi_smelly_simu[t][symb]:
            if "flowerLanguage" in ctx:
                for symb_meaning in depi_smelly_simu[t][symb][ctx]:
                    if symb_meaning not in ssmb18p:
                        ssmb18p[symb_meaning] = set()
                    if symb_meaning not in ssma18p:
                        ssma18p[symb_meaning] = set()
                    for art in depi_smelly_simu[t][symb]["artworks"]:
                        if art in art_smelly_before_1800:
                            ssmb18p[symb_meaning].add(art)
                        elif art in art_smelly_after_1800:
                            ssma18p[symb_meaning].add(art)

In [92]:
art_fl_b18 = set()
art_fl_a18 = set()
for el in ssmb18p:
    for art in ssmb18p[el]:
        art_fl_b18.add(art)
for el in ssma18p:
    for art in ssma18p[el]:
        art_fl_a18.add(art)
print(len(art_fl_b18))
print(len(art_fl_a18))

226
16


In [93]:
for el in ssmb18p:
    ssmb18p[el] = len(ssmb18p[el])*100/226
for el in ssma18p:
    ssma18p[el] = len(ssma18p[el])*100/16

In [95]:
# Example data

# Mean difference
mean_diff = {key: ssma18p[key] - ssmb18p[key] for key in ssmb18p}

# Euclidean Distance
euclidean_distance = sum((ssma18p[key] - ssmb18p[key])**2 for key in ssmb18p)**0.5

# Manhattan Distance
manhattan_distance = sum(abs(ssma18p[key] - ssmb18p[key]) for key in ssmb18p)

# Cosine Similarity
dot_product = sum(ssma18p[key] * ssmb18p[key] for key in ssmb18p)
magnitude_sma18p = sum(value**2 for value in ssma18p.values())**0.5
magnitude_smb18p = sum(value**2 for value in ssmb18p.values())**0.5
cosine_similarity = dot_product / (magnitude_sma18p * magnitude_smb18p)

# Correlation Coefficient
import numpy as np
smb18p_values = np.array(list(ssmb18p.values()))
sma18p_values = np.array(list(ssma18p.values()))
correlation_coefficient = np.corrcoef(smb18p_values, sma18p_values)[0, 1]

#mean_diff
correlation_coefficient


0.6198838691020041

### Increased perc in symbolic meanings flower language context Wikidata

In [104]:
df_before = pd.DataFrame(list(smb18p.items()), columns=['symbolic_meaning', 'before_1800'])
df_after = pd.DataFrame(list(sma18p.items()), columns=['symbolic_meaning', 'after_1800'])

# Merge DataFrames
df_combined = pd.merge(df_before, df_after, on='symbolic_meaning', how='outer').fillna(0)

In [106]:
# Make sure you run this after you have run the correlation FL in Wikidata
for i in range(len(df_combined["symbolic_meaning"])):
    if df_combined["before_1800"][i] < df_combined["after_1800"][i]:
        if df_combined["after_1800"][i] - df_combined["before_1800"][i] > 7:
            print(df_combined["symbolic_meaning"][i])
            print("increased perc")
            print(df_combined["after_1800"][i] - df_combined["before_1800"][i])
            print("percentage after 1800")
            print(df_combined["after_1800"][i])
            print("percentage before 1800")
            print(df_combined["before_1800"][i])
            print("***")

affection
increased perc
7.45563
percentage after 1800
12.95181
percentage before 1800
5.49618
***
comfort
increased perc
7.45563
percentage after 1800
12.95181
percentage before 1800
5.49618
***
beauty
increased perc
12.34894
percentage after 1800
23.49398
percentage before 1800
11.14504
***
love
increased perc
12.34894
percentage after 1800
23.49398
percentage before 1800
11.14504
***
gallantry
increased perc
8.895430000000001
percentage after 1800
19.27711
percentage before 1800
10.38168
***
