# DMH artwork

A set of N artworks from Design Museum Helsinki. The dataset was created in the context of the [SPICE project](https://spice-h2020.eu/).

In [1]:
import json
  
# Load the artwork records from the JSON file.
# The encoding is given explicitly: without it, open() falls back to the
# platform default, which is the likely cause of mis-decoded collection names
# such as 'KÃ¤yttÃ¶kokoelma' in the recorded output.
with open('./DMH_artworks.json', encoding='utf-8') as f:
    data = json.load(f)
# Preview only the first records instead of dumping the whole dataset.
data[:2]

[{'_id': '634f19096c70ef5cf152f237',
  'tittle': 'Scandia',
  'Object': 'CUTLERY',
  'Special name': '*',
  'id': '44171',
  'author': 'Franck, Kaj',
  'Production date': '1952',
  'Collection': 'KÃ¤yttÃ¶kokoelma',
  'Manufacturer': 'Hackman Sorsakoski',
  'Dimension in cm': 'fork: 17.5 cm (length) x 0.15cm (thickness)  knife: 18.6 cm (length) x 0.3/0.15 cm (thickness)  spoon: 18 cm (length)',
  'Weight in kg': '*',
  'Materials': ['metal'],
  'Colour': ['metal'],
  '_datasetid': '0daa0287-d7f4-4f03-a068-95f43afcc347',
  '_timestamp': 1668599056,
  '_timestamp_year': 2022,
  '_timestamp_month': 11,
  '_timestamp_day': 16,
  '_timestamp_hour': 11,
  '_timestamp_minute': 44,
  '_timestamp_second': 16,
  '_updated': 1.0,
  'year': 1952.0,
  'ColourRGB': [[109, 114, 120]],
  'image': 'https://i.postimg.cc/Zqn1M35m/44171.png',
  'Object group': 'cutlery',
  'Color': [{'colorName': 'metal', 'rgb': [109, 114, 120]}]},
 {'_id': '634f1908b7693f159a62e2f6',
  'tittle': 'Savonia',
  'Object': 'CU

In [2]:
# Collect the "_id" of every artwork record, in dataset order.
itemIds = [artwork["_id"] for artwork in data]

In [3]:
from colormath.color_objects import LabColor, sRGBColor
from colormath.color_conversions import convert_color
from colormath.color_diff import delta_e_cie1976, delta_e_cie1994, delta_e_cie2000, delta_e_cmc

import numpy as np
def patch_asscalar(a):
    """Stand-in for ``numpy.asscalar``: return the result of ``a.item()``."""
    return a.item()


# Install the stand-in on the numpy module under the name "asscalar".
# NOTE(review): presumably needed because colormath's delta-E functions still
# call np.asscalar, which recent NumPy releases no longer provide - confirm.
np.asscalar = patch_asscalar

class EqualSim:
    """Exact-match similarity between two attribute values."""

    def __init__(self):
        # Stateless. The constructor was previously misspelled "__init_",
        # so it was never used as the initializer.
        pass

    def compute(self, att1, att2):
        """Return 1.0 when ``att1 == att2``, otherwise 0.0."""
        return 1.0 if att1 == att2 else 0.0
    
class ColorSim:
    """Normalised colour difference between two colour descriptors.

    ``compute`` returns the configured distance function's value divided by
    100 and capped at 1.0, i.e. a distance rather than a similarity.
    """

    def __init__(self, distanceFunction= delta_e_cie2000):
        # Callable applied to two Lab colours; delta_e_cie2000 unless overridden.
        self.__distanceFunction = distanceFunction

    def compute(self,att1, att2):
        """Return the normalised distance between two colours.

        Each argument must support ``"rgb" in att`` and ``att["rgb"]``, and its
        ``rgb`` entry must hold exactly three elements (R, G, B). When either
        argument fails that check, 0.0 is returned.
        """
        # Validate att1 first, then att2 (same order as the data is used below).
        if "rgb" not in att1 or len(att1["rgb"]) != 3:
            return 0.0
        if "rgb" not in att2 or len(att2["rgb"]) != 3:
            return 0.0

        # Convert both colours from sRGB to Lab, att1 first.
        labColors = []
        for att in (att1, att2):
            channels = att["rgb"]
            rgbColor = sRGBColor(channels[0], channels[1], channels[2], is_upscaled=True)
            labColors.append(convert_color(rgbColor, LabColor))

        # The delta functions are expected to return values in [0, 100];
        # dividing by 100 rescales them to [0.0, 1.0].
        scaledDelta = self.__distanceFunction(labColors[0], labColors[1])/100

        # Cap anything above 1.0.
        if scaledDelta > 1.0:
            return 1.0
        return scaledDelta

        
class ListSim:
    """Compare two lists by aggregating a pairwise function over all element pairs.

    The result is ``1 - aggregate(pairwise values)``, so the wrapped pairwise
    function is treated as a distance.
    """

    def __init__(self, similarityFunction, aggFunction):
        # Object with a ``compute(a, b)`` method applied to every element pair.
        self.__similarityFunction = similarityFunction
        # Object with an ``aggregate(values)`` method applied to the pair values.
        self.__aggFunction = aggFunction

    def compute(self,att1, att2):
        """Return 1 minus the aggregated pairwise value of ``att1`` x ``att2``.

        Returns 0.0 when either argument is not exactly a ``list`` or is empty.
        """
        for candidate in (att1, att2):
            if (type(candidate) is not list) or len(candidate) == 0:
                return 0.0

        # Every element of att1 against every element of att2, att1-major order.
        pairValues = [
            self.__similarityFunction.compute(left, right)
            for left in att1
            for right in att2
        ]
        return 1-self.__aggFunction.aggregate(pairValues)
    
    
class AggregationFunction:
    """Thin wrapper exposing a plain callable through an ``aggregate`` method."""

    def __init__(self, aggFunction=max):
        # Callable applied to the list of values; the built-in max by default.
        self.__aggFunction = aggFunction

    def aggregate(self,valueList):
        """Apply the wrapped callable to ``valueList`` and return its result."""
        reducer = self.__aggFunction
        return reducer(valueList)
    


        

In [4]:
def computeLocalSimilarityOnAttribute(itemA, itemB, attName, aLocalSimilarityFunction):
    """Apply a local similarity function to one attribute of two items.

    Returns ``aLocalSimilarityFunction.compute(itemA[attName], itemB[attName])``.
    Raises ``Exception`` when ``attName`` is missing from either item.
    """
    # Guard clause: both items must carry the attribute.
    if attName not in itemA or attName not in itemB:
        raise Exception("Attribute not found: "+attName)

    valueA = itemA[attName]
    valueB = itemB[attName]
    return aLocalSimilarityFunction.compute(valueA, valueB)


def weightedAverage(values, weights):
    """Return the weighted mean of ``values``, delegating to ``numpy.average``."""
    weightedMean = np.average(values, weights=weights)
    return weightedMean

def computeSimilarity(simDescription, itemIds, localSimFunctions, globalAggFunction, items=None):
    """Compute the pairwise similarity of the items identified by ``itemIds``.

    Args:
        simDescription: description stored unchanged under
            ``"similarityDescription"`` in the result.
        itemIds: ids (``"_id"`` values) of the items to compare. Every
            unordered pair is compared once, in the order given.
        localSimFunctions: list of dicts with the keys ``"attName"``,
            ``"localSimFunction"`` and ``"weight"``.
        globalAggFunction: callable ``(values, weights)`` that combines the
            local values of one pair into the global value.
        items: records to look the ids up in. Defaults to the notebook-level
            ``data`` list, which keeps existing calls working unchanged.

    Returns:
        A dict with ``"similarityDescription"`` and ``"similarityData"``; the
        latter holds one entry per pair with ``id1``, ``id2`` and a ``value``
        dict containing the ``global`` value and the per-attribute ``local``
        values.

    Raises:
        KeyError: if an id in ``itemIds`` has no matching record.
    """
    if items is None:
        # Backward-compatible default: the dataset loaded by the notebook.
        items = data

    # Resolve ids to records instead of assuming that position k of itemIds
    # matches position k of the dataset; that assumption silently paired the
    # wrong items for any subset or reordered id list.
    itemsById = {}
    for item in items:
        # Keep the first record seen for an id.
        itemsById.setdefault(item["_id"], item)

    missingIds = [itemId for itemId in itemIds if itemId not in itemsById]
    if missingIds:
        raise KeyError("Item ids not found in dataset: " + ", ".join(map(str, missingIds)))

    selectedItems = [itemsById[itemId] for itemId in itemIds]

    simData = []
    for i, itemA in enumerate(selectedItems):
        for itemB in selectedItems[i + 1:]:
            globalData = dict(id1=itemA["_id"], id2=itemB["_id"])
            localData = {}
            localValues = []
            localWeights = []
            for localAtt in localSimFunctions:
                simValue = computeLocalSimilarityOnAttribute(
                    itemA, itemB, localAtt["attName"], localAtt["localSimFunction"])
                localData[localAtt["attName"]] = simValue
                localValues.append(simValue)
                localWeights.append(localAtt["weight"])
            sim = globalAggFunction(localValues, localWeights)
            globalData["value"] = {"global": sim, "local": localData}
            simData.append(globalData)

    dataFile = dict(
        similarityDescription=simDescription,
        similarityData=simData
    )
    return dataFile


## Color + author (0.5 - 0.5)

In [5]:
# Colour comparison: ColorSim applied to every pair of colours from the two
# artworks, aggregated with AggregationFunction's default (max).
aSimilarityFunction = ColorSim()
anAggFunction = AggregationFunction()
theColorSimFunction = ListSim(aSimilarityFunction, anAggFunction)

# Local functions and their weights: colour and author count equally.
localSimFunctions = [
    {"attName": "Color", "localSimFunction": theColorSimFunction, "weight": 0.5},
    {"attName": "author", "localSimFunction": EqualSim(), "weight": 0.5},
]

# Human-readable description stored alongside the similarity values.
simDescription = {
    "globalSim": {"simFunction": "Weighted average"},
    "localSim": {
        "Color": {"simFunction": "max(delta_e_cie2000)", "weight": 0.5},
        "author": {"simFunction": "equals", "weight": 0.5},
    },
}

dataFile = computeSimilarity(simDescription, itemIds, localSimFunctions, weightedAverage)
with open("Author50Color50.json", "w") as fp:
    json.dump(dataFile, fp,  indent=2)

## Color + author (0.8 - 0.2)

In [6]:
# Build the colour comparison in this cell rather than relying on whichever
# theColorSimFunction another cell left behind (the avg(Color) cell rebinds it).
theColorSimFunction = ListSim(ColorSim(), AggregationFunction())

# Human-readable description stored alongside the similarity values.
simDescription = dict(
    globalSim= dict(
        simFunction="Weighted average"
    ),
    localSim = dict(
        Color = dict(
            simFunction = "max(delta_e_cie2000)",
            weight = 0.8
        ),
        author = dict(
            simFunction = "equals",
            weight = 0.2
        )
    )
)

# Colour weighs 0.8, author 0.2.
localSimFunctions = [ dict(attName="Color", localSimFunction=theColorSimFunction, weight=0.8), 
                      dict(attName="author", localSimFunction=EqualSim(), weight=0.2)]

dataFile = computeSimilarity(simDescription, itemIds, localSimFunctions, weightedAverage)
# The file name follows the Author<weight>Color<weight> pattern of
# "Author50Color50.json". The former name "Author80Color20.json" had the two
# weights swapped relative to the values actually used above.
with open("Author20Color80.json", "w") as fp:
    json.dump(dataFile, fp,  indent=2)

## max(Color)

The similarity is the maximum color similarity between colors in artworks

In [None]:
# Build the max-aggregated colour comparison in this cell. The avg(Color) cell
# rebinds theColorSimFunction to an average-based one, so relying on the
# leftover variable would write average-based values into simMaxColor.json
# whenever this cell is re-run after that one.
theColorSimFunction = ListSim(ColorSim(), AggregationFunction(max))

localSimFunctions = [ dict(attName="Color", localSimFunction=theColorSimFunction, weight=1.0)]

# Human-readable description stored alongside the similarity values.
simDescription = dict(
    globalSim= dict(
        simFunction="Weighted average"
    ),
    localSim = dict(
        Color = dict(
            simFunction = "max(delta_e_cie2000)",
            weight = 1.0
        )
    )
)
dataFile = computeSimilarity(simDescription, itemIds, localSimFunctions, weightedAverage)
with open("simMaxColor.json", "w") as fp:
    json.dump(dataFile, fp,  indent=2)

## avg(Color)

The similarity is the average color similarity between colors in artworks

In [None]:
# Colour comparison: ColorSim applied to every pair of colours from the two
# artworks, aggregated with numpy's average.
aSimilarityFunction = ColorSim()
anAggFunction = AggregationFunction(np.average)
theColorSimFunction = ListSim(aSimilarityFunction, anAggFunction)

# Colour is the only local function, so it carries the full weight.
localSimFunctions = [
    {"attName": "Color", "localSimFunction": theColorSimFunction, "weight": 1.0},
]

# Human-readable description stored alongside the similarity values.
simDescription = {
    "globalSim": {"simFunction": "Weighted average"},
    "localSim": {
        "Color": {"simFunction": "avg(delta_e_cie2000)", "weight": 1.0},
    },
}

dataFile = computeSimilarity(simDescription, itemIds, localSimFunctions, weightedAverage)
with open("simAvgColor.json", "w") as fp:
    json.dump(dataFile, fp,  indent=2)


## range(decades)

The similarity is computed from the production year, based on how close the decades that the two years belong to are.

In [None]:
class DecadeSimilarity:
    """Similarity of two years based on the distance between their decades.

    The decade distance is normalised by the decade span between ``minYear``
    and ``maxYear`` given at construction time.
    """

    def __init__(self, minYear, maxYear):
        self.minDecade = self.convertYearToDecade(minYear)
        self.maxDecade = self.convertYearToDecade(maxYear)
        # Span used to normalise decade differences; 0 when both bounds fall
        # in the same decade.
        self.decadeRange = self.maxDecade - self.minDecade

    def compute(self,year1, year2):
        """Return ``1 - |normalised decade difference|`` for two years.

        Returns 0.0 when either year is ``None``. When the configured span
        covers a single decade, returns 1.0 for years in the same decade and
        0.0 otherwise instead of dividing by zero.
        """
        if (year1 is None) or (year2 is None):
            return 0.0

        decade1 = self.convertYearToDecade(year1)
        decade2 = self.convertYearToDecade(year2)

        if self.decadeRange == 0:
            # Degenerate span: normalising would raise ZeroDivisionError.
            return 1.0 if decade1 == decade2 else 0.0

        normalizeDecade1 = (decade1 - self.minDecade) / self.decadeRange
        normalizeDecade2 = (decade2 - self.minDecade) / self.decadeRange
        return 1-abs(normalizeDecade2 - normalizeDecade1)

    def convertYearToDecade(self,year):
        """Map a year to its decade as ``int((year-1)/10) * 10``.

        Years ending in 0 therefore fall into the preceding decade
        (1950 -> 1940, 1951 -> 1950).
        """
        return int((year-1)/10) * 10
    


In [None]:
# Years of all artworks whose "year" is set; their extremes define the decade span.
years = [artwork["year"] for artwork in data if artwork["year"] is not None]

decadeSimilarityFunction = DecadeSimilarity(min(years), max(years))

# The year is the only local function, so it carries the full weight.
localSimFunctions = [
    {"attName": "year", "localSimFunction": decadeSimilarityFunction, "weight": 1.0},
]

# Human-readable description stored alongside the similarity values.
simDescription = {
    "globalSim": {"simFunction": "Weighted average"},
    "localSim": {
        "year": {"simFunction": "Range(Decades)", "weight": 1.0},
    },
}

dataFile = computeSimilarity(simDescription, itemIds, localSimFunctions, weightedAverage)
with open("simDecades.json", "w") as fp:
    json.dump(dataFile, fp,  indent=2)
