In [2]:
import os
import sys

# The utils module lives outside this notebook's directory ("../utils"), so
# that folder has to be on sys.path before the module can be imported.
# The membership check keeps re-running this cell from appending duplicates.
utilsModulePath = os.path.abspath("../utils")
if utilsModulePath not in sys.path:
    sys.path.append(utilsModulePath)
# Now we can import utils
import utils

# Cars

A 1,000-record sample of the [Used cars dataset](https://www.kaggle.com/datasets/austinreese/craigslist-carstrucks-data). Some attributes from the original dataset were removed.

In [4]:
import json
# Load the sampled dataset. The file is read as a JSON object and its "data"
# entry is kept as the collection of records used in the following cells.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open() would
# otherwise fall back to the platform default, which is not UTF-8 everywhere.
with open('./cars-1k.json', encoding='utf-8') as f:
    dataFile = json.load(f)
data = dataFile["data"]

In [5]:
import cbrkit
# Key every record by its position in `data`, giving each case an integer id.
casebase = {caseId: record for caseId, record in enumerate(data)}

## Year, color, manufacturer and condition

In [6]:
# Local similarity measure for each attribute that takes part in the comparison.
# NOTE(review): the condition scale lists 'excellent' before 'new' and
# 'like new'; if utils.nominal_range_similarity treats the list as an ordered
# ranking, confirm that this order is intended.
localMeasures = {
    "year": cbrkit.sim.numbers.linear_interval(1950, 2021),
    "paint_color": utils.color_similarity(),
    "manufacturer": cbrkit.sim.strings.levenshtein(),
    "condition": utils.nominal_range_similarity(
        ['excellent', 'new', 'like new', 'good', 'fair', 'salvage']
    ),
}

# The per-attribute scores are combined with "mean" pooling.
meanPooling = cbrkit.sim.aggregator(pooling="mean")

# Global similarity function over the four attributes above.
simFunction = cbrkit.sim.attribute_value(
    attributes=localMeasures,
    aggregator=meanPooling,
)

In [7]:
# Apply simFunction to the casebase through the project helper in utils.
# NOTE(review): presumably this yields the similarity between every pair of
# cases — confirm the exact structure in utils.compute_similarity_data.
similarityData = utils.compute_similarity_data(casebase, simFunction)

In [8]:
# Description of each local measure: the function name, its weight and a
# human-readable explanation, in the same order they are exported.
# NOTE(review): "Levensthein" and "cualitative" are exported exactly as
# written; correct the spelling only after checking that nothing consuming the
# JSON depends on these values.
localSimDescription = {
    "paint_color": {
        "simFunction": "max(delta_e2000)",
        "weight": 0.25,
        "description": "Perceived similarity between colors using [Delta E 2000 color difference function](http://www.colorwiki.com/wiki/Delta_E:_The_Color_Difference#dE2000), which is a standard measurement that quantifies the difference between two colors that appear on a screen",
    },
    "manufacturer": {
        "simFunction": "Levensthein",
        "weight": 0.25,
        "description": "String equality using Levensthein",
    },
    "year": {
        "simFunction": "RangeSimilarity",
        "weight": 0.25,
        "description": "Similarity normalized in range (1950, 2021)",
    },
    "condition": {
        "simFunction": "NominalRangeSimilarity",
        "weight": 0.25,
        "description": "Similarity in an ordered cualitative range (excellent...salvage)",
    },
}

# Final document: the similarity description first, then the computed values.
similarityDataComplete = {
    "similarityDescription": {
        "globalSim": {"simFunction": "Weighted average"},
        "localSim": localSimDescription,
    },
    "similarities": similarityData,
}

In [10]:
# Hand the complete document to the project's export helper.
# NOTE(review): presumably this writes it to "cars_similarity.json" relative to
# the notebook's working directory — confirm in utils.exportToJson.
utils.exportToJson(similarityDataComplete, "cars_similarity.json")