In [1]:
import os
import sys

# The shared helper module lives in ../utils, outside this notebook's folder,
# so that directory must be on sys.path before `import utils` can resolve.
utilsModulePath = os.path.abspath(os.path.join(os.pardir, "utils"))
if utilsModulePath not in sys.path:
    # Guarded so that re-running this cell does not append a duplicate entry.
    sys.path.append(utilsModulePath)
# Now we can import utils
import utils

# Travel Agent Case Base

A toy dataset of travel cases, available at <https://www.cs.auckland.ac.nz/research/groups/ai-cbr/cases.html>.

In [2]:
import json
# Load the travel case base from the JSON file that sits next to this notebook.
# The encoding is stated explicitly: without it, open() falls back to the
# platform's locale encoding, which is not UTF-8 on every system, and any
# non-ASCII text in the file could then be decoded incorrectly or fail.
with open('./travel.json', encoding="utf-8") as f:
    dataFile = json.load(f)
# The case entries are stored under the top-level "data" key.
data = dataFile["data"]

In [3]:
import cbrkit
# Key every case by its position in `data`, giving each one an integer id.
casebase = {position: case for position, case in enumerate(data)}

## All attributes

In [4]:
# Month names in calendar order; handed to the month similarity helper below.
months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# Global similarity over five attributes. Each attribute is compared with its
# own local measure and the per-attribute scores are pooled with "mean".
simFunction = cbrkit.sim.attribute_value(
    attributes=dict(
        # Categorical attributes: compared by equality.
        HolidayType=cbrkit.sim.generic.equality(),
        # Numeric attributes: linear similarity over the given interval.
        NumberOfPersons=cbrkit.sim.numbers.linear_interval(1, 12),
        Region=cbrkit.sim.generic.equality(),
        Duration=cbrkit.sim.numbers.linear_interval(3, 21),
        # Months: circular measure from the project's utils module.
        Month=utils.circular_range_similarity(months),
    ),
    aggregator=cbrkit.sim.aggregator(pooling="mean"),
)

In [5]:
# Run the all-attribute similarity function over the case base; "caseId" is
# passed as the attribute that identifies each case.
similarityData = utils.compute_similarity_data(
    casebase,
    simFunction,
    idAttribute="caseId",
)

In [6]:
# Bundle the computed similarities with a description of how they were
# produced: the global function plus, for each attribute, the local function,
# its weight (five attributes, 1/5 each) and a human-readable note.
similarityDataComplete = {
    "similarityDescription": {
        "globalSim": {
            "simFunction": "Weighted average",
        },
        "localSim": {
            "HolidayType": {
                "simFunction": "equals",
                "weight": 1/5,
                "description": "String equality",
            },
            "NumberOfPersons": {
                "simFunction": "RangeSimilarity",
                "weight": 1/5,
                "description": "Similarity normalized in range (1,12)",
            },
            "Region": {
                "simFunction": "equals",
                "weight": 1/5,
                "description": "String equality",
            },
            "Duration": {
                "simFunction": "RangeSimilarity",
                "weight": 1/5,
                "description": "Similarity normalized in range (3,21)",
            },
            "Month": {
                "simFunction": "CircularRangeSimilarity",
                "weight": 1/5,
                "description": "Similarity between months in range (January...December)",
            },
        },
    },
    "similarities": similarityData,
}

In [7]:
# Hand the assembled structure to the utils exporter together with the
# file name "allAttributes.json".
utils.exportToJson(
    similarityDataComplete,
    "allAttributes.json",
)

## Months

The following pair of similarity data files is created to test how SimViz can be employed to debug a similarity function.

First, we create a similarity data file with a function that uses only the Month attribute and does **not** take into account the cyclical nature of months. For example, with this function, January and December are very different, so they have a low similarity value.

In [8]:

# Month-only similarity built on the nominal (non-circular) month measure
# from utils; with a single attribute, "mean" pooling has one score to pool.
simFunction = cbrkit.sim.attribute_value(
    attributes={"Month": utils.nominal_range_similarity(months)},
    aggregator=cbrkit.sim.aggregator(pooling="mean"),
)

In [9]:
# Recompute the similarity data with whatever `simFunction` currently holds;
# "caseId" is passed as the attribute that identifies each case.
similarityData = utils.compute_similarity_data(
    casebase,
    simFunction,
    idAttribute="caseId",
)

In [10]:
# Bundle the similarities with their description. Month is the only
# attribute listed, so it carries the full weight of 1.0.
similarityDataComplete = {
    "similarityDescription": {
        "globalSim": {
            "simFunction": "Weighted average",
        },
        "localSim": {
            "Month": {
                "simFunction": "NominalRangeSimilarity",
                "weight": 1.0,
                "description": "Similarity between months in range (January...December)",
            },
        },
    },
    "similarities": similarityData,
}

In [11]:
# Hand the nominal-month structure to the utils exporter together with the
# file name "month1.json".
utils.exportToJson(
    similarityDataComplete,
    "month1.json",
)

Now, we create a similarity data file with a function that uses only the Month attribute and **does** take into account the cyclical nature of months. For example, with this function, January and December are very similar, so they have a high similarity value.

In [12]:
# Month-only similarity built on the circular month measure from utils;
# with a single attribute, "mean" pooling has one score to pool.
simFunction = cbrkit.sim.attribute_value(
    attributes={"Month": utils.circular_range_similarity(months)},
    aggregator=cbrkit.sim.aggregator(pooling="mean"),
)

In [13]:
# Recompute the similarity data with whatever `simFunction` currently holds;
# "caseId" is passed as the attribute that identifies each case.
similarityData = utils.compute_similarity_data(
    casebase,
    simFunction,
    idAttribute="caseId",
)

In [14]:
# Bundle the similarities with their description. Month is the only
# attribute listed, so it carries the full weight of 1.0.
similarityDataComplete = {
    "similarityDescription": {
        "globalSim": {
            "simFunction": "Weighted average",
        },
        "localSim": {
            "Month": {
                "simFunction": "CircularRangeSimilarity",
                "weight": 1.0,
                "description": "Similarity between months in range (January...December)",
            },
        },
    },
    "similarities": similarityData,
}

In [15]:
# Hand the circular-month structure to the utils exporter together with the
# file name "month2.json".
utils.exportToJson(
    similarityDataComplete,
    "month2.json",
)