In [8]:
import os
import sys

# The utils module lives outside this notebook's folder, so that folder has
# to be present on sys.path before the module can be imported.
utilsModulePath = os.path.abspath("../utils")
if utilsModulePath not in sys.path:
    # Only append once so re-running the cell does not add duplicates.
    sys.path.append(utilsModulePath)
# Now we can import utils
import utils

# BCWD

Similarity data for the [Breast Cancer Wisconsin Data](https://archive.ics.uci.edu/dataset/15/breast+cancer+wisconsin+original). We create a single similarity dataset in which all the attributes are used.

In [9]:
import json

# Load the dataset file. JSON text is UTF-8 by specification, so the encoding
# is passed explicitly instead of relying on the platform's default encoding.
with open('./bcwd.json', encoding='utf-8') as f:
    dataFile = json.load(f)
# The cases are stored under the top-level "data" key of the file.
data = dataFile["data"]

In [10]:
import cbrkit
# Build the case base as a mapping from each case's position in `data` to the case.
casebase = {position: case for position, case in enumerate(data)}

In [13]:
# Names of the attributes that take part in the similarity computation.
columnNames = [
    "ClumpThickness",
    "UniformityCellSize",
    "UniformityCellShape",
    "MarginalAdhesion",
    "SingleEpithelialCellSize",
    "BareNuclei",
    "BlandChromatin",
    "NormalNucleoli",
    "Mitoses",
]

# Local similarity: one linear-interval measure per attribute, each built
# with the bounds (1, 14).
# NOTE(review): the UCI dataset description appears to list these attributes
# on a 1-10 scale -- confirm that the upper bound 14 is intentional.
attributes = {
    name: cbrkit.sim.numbers.linear_interval(1, 14)
    for name in columnNames
}

In [15]:
# Global similarity: combine the per-attribute similarities with "mean" pooling.
simFunction = cbrkit.sim.attribute_value(
    aggregator=cbrkit.sim.aggregator(pooling="mean"),
    attributes=attributes,
)

In [16]:
# Compute the similarity data for the case base with the global similarity
# function defined above. The structure of the result is determined by
# utils.compute_similarity_data, which is not shown in this notebook.
similarityData = utils.compute_similarity_data(casebase, simFunction)

In [17]:
# Description shared by every attribute: all attributes use the same local
# similarity and carry an equal weight of 1 / (number of attributes).
value = dict(
    simFunction="RangeSimilarity",
    weight=1/len(columnNames),
    description="Similarity normalized in range (1,14)",
)

# Store an independent copy per attribute. Assigning `value` itself would make
# every key reference the same dict object, so editing one attribute's
# description later would silently change all of them.
localSimDescription = {}
for att in columnNames:
    localSimDescription[att] = dict(value)

# Wrap the computed similarities together with a description of how they
# were obtained (global aggregation plus per-attribute local similarity).
similarityDataComplete = {
    "similarityDescription": {
        "globalSim": {
            "simFunction": "Weighted average"
        },
        "localSim": localSimDescription
    },
    "similarities": similarityData,
}

{'ClumpThickness': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'UniformityCellSize': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'UniformityCellShape': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'MarginalAdhesion': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'SingleEpithelialCellSize': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'BareNuclei': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarity normalized in range (1,14)'},
 'BlandChromatin': {'simFunction': 'RangeSimilarity',
  'weight': 0.1111111111111111,
  'description': 'Similarit

In [10]:
# Hand the similarity data and its description to the project helper for
# export under the name "AllAttributes2.json".
# NOTE(review): presumably this serializes the dict as JSON to that path --
# the implementation lives in utils and is not visible here; confirm.
utils.exportToJson(similarityDataComplete, "AllAttributes2.json")