# Cluster Dataset "Mobile Phone Cases"

## Preparation

In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import adjusted_rand_score, silhouette_score

from src import akeneo, akeneo_clustering as ac, clustering

In [2]:
# Build the cache object from which later cells read products, attributes and
# families. NOTE(review): the helper name suggests it is configured through
# environment variables — confirm in the project's `akeneo` module.
cache = akeneo.create_cache_from_env()

Load Products:

In [3]:
# Parse the products of the "mobile_phone_cases" family out of the cache.
# The numerical, categorical and multi attribute type groups are passed as
# `attribute_types` (presumably a filter on which attributes are kept —
# TODO confirm against ac.parse_products).
products = ac.parse_products(
    cache,
    product_family="mobile_phone_cases",
    attribute_types=ac.TYPES_NUMERICAL + ac.TYPES_CATEGORICAL + ac.TYPES_MULTI,
)

In [4]:
# Tabular view of the parsed products; every later cell selects its
# clustering columns from this frame. The bare last expression displays it.
products_df = pd.DataFrame(products)
products_df

Unnamed: 0,__id__,__family__,__categories__,icecat_8156,icecat_8411,icecat_8778,icecat_27575_fixed,icecat_4860,icecat_8006,icecat_6767,icecat_898_fixed,icecat_94,icecat_1464,icecat_1649,icecat_1650,icecat_26241,icecat_9689,icecat_15767,icecat_38673,icecat_4463_fixed
0,104889,mobile_phone_cases,[s20],348496968,348496965,367987544,not_supported,False,False,0.0,,,,,,,,,,
1,MP-104927,mobile_phone_cases,[s20],348496968,348496965,367987544,not_supported,False,False,0.0,"{thermoplastic_polyurethane_tpu, silicone}",,,,,,,,,
2,MP-104925,mobile_phone_cases,[s20],348496968,348496965,367987544,not_supported,False,False,0.0,"{thermoplastic_polyurethane_tpu, silicone}",,,,,,,,,
3,MP-108422,mobile_phone_cases,[s20],348496968,348496965,367987544,not_supported,False,False,0.0,"{thermoplastic_polyurethane_tpu, silicone}",,,,,,,,,
4,MP-104926,mobile_phone_cases,[s20],348496968,348496965,367987544,not_supported,False,False,0.0,"{thermoplastic_polyurethane_tpu, silicone}",,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,0237,mobile_phone_cases,[s20_ultra],348496968,348496965,367987544,not_supported,,,1.0,"{thermoplastic_polyurethane_tpu, tempered_glass}",,,,,,,True,True,
76,0375,mobile_phone_cases,[s22],348496968,,,,,,,,,,,,,,True,,
77,0376,mobile_phone_cases,[s22_ultra],348496968,,,,,,,,,,,,,,True,,
78,0371,mobile_phone_cases,[s22],348496968,,,,,,,"{thermoplastic_polyurethane_tpu, polycarbonate...",,,,,,,True,True,


Extract wanted labels from categories:

In [5]:
# Wanted "series" label: the first category of each product, cut at the first
# underscore (e.g. "s20_ultra" -> "s20").
labels_want_series = products_df["__categories__"].map(lambda x: x[0].split("_")[0]).to_numpy()
# Number of wanted clusters = number of distinct wanted labels. Derived from
# the data instead of hard-coded so it cannot drift when the dataset changes.
k_series = len(set(labels_want_series))
pd.Series(labels_want_series).value_counts()

s20    45
s21    31
s22     4
dtype: int64

In [6]:
# Wanted "model" label: the first category of each product, taken as-is
# (e.g. "s20_ultra").
labels_want_models = products_df["__categories__"].map(lambda x: x[0]).to_numpy()
# Number of wanted clusters = number of distinct wanted labels. Derived from
# the data instead of hard-coded so it cannot drift when the dataset changes.
k_models = len(set(labels_want_models))
pd.Series(labels_want_models).value_counts()

s20          18
s20_plus     14
s21          11
s20_ultra    11
s21_plus     10
s21_fe        5
s21_ultra     5
s20_fe        2
s22           2
s22_ultra     1
s22_plus      1
dtype: int64

Loading attributes:

In [7]:
# Attribute metadata from the cache, restricted to the attributes that occur
# as columns of products_df.
attr_df = (
    pd.DataFrame(cache.attributes)
    .loc[lambda df: df["code"].isin(products_df.columns)]
)
attr_df

Unnamed: 0,code,labels,type,localizable,scopable,unique,group,group_labels,sort_order,allowed_extensions,...,max_file_size,metric_family,minimum_input_length,negative_allowed,number_min,number_max,reference_data_name,validation_rule,validation_regexp,wysiwyg_enabled
39,icecat_1464,"{'de_DE': 'Höhe', 'en_GB': 'Height', 'en_US': ...",AttributeType.METRIC,True,True,False,weight_and_dimensions,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei...",1464,[],...,,Length,,False,,,,,,
53,icecat_15767,"{'de_DE': 'Einfache Anwendung', 'en_GB': 'Easy...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",15767,[],...,,,,,,,,,,
60,icecat_1649,"{'de_DE': 'Breite', 'en_GB': 'Width', 'en_US':...",AttributeType.METRIC,True,True,False,weight_and_dimensions,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei...",1649,[],...,,Length,,False,,,,,,
61,icecat_1650,"{'de_DE': 'Tiefe', 'en_GB': 'Depth', 'en_US': ...",AttributeType.METRIC,True,True,False,weight_and_dimensions,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei...",1650,[],...,,Length,,False,,,,,,
137,icecat_26241,"{'de_DE': 'Kartentasche', 'en_GB': 'Card pocke...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",26241,[],...,,,,,,,,,,
144,icecat_27575_fixed,"{'de_DE': 'Verschluss', 'en_GB': 'Closure', 'e...",AttributeType.SELECT_SINGLE,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",27575,[],...,,,,,,,,,,
233,icecat_38673,"{'de_DE': 'Fettabweisende Beschichtung', 'en_G...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",38673,[],...,,,,,,,,,,
339,icecat_4463_fixed,"{'de_DE': 'Verpackungsart', 'en_GB': 'Package ...",AttributeType.SELECT_SINGLE,True,True,False,packaging_data,"{'en_US': 'Packaging data', 'en_GB': 'Packagin...",4463,[],...,,,,,,,,,,
349,icecat_4860,"{'de_DE': 'Desktop-Ständer', 'en_GB': 'Desktop...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",4860,[],...,,,,,,,,,,
365,icecat_6767,"{'de_DE': 'Maximale Bildschirmgröße', 'en_GB':...",AttributeType.METRIC,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",6767,[],...,,Angle,,False,,,,,,


Extract required attributes of this family:

In [8]:
# Required attribute codes of the family under the "default" key.
# NOTE(review): `families[0]` assumes the first cached family is the one being
# analysed — confirm, or look the family up explicitly.
family_requirements = cache.families[0].attribute_requirements["default"]
# Keep only the required codes that are present in attr_df (in attr_df order).
is_required = attr_df["code"].isin(family_requirements)
attr_codes_req = attr_df.loc[is_required, "code"].to_list()
attr_codes_req

['icecat_6767', 'icecat_8156', 'icecat_8778', 'icecat_898_fixed']

In [9]:
# Show the metadata rows of the required attributes.
attr_df.loc[attr_df["code"].isin(attr_codes_req)]

Unnamed: 0,code,labels,type,localizable,scopable,unique,group,group_labels,sort_order,allowed_extensions,...,max_file_size,metric_family,minimum_input_length,negative_allowed,number_min,number_max,reference_data_name,validation_rule,validation_regexp,wysiwyg_enabled
365,icecat_6767,"{'de_DE': 'Maximale Bildschirmgröße', 'en_GB':...",AttributeType.METRIC,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",6767,[],...,,Angle,,False,,,,,,
405,icecat_8156,"{'de_DE': 'Markenkompatibilität', 'en_GB': 'Br...",AttributeType.SELECT_SINGLE,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",8156,[],...,,,,,,,,,,
422,icecat_8778,"{'de_DE': 'Etui-Typ', 'en_GB': 'Case type', 'e...",AttributeType.SELECT_SINGLE,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",8778,[],...,,,,,,,,,,
426,icecat_898_fixed,"{'de_DE': 'Material', 'en_GB': 'Material', 'en...",AttributeType.SELECT_MULTI,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",898,[],...,,,,,,,,,,


Structure required attributes by their type:

In [10]:
# Split the required attribute codes by attribute type. The lists are derived
# from the attribute metadata (same type groups as used for the
# attr_codes_all_* lists) instead of being typed in by hand, so they stay in
# sync with attr_codes_req.
attr_req_df = attr_df[attr_df["code"].isin(attr_codes_req)]
attr_codes_req_num = attr_req_df[attr_req_df["type"].isin(ac.TYPES_NUMERICAL)]["code"].to_list()
attr_codes_req_cat = attr_req_df[attr_req_df["type"].isin(ac.TYPES_CATEGORICAL)]["code"].to_list()
attr_codes_req_mul = attr_req_df[attr_req_df["type"].isin(ac.TYPES_MULTI)]["code"].to_list()

Get all attributes for all types:

In [11]:
# Every attribute code, plus one list per attribute type group.
attr_codes_all = attr_df["code"].to_list()
attr_codes_all_num, attr_codes_all_cat, attr_codes_all_mul = (
    attr_df.loc[attr_df["type"].isin(type_group), "code"].to_list()
    for type_group in (ac.TYPES_NUMERICAL, ac.TYPES_CATEGORICAL, ac.TYPES_MULTI)
)

# Overview table: one row per list.
pd.DataFrame(
    {
        "attr_codes_all": [attr_codes_all],
        "attr_codes_all_num": [attr_codes_all_num],
        "attr_codes_all_cat": [attr_codes_all_cat],
        "attr_codes_all_mul": [attr_codes_all_mul],
    }
).T

Unnamed: 0,0
attr_codes_all,"[icecat_1464, icecat_15767, icecat_1649, iceca..."
attr_codes_all_num,"[icecat_1464, icecat_1649, icecat_1650, icecat..."
attr_codes_all_cat,"[icecat_15767, icecat_26241, icecat_27575_fixe..."
attr_codes_all_mul,[icecat_898_fixed]


## Clustering

Functions to compute all wanted metrics in a single run:

In [12]:
def cluster_dataset(dataset, random_state, weights = None) -> clustering.BisectingKMeans:
    """Run bisecting k-means on ``dataset`` and return the clustering object.

    With ``weights=None`` the plain ``ac.Centroid`` class is used; otherwise a
    centroid class is created from ``weights`` via
    ``ac.create_centroid_cls_with_weights``. ``random_state`` is passed on to
    ``clustering.BisectingKMeans`` unchanged.
    """
    centroid_cls = (
        ac.Centroid
        if weights is None
        else ac.create_centroid_cls_with_weights(weights)
    )
    return clustering.BisectingKMeans(dataset, centroid_cls, random_state=random_state)


def calc_metrics(attr_codes, weights = None, n_stability_runs = 9):
    """Cluster the products on ``attr_codes`` and score the clustering.

    Reads the notebook globals ``products_df``, ``k_series``, ``k_models``,
    ``labels_want_series`` and ``labels_want_models``.

    Parameters
    ----------
    attr_codes : list of str
        Columns of ``products_df`` to cluster on.
    weights : optional
        Passed on to ``ac.calc_proximity_matrix`` and ``cluster_dataset``;
        ``None`` (default) means no weights are supplied.
    n_stability_runs : int, default 9
        Number of extra clusterings (random states ``1..n_stability_runs``)
        compared against the reference clustering (random state ``0``).

    Returns
    -------
    dict
        Keyed by ``(metric, level)`` with level ``"Serie"`` or ``"Model"``:
        ``"Stabilität"`` is the mean adjusted Rand score between the reference
        clustering and each rerun, ``"Qualität"`` is the silhouette score on
        the precomputed proximity matrix, and ``"Korrektheit"`` is the adjusted
        Rand score against the wanted labels.

    Raises
    ------
    ValueError
        If ``n_stability_runs`` is smaller than 1 (the mean would be undefined).
    """
    if n_stability_runs < 1:
        raise ValueError("n_stability_runs must be at least 1")

    dataset = ac.dataset_from_records(products_df[attr_codes].to_dict("records"))
    # NOTE(review): silhouette_score(metric="precomputed") expects pairwise
    # distances — confirm that calc_proximity_matrix returns distances.
    proximity_matrix = ac.calc_proximity_matrix(dataset, weights)

    # Reference clustering (random state 0), flattened to both wanted cluster counts.
    bi_kmeans = cluster_dataset(dataset, 0, weights)
    labels_series = bi_kmeans.labels_flat(k_series)
    labels_models = bi_kmeans.labels_flat(k_models)

    # Stability: agreement of the reference with reruns under other random states.
    stabilities_series = []
    stabilities_models = []
    for seed in range(1, n_stability_runs + 1):
        rerun = cluster_dataset(dataset, seed, weights)
        stabilities_series.append(
            adjusted_rand_score(labels_series, rerun.labels_flat(k_series))
        )
        stabilities_models.append(
            adjusted_rand_score(labels_models, rerun.labels_flat(k_models))
        )

    return {
        ("Stabilität", "Serie"): np.array(stabilities_series).mean(),
        ("Stabilität", "Model"): np.array(stabilities_models).mean(),
        ("Qualität", "Serie"): silhouette_score(proximity_matrix, labels_series, metric="precomputed"),
        ("Qualität", "Model"): silhouette_score(proximity_matrix, labels_models, metric="precomputed"),
        ("Korrektheit", "Serie"): adjusted_rand_score(labels_want_series, labels_series),
        ("Korrektheit", "Model"): adjusted_rand_score(labels_want_models, labels_models),
    }

Metrics for the clusterings, grouped by attribute type and by whether all or only the required attributes are used:

In [13]:
# One clustering per (attribute type group, all/required) combination; the
# tuple keys become a two-level column header in the result table.
cases = [
    (("numerical", "all"), attr_codes_all_num),
    (("numerical", "required"), attr_codes_req_num),
    (("categorical", "all"), attr_codes_all_cat),
    (("categorical", "required"), attr_codes_req_cat),
    (("multi", "all"), attr_codes_all_mul),
    (("num+cat", "all"), attr_codes_all_num + attr_codes_all_cat),
    (("num+cat", "required"), attr_codes_req_num + attr_codes_req_cat),
    (("num+cat+mul", "all"), attr_codes_all),
    (("num+cat+mul", "required"), attr_codes_req),
]
result = {case_key: calc_metrics(case_codes) for case_key, case_codes in cases}
pd.DataFrame(result)

Unnamed: 0_level_0,Unnamed: 1_level_0,numerical,numerical,categorical,categorical,multi,num+cat,num+cat,num+cat+mul,num+cat+mul
Unnamed: 0_level_1,Unnamed: 1_level_1,all,required,all,required,all,all,required,all,required
Stabilität,Serie,0.954684,0.905867,1.0,0.850738,0.752337,1.0,0.927918,0.993787,0.940299
Stabilität,Model,0.988476,0.907324,0.927454,0.572355,0.99886,0.977562,0.954156,0.92426,0.906319
Qualität,Serie,0.616381,0.81601,0.350063,0.95625,0.637083,0.422325,0.683163,0.414713,0.351281
Qualität,Model,0.844395,0.7,0.887166,-0.6375,0.9625,0.647837,0.971481,0.443559,0.464924
Korrektheit,Serie,0.146734,0.096212,0.083232,0.119365,0.021058,0.362091,0.096212,0.264894,0.019042
Korrektheit,Model,0.448365,0.609359,0.123571,0.037584,0.042547,0.30036,0.551295,0.309848,0.202768


Try different filters for attributes with many null values:

In [14]:
# Map each attribute code to "<en_US label> – <code>" for readable column names.
col_mapping = {
    code: f"{labels['en_US']} – {code}"
    for code, labels in zip(attr_df["code"], attr_df["labels"])
}

# Non-null counts per (renamed) column show which attributes are sparsely filled.
products_df.rename(columns=col_mapping).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 20 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   __id__                                80 non-null     object 
 1   __family__                            80 non-null     object 
 2   __categories__                        80 non-null     object 
 3   Brand compatibility – icecat_8156     79 non-null     object 
 4   Surface coloration – icecat_8411      76 non-null     object 
 5   Case type – icecat_8778               76 non-null     object 
 6   Closure – icecat_27575_fixed          27 non-null     object 
 7   Desktop stand – icecat_4860           58 non-null     object 
 8   Built-in battery – icecat_8006        51 non-null     object 
 9   Maximum screen size – icecat_6767     76 non-null     float64
 10  Material – icecat_898_fixed           57 non-null     object 
 11  Weight – icecat_94   

In [15]:
# Hand-picked attribute codes treated as (almost) completely filled.
attr_codes_filled = ["icecat_8156", "icecat_8411", "icecat_8778", "icecat_6767"]

# Hand-picked attribute codes treated as mostly null, and the complement of
# that list within all attributes.
attr_codes_null = ["icecat_26241", "icecat_9689", "icecat_15767", "icecat_38673", "icecat_4463_fixed"]
attr_codes_not_null = [code for code in attr_codes_all if code not in attr_codes_null]

pd.DataFrame({
    name: calc_metrics(codes)
    for name, codes in [
        ("all attributes", attr_codes_all),
        ("only filled", attr_codes_filled),
        ("not many null", attr_codes_not_null),
    ]
})

Unnamed: 0,Unnamed: 1,all attributes,only filled,not many null
Stabilität,Serie,0.993787,0.927918,1.0
Stabilität,Model,0.92426,0.946659,0.938388
Qualität,Serie,0.414713,0.665771,0.480697
Qualität,Model,0.443559,0.310053,0.427119
Korrektheit,Serie,0.264894,0.096212,0.216965
Korrektheit,Model,0.309848,0.535888,0.351921


In [16]:
# Cluster on each attribute alone to see what it achieves by itself.
result = {attr_code: calc_metrics([attr_code]) for attr_code in attr_codes_all}
pd.DataFrame(result)

Unnamed: 0,Unnamed: 1,icecat_1464,icecat_15767,icecat_1649,icecat_1650,icecat_26241,icecat_27575_fixed,icecat_38673,icecat_4463_fixed,icecat_4860,icecat_6767,icecat_8006,icecat_8156,icecat_8411,icecat_8778,icecat_898_fixed,icecat_94,icecat_9689
Stabilität,Serie,1.0,0.952078,1.0,1.0,0.087725,0.971416,0.954215,0.031127,0.953795,0.905867,0.958998,0.25752,0.687743,0.850738,0.752337,1.0,0.087725
Stabilität,Model,1.0,0.594179,1.0,0.99578,0.068348,0.839247,0.458053,0.104256,0.610418,0.907324,0.752181,0.092547,0.379409,0.790159,0.99886,1.0,0.068348
Qualität,Serie,0.919147,0.9,0.935977,0.936639,0.0,0.9875,0.925,-0.9625,1.0,0.81601,0.6375,0.0,-0.875,0.975,0.637083,0.951427,0.0
Qualität,Model,0.949167,0.0,0.932986,0.945833,0.0,0.6625,0.0,0.0,0.3125,0.7,0.6375,0.0,0.05,0.05,0.9625,0.952618,0.0
Korrektheit,Serie,0.041013,0.123915,0.035758,0.041013,-0.001674,-0.000463,0.046351,0.046928,0.307123,0.096212,0.185114,0.046928,0.157116,0.119365,0.021058,0.041013,-0.001674
Korrektheit,Model,0.020114,0.019389,0.022125,0.01526,0.009354,-0.019393,0.002057,0.023037,0.087767,0.609359,0.073308,0.018476,0.037742,0.029927,0.042547,0.011582,0.009354


In [17]:
# Hand-picked attribute codes that the following cells inspect more closely
# (the variable name suggests they were judged to have a high impact).
attr_codes_impact = ["icecat_4860", "icecat_6767", "icecat_15767", "icecat_8778"]

In [18]:
# Single-attribute metrics of the selected attributes with readable column
# names. Relies on `result` still holding the per-attribute run; a later cell
# rebinds that name.
pd.DataFrame(result).loc[:, attr_codes_impact].rename(columns=col_mapping)

Unnamed: 0,Unnamed: 1,Desktop stand – icecat_4860,Maximum screen size – icecat_6767,Easy to apply – icecat_15767,Case type – icecat_8778
Stabilität,Serie,0.953795,0.905867,0.952078,0.850738
Stabilität,Model,0.610418,0.907324,0.594179,0.790159
Qualität,Serie,1.0,0.81601,0.9,0.975
Qualität,Model,0.3125,0.7,0.0,0.05
Korrektheit,Serie,0.307123,0.096212,0.123915,0.119365
Korrektheit,Model,0.087767,0.609359,0.019389,0.029927


In [19]:
# Show the metadata rows of the selected attributes.
attr_df.loc[attr_df["code"].isin(attr_codes_impact)]

Unnamed: 0,code,labels,type,localizable,scopable,unique,group,group_labels,sort_order,allowed_extensions,...,max_file_size,metric_family,minimum_input_length,negative_allowed,number_min,number_max,reference_data_name,validation_rule,validation_regexp,wysiwyg_enabled
53,icecat_15767,"{'de_DE': 'Einfache Anwendung', 'en_GB': 'Easy...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",15767,[],...,,,,,,,,,,
349,icecat_4860,"{'de_DE': 'Desktop-Ständer', 'en_GB': 'Desktop...",AttributeType.BOOL,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",4860,[],...,,,,,,,,,,
365,icecat_6767,"{'de_DE': 'Maximale Bildschirmgröße', 'en_GB':...",AttributeType.METRIC,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",6767,[],...,,Angle,,False,,,,,,
422,icecat_8778,"{'de_DE': 'Etui-Typ', 'en_GB': 'Case type', 'e...",AttributeType.SELECT_SINGLE,True,True,False,features,"{'en_US': 'Features', 'en_GB': 'Features', 'de...",8778,[],...,,,,,,,,,,


In [20]:
# Compare clustering on all attributes with the selected subsets.
pd.DataFrame({
    name: calc_metrics(codes)
    for name, codes in [
        ("all attributes", attr_codes_all),
        ("high impact", attr_codes_impact),
        ("highest impact", ["icecat_6767", "icecat_4860"]),
    ]
})

Unnamed: 0,Unnamed: 1,all attributes,high impact,highest impact
Stabilität,Serie,0.993787,0.657696,0.803991
Stabilität,Model,0.92426,0.853056,0.982534
Qualität,Serie,0.414713,0.420927,0.57016
Qualität,Model,0.443559,0.74105,0.838681
Korrektheit,Serie,0.264894,-0.027274,0.115584
Korrektheit,Model,0.309848,0.468458,0.662448


In [21]:
# Weight dictionaries that assign the same factor (2, 3 or 10) to the two
# attributes being emphasised.
attr_codes_weighted = ("icecat_6767", "icecat_4860")
attr_weights_2 = dict.fromkeys(attr_codes_weighted, 2.0)
attr_weights_3 = dict.fromkeys(attr_codes_weighted, 3.0)
attr_weights_10 = dict.fromkeys(attr_codes_weighted, 10.0)

pd.DataFrame({
    name: calc_metrics(attr_codes_all, attr_weights)
    for name, attr_weights in [
        ("equal weights", None),
        ("important x2", attr_weights_2),
        ("important x3", attr_weights_3),
        ("important x10", attr_weights_10),
    ]
})

Unnamed: 0,Unnamed: 1,equal weights,important x2,important x3,important x10
Stabilität,Serie,0.993787,0.995604,0.901369,0.942934
Stabilität,Model,0.92426,0.98831,0.90716,0.790941
Qualität,Serie,0.414713,0.239804,0.24316,0.253096
Qualität,Model,0.443559,0.468773,0.494199,0.568388
Korrektheit,Serie,0.264894,0.158395,0.148688,-0.001515
Korrektheit,Model,0.309848,0.292728,0.294224,0.4432


Cluster all, but omit one attribute each time:

In [22]:
# Leave-one-out: each column holds the metrics obtained when that attribute
# is removed from the full attribute list.
result = {
    omitted: calc_metrics([code for code in attr_codes_all if code != omitted])
    for omitted in attr_codes_all
}
pd.DataFrame(result)

Unnamed: 0,Unnamed: 1,icecat_1464,icecat_15767,icecat_1649,icecat_1650,icecat_26241,icecat_27575_fixed,icecat_38673,icecat_4463_fixed,icecat_4860,icecat_6767,icecat_8006,icecat_8156,icecat_8411,icecat_8778,icecat_898_fixed,icecat_94,icecat_9689
Stabilität,Serie,1.0,0.894187,1.0,1.0,0.890116,0.993787,0.630519,0.690707,0.987573,0.927733,0.978594,0.909824,0.562409,0.562409,1.0,1.0,0.890116
Stabilität,Model,0.862803,0.89558,0.81695,0.855638,0.941563,0.96785,0.866601,0.942198,0.891153,0.825029,0.906269,0.873077,0.947047,0.942012,0.977562,0.860151,0.941563
Qualität,Serie,0.373717,0.355803,0.376151,0.374627,0.413998,0.438309,0.456978,0.435174,0.443915,0.445984,0.45308,0.391625,0.437618,0.446527,0.422325,0.375177,0.413998
Qualität,Model,0.40179,0.482165,0.399865,0.397194,0.443559,0.457958,0.469921,0.426572,0.454215,0.619742,0.458514,0.402143,0.424374,0.411898,0.647837,0.394161,0.443559
Korrektheit,Serie,0.235508,0.210575,0.235508,0.235508,0.264894,0.264894,0.227654,0.202216,0.264894,0.264894,0.264894,0.235508,0.202216,0.202216,0.362091,0.235508,0.264894
Korrektheit,Model,0.278312,0.254833,0.278312,0.282763,0.309848,0.292273,0.256989,0.289075,0.309848,0.219093,0.309848,0.282763,0.289075,0.290514,0.30036,0.282763,0.309848


In [23]:
# Hand-picked attribute codes to drop, and the remaining attribute list.
attr_codes_omit = ["icecat_26241", "icecat_4860", "icecat_8006", "icecat_898_fixed", "icecat_9689"]
attr_codes_no_omit = [code for code in attr_codes_all if code not in attr_codes_omit]

pd.DataFrame({
    name: calc_metrics(codes)
    for name, codes in [
        ("all attributes", attr_codes_all),
        ("without negatives", attr_codes_no_omit),
        ("without worst (multi)", attr_codes_all_num + attr_codes_all_cat),
    ]
})

Unnamed: 0,Unnamed: 1,all attributes,without negatives,without worst (multi)
Stabilität,Serie,0.993787,0.982168,1.0
Stabilität,Model,0.92426,0.936811,0.977562
Qualität,Serie,0.414713,0.342837,0.422325
Qualität,Model,0.443559,0.704849,0.647837
Korrektheit,Serie,0.264894,0.062951,0.362091
Korrektheit,Model,0.309848,0.376272,0.30036


In [24]:
# Compact attribute overview: code, en_US label and type.
tmp = attr_df[["code", "labels", "type"]].assign(
    label=lambda df: df["labels"].map(lambda labels: labels["en_US"])
)
tmp[["code", "label", "type"]]

Unnamed: 0,code,label,type
39,icecat_1464,Height,AttributeType.METRIC
53,icecat_15767,Easy to apply,AttributeType.BOOL
60,icecat_1649,Width,AttributeType.METRIC
61,icecat_1650,Depth,AttributeType.METRIC
137,icecat_26241,Card pocket,AttributeType.BOOL
144,icecat_27575_fixed,Closure,AttributeType.SELECT_SINGLE
233,icecat_38673,Oleophobic coating,AttributeType.BOOL
339,icecat_4463_fixed,Package type,AttributeType.SELECT_SINGLE
349,icecat_4860,Desktop stand,AttributeType.BOOL
365,icecat_6767,Maximum screen size,AttributeType.METRIC


In [25]:
# Hand-picked attribute codes: a core selection and two optional additions.
attr_codes_senseful = ["icecat_1464", "icecat_1649", "icecat_1650", "icecat_6767", "icecat_8156"]
attr_codes_maybe = ["icecat_8778", "icecat_898_fixed"]

pd.DataFrame({
    name: calc_metrics(codes)
    for name, codes in [
        ("all attributes", attr_codes_all),
        ("senseful", attr_codes_senseful),
        ("senseful+", attr_codes_senseful + attr_codes_maybe),
    ]
})

Unnamed: 0,Unnamed: 1,all attributes,senseful,senseful+
Stabilität,Serie,0.993787,0.917193,0.974227
Stabilität,Model,0.92426,0.991316,0.814279
Qualität,Serie,0.414713,0.617481,0.327659
Qualität,Model,0.443559,0.81299,0.46148
Korrektheit,Serie,0.264894,0.125286,0.130481
Korrektheit,Model,0.309848,0.467167,0.259936
