# Check Import of Icecat Attributes

The Icecat attributes were entered by hand into Akeneo. In this notebook we check whether that process went well.

In [1]:
import pandas as pd

from src import akeneo, config

## Getting Data

In [2]:
# Directory that holds the CSV files this notebook reads and writes.
data_dir = config.dir_data / "dataset"

### Load Wanted Attributes

In [3]:
# Load the wanted Icecat attributes and prefix each code with "icecat_",
# the form in which the codes are compared against Akeneo further down.
attributes_want_df = pd.read_csv(data_dir / "attributes-icecat.csv").assign(
    code=lambda frame: frame["code"].map("icecat_{}".format)
)
attributes_want_df

Unnamed: 0,code,locale_en,group,type,mobile_phone_cases,smartphones
0,icecat_34,Contrast ratio (typical),display,contrast ratio,,optional
1,icecat_47,Processor model,processor,dropdown,,required
2,icecat_48,Digital zoom,camera,numerical,,optional
3,icecat_74,Optical zoom,camera,numerical,,optional
4,icecat_75,Platform,software,dropdown,,required
...,...,...,...,...,...,...
385,icecat_44158,RAM expansion (max),storage,numerical,,optional
386,icecat_44188,AI scene recognition,camera,y_n,,optional
387,icecat_44243,Fourth rear camera pixel size,camera,numerical,,optional
388,icecat_44244,Fourth rear camera field of view (FOV) angle,camera,numerical,,optional


### Get Attributes in PIM

In [4]:
# Akeneo API client used for every request in this notebook.
# NOTE(review): judging by the helper's name, the connection settings come
# from environment variables -- confirm in src.akeneo.
client = akeneo.create_client_from_env()

In [5]:
# Fetch the attribute list from Akeneo and tabulate it, one row per attribute.
# NOTE(review): assumes client.request returns the complete list rather than
# a single page -- confirm against the client implementation.
attributes_got_all = client.request("pim_api_attribute_list")
attributes_got_all_df = pd.DataFrame(attributes_got_all)
attributes_got_all_df

Unnamed: 0,code,type,group,unique,useable_as_grid_filter,allowed_extensions,metric_family,default_metric_unit,reference_data_name,available_locales,...,date_max,max_file_size,minimum_input_length,sort_order,localizable,scopable,labels,auto_option_sorting,default_value,group_labels
0,ean,pim_catalog_text,basic,True,False,[],,,,[],...,,,,0,False,False,"{'en_US': 'EAN', 'en_GB': 'EAN', 'de_DE': 'EAN'}",,,"{'en_US': 'Basic', 'en_GB': 'Basic', 'de_DE': ..."
1,icecat_10035,pim_catalog_metric,display,False,False,[],Angle,RADIAN,,[],...,,,,10035,True,True,"{'de_DE': 'Swivel Winkel', 'en_GB': 'Swivel an...",,,"{'en_US': 'Display', 'en_GB': 'Display', 'de_D..."
2,icecat_10101,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10101,True,True,"{'de_DE': 'Gefaltete Tiefe', 'en_GB': 'Folded ...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
3,icecat_10102,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10102,True,True,"{'de_DE': 'Zusammengeklappte Höhe', 'en_GB': '...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
4,icecat_10103,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10103,True,True,"{'de_DE': 'Zusammengeklappte Breite', 'en_GB':...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,icecat_name,pim_catalog_text,basic,False,False,[],,,,[],...,,,,0,True,True,"{'en_US': 'Name', 'en_GB': 'Name', 'de_DE': 'N...",,,"{'en_US': 'Basic', 'en_GB': 'Basic', 'de_DE': ..."
431,icecat_summary,pim_catalog_textarea,basic,False,False,[],,,,[],...,,,,0,True,True,"{'en_US': 'Summary', 'en_GB': 'Summary', 'de_D...",,,"{'en_US': 'Basic', 'en_GB': 'Basic', 'de_DE': ..."
432,icecat_summary_short,pim_catalog_text,basic,False,False,[],,,,[],...,,,,0,True,True,"{'en_US': 'Short Summary', 'en_GB': 'Short Sum...",,,"{'en_US': 'Basic', 'en_GB': 'Basic', 'de_DE': ..."
433,icecat_title,pim_catalog_text,basic,False,False,[],,,,[],...,,,,0,True,True,"{'en_US': 'Title', 'en_GB': 'Title', 'de_DE': ...",,,"{'en_US': 'Basic', 'en_GB': 'Basic', 'de_DE': ..."


Remove all attributes of the `basic` group and all `_fixed` attributes:

In [6]:
# Keep only the attributes to compare against the wanted list: drop the
# `basic` group and the replacement attributes this notebook creates under
# the code "<original code>_fixed".
is_basic = attributes_got_all_df["group"].eq("basic")
# Match `_fixed` as a suffix only (the original used a regex "contains"),
# so a code that merely contains the text elsewhere is not dropped.
# na=False treats a missing code as "not fixed" instead of propagating NaN
# into the boolean mask.
is_fixed = attributes_got_all_df["code"].str.endswith("_fixed", na=False)

attributes_got_df = attributes_got_all_df[~(is_basic | is_fixed)]
attributes_got_df

Unnamed: 0,code,type,group,unique,useable_as_grid_filter,allowed_extensions,metric_family,default_metric_unit,reference_data_name,available_locales,...,date_max,max_file_size,minimum_input_length,sort_order,localizable,scopable,labels,auto_option_sorting,default_value,group_labels
1,icecat_10035,pim_catalog_metric,display,False,False,[],Angle,RADIAN,,[],...,,,,10035,True,True,"{'de_DE': 'Swivel Winkel', 'en_GB': 'Swivel an...",,,"{'en_US': 'Display', 'en_GB': 'Display', 'de_D..."
2,icecat_10101,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10101,True,True,"{'de_DE': 'Gefaltete Tiefe', 'en_GB': 'Folded ...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
3,icecat_10102,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10102,True,True,"{'de_DE': 'Zusammengeklappte Höhe', 'en_GB': '...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
4,icecat_10103,pim_catalog_metric,weight_and_dimensions,False,False,[],Length,METER,,[],...,,,,10103,True,True,"{'de_DE': 'Zusammengeklappte Breite', 'en_GB':...",,,"{'en_US': 'Weight & dimensions', 'en_GB': 'Wei..."
5,icecat_10157,pim_catalog_boolean,network,False,False,[],,,,[],...,,,,10157,True,True,"{'de_DE': 'Nahfeldkommunikation (NFC)', 'en_GB...",,,"{'en_US': 'Network', 'en_GB': 'Network', 'de_D..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420,icecat_9732,pim_catalog_boolean,sensors,False,False,[],,,,[],...,,,,9732,True,True,"{'de_DE': 'Gyroskop', 'en_GB': 'Gyroscope', 'e...",,,"{'en_US': 'Sensors', 'en_GB': 'Sensors', 'de_D..."
421,icecat_9763,pim_catalog_boolean,network,False,False,[],,,,[],...,,,,9763,True,True,"{'de_DE': 'AirPlay', 'en_GB': 'AirPlay', 'en_U...",,,"{'en_US': 'Network', 'en_GB': 'Network', 'de_D..."
422,icecat_9779,pim_catalog_boolean,network,False,False,[],,,,[],...,,,,9779,True,True,"{'de_DE': 'Wi-Fi Direct', 'en_GB': 'Wi-Fi Dire...",,,"{'en_US': 'Network', 'en_GB': 'Network', 'de_D..."
423,icecat_9780,pim_catalog_simpleselect,faulty,False,False,[],,,,[],...,,,,9780,True,True,"{'de_DE': 'Unterstützte Google-Anwendungen', '...",,,"{'en_US': 'Faulty', 'en_GB': 'Faulty', 'de_DE'..."


## Checking Attributes

### Not Imported

Some attributes could not be imported into Akeneo for an unknown reason.

In [7]:
# Wanted attributes whose code does not appear among the attributes in Akeneo.
imported_codes = attributes_got_df["code"]
attributes_want_df.loc[~attributes_want_df["code"].isin(imported_codes)]

Unnamed: 0,code,locale_en,group,type,mobile_phone_cases,smartphones
2,icecat_48,Digital zoom,camera,numerical,,optional
3,icecat_74,Optical zoom,camera,numerical,,optional
221,icecat_20228,Quantity per pack,packaging_data,numerical,optional,
228,icecat_22659,Products per master (outer) case,logistics_data,numerical,optional,
233,icecat_22664,Products per shipping (inner) case,logistics_data,numerical,optional,optional
255,icecat_28170,Shipping (inner) cases per master (outer) case,logistics_data,numerical,,optional


### Unnecessary Imports 

In [8]:
# Attributes present in Akeneo whose code is not in the wanted list.
wanted_codes = attributes_want_df["code"]
attributes_got_df.loc[~attributes_got_df["code"].isin(wanted_codes)]

Unnamed: 0,code,type,group,unique,useable_as_grid_filter,allowed_extensions,metric_family,default_metric_unit,reference_data_name,available_locales,...,date_max,max_file_size,minimum_input_length,sort_order,localizable,scopable,labels,auto_option_sorting,default_value,group_labels


### Check Type Mapping

The Icecat types and the Akeneo PIM types differ. Here we look at how they were mapped onto each other.

In [9]:
# Put the Akeneo type and the Icecat type of every attribute side by side.
# The outer join keeps attributes that exist on only one side; their type on
# the other side is NaN.
merged_df = attributes_got_df[["code", "type"]].merge(
    attributes_want_df[["code", "type"]],
    how="outer",
    on="code",
    suffixes=["_akeneo", "_icecat"],
)
merged_df

Unnamed: 0,code,type_akeneo,type_icecat
0,icecat_10035,pim_catalog_metric,numerical
1,icecat_10101,pim_catalog_metric,numerical
2,icecat_10102,pim_catalog_metric,numerical
3,icecat_10103,pim_catalog_metric,numerical
4,icecat_10157,pim_catalog_boolean,y_n
...,...,...,...
385,icecat_74,,numerical
386,icecat_20228,,numerical
387,icecat_22659,,numerical
388,icecat_22664,,numerical


#### Check all type mappings

Several mappings did not go well. They cannot be changed in Akeneo itself, because the Icecat Importer then no longer works properly, so they have to be fixed afterwards.

In [10]:
# Count how many attribute codes fall into each (Akeneo type, Icecat type) pair.
type_columns = ["type_akeneo", "type_icecat"]
merged_df.groupby(type_columns)[["code"]].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,code
type_akeneo,type_icecat,Unnamed: 2_level_1
pim_catalog_boolean,y_n,123
pim_catalog_metric,numerical,80
pim_catalog_number,numerical,27
pim_catalog_simpleselect,dropdown,59
pim_catalog_simpleselect,multi_dropdown,31
pim_catalog_text,2d,7
pim_catalog_text,3d,2
pim_catalog_text,alphanumeric,9
pim_catalog_text,contrast ratio,1
pim_catalog_text,dropdown,10


Prepare the attribute fixes:

In [16]:
# Akeneo type that each affected Icecat type should have been imported as.
map_icecate_to_akeneo_type = {
    "multi_dropdown": "pim_catalog_multiselect",
    "dropdown": "pim_catalog_simpleselect",
    "numerical": "pim_catalog_number",
}

# (current Akeneo type, Icecat type) combinations that need fixing.
mismatched_type_pairs = [
    ("pim_catalog_simpleselect", "multi_dropdown"),
    ("pim_catalog_text", "dropdown"),
    ("pim_catalog_text", "numerical"),
]

# A row needs fixing when it matches any one of the pairs above.
needs_fix = pd.Series(False, index=merged_df.index)
for akeneo_type, icecat_type in mismatched_type_pairs:
    needs_fix |= merged_df["type_akeneo"].eq(akeneo_type) & merged_df["type_icecat"].eq(icecat_type)

attr_to_fix_df = (
    merged_df.loc[needs_fix]
    .sort_values("code")
    .assign(target_type=lambda frame: frame["type_icecat"].map(map_icecate_to_akeneo_type))
)

attr_to_fix_df

Unnamed: 0,code,type_akeneo,type_icecat,target_type
5,icecat_1024,pim_catalog_text,dropdown,pim_catalog_simpleselect
20,icecat_12435,pim_catalog_text,numerical,pim_catalog_number
21,icecat_12437,pim_catalog_text,numerical,pim_catalog_number
25,icecat_13246,pim_catalog_text,numerical,pim_catalog_number
26,icecat_13248,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
...,...,...,...,...
342,icecat_8072,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
346,icecat_8367,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
358,icecat_8745,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
363,icecat_898,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect


In [17]:
# Save the planned fixes next to the input data; the row index is not written.
attr_to_fix_df.to_csv(data_dir / "attributes-fixes.csv", index=False)

Create the `_fixed` replacement attributes with the corrected type:

In [23]:
# For every attribute that needs fixing, fetch its current definition and send
# it back under the code "<code>_fixed" with the corrected type. One entry per
# request is collected in `responses`.
responses = []

# Keys of the fetched definition that are not sent back. `code` is left out
# because the request addresses the attribute by its new `_fixed` code.
# NOTE(review): `group_labels` is presumably a read-only field of the API
# response -- confirm against the Akeneo API reference.
keys_to_drop = ("code", "group_labels")

for attr_code, target_type in zip(attr_to_fix_df["code"], attr_to_fix_df["target_type"]):
    source_attr = client.request("pim_api_attribute_get", {"code": attr_code})

    # Build a new payload instead of deleting keys from the fetched dict:
    # a response that lacks one of the dropped keys no longer raises KeyError,
    # and the fetched definition is left unmodified.
    payload = {key: value for key, value in source_attr.items() if key not in keys_to_drop}
    payload["type"] = target_type

    if target_type == "pim_catalog_number":
        payload["decimals_allowed"] = True
        payload["negative_allowed"] = True

    attr_code_fix = f"{attr_code}_fixed"
    res = client.request(
        "pim_api_attribute_partial_update",
        {"code": attr_code_fix},
        payload,
    )
    # NOTE(review): assumes the client returns a mapping (the recorded output
    # shows `status` and `message` fields) -- confirm for error responses.
    responses.append({"attr": attr_code_fix, **res})

In [25]:
# One row per update request: the `_fixed` attribute code plus the fields of
# the response returned by the client.
df = pd.DataFrame(responses)
df

Unnamed: 0,attr,status,message
0,icecat_1024_fixed,204,No Content
1,icecat_12435_fixed,201,Created
2,icecat_12437_fixed,201,Created
3,icecat_13246_fixed,201,Created
4,icecat_13248_fixed,204,No Content
...,...,...,...
58,icecat_8072_fixed,204,No Content
59,icecat_8367_fixed,204,No Content
60,icecat_8745_fixed,204,No Content
61,icecat_898_fixed,204,No Content


### Check Locales

In [None]:
# Fetch the attribute list again and spread each attribute's `labels` mapping
# into one column per locale, keyed by the attribute code.
attr = client.request("pim_api_attribute_list")
attr_df = pd.DataFrame(attr)

locales_df = pd.DataFrame(
    [
        {"code": code, **labels}
        for code, labels in zip(attr_df["code"], attr_df["labels"])
    ]
)

Check for missing locales:

In [29]:
# Locales in which every attribute is expected to have a label.
required_locales = ["en_US", "en_GB", "de_DE"]

# reindex() supplies an all-NaN column for a required locale that no attribute
# has a label for, so that case is reported instead of raising KeyError.
labels_by_locale = locales_df.reindex(columns=required_locales)

# A label counts as missing when it is an empty string OR when the locale key
# was absent from the attribute's labels. An absent key shows up as NaN in
# `locales_df`, and NaN == "" is False, so comparing against "" alone would
# silently overlook it.
is_missing = labels_by_locale.isna() | labels_by_locale.eq("")

locales_missing_df = locales_df[is_missing.any(axis=1)]
locales_missing_df

Unnamed: 0,code,en_US,en_GB,de_DE
