# Fixing Attributes and Product Values



In [9]:
import pandas as pd

from src import akeneo, config

## Loading Resources

In [10]:
fixes_df = pd.read_csv(config.dir_data / "dataset" / "attributes-fixes.csv")
fixes_df

Unnamed: 0,code,type_akeneo,type_icecat,target_type
0,icecat_1024,pim_catalog_text,dropdown,pim_catalog_simpleselect
1,icecat_12435,pim_catalog_text,numerical,pim_catalog_number
2,icecat_12437,pim_catalog_text,numerical,pim_catalog_number
3,icecat_13246,pim_catalog_text,numerical,pim_catalog_number
4,icecat_13248,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
...,...,...,...,...
58,icecat_8072,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
59,icecat_8367,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
60,icecat_8745,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect
61,icecat_898,pim_catalog_simpleselect,multi_dropdown,pim_catalog_multiselect


In [None]:
client = akeneo.create_client_from_env()

## Fixing Values

In [15]:
def extract_locales(values: list[dict]) -> dict:
    result = {}
    for value in values:
        locale = value["locale"]
        data = value["data"]
        if locale not in result:
            result[locale] = data
    return result

In [51]:
def to_code(label: str) -> str:
    return (
        label.lower()
        .replace("(", "")
        .replace(")", "")
        .replace("-", "_")
        .replace(".", "_")
        .replace("@", "_at_")
        .replace("+", "plus")
        .strip()
        .replace(" ", "_")
    )

### Fix Numbers

In [60]:
def fix_number(attr_code: str) -> list[str]:
    logs = []

    attr_code_fix = f"{attr_code}_fixed"
    products = client.request("pim_api_product_list", params={"attributes": attr_code})
    for product in products:
        if attr_code not in product["values"]: continue

        prod_id = product["identifier"]
        values = product["values"][attr_code]

        locales = extract_locales(values)
        num = float(locales["en_US"].strip())
        
        for i in range(len(values)):
            values[i]["data"] = num
            
        res = client.request(
            "pim_api_product_partial_update",
            {"code": prod_id},
            {"values": {attr_code_fix: values}},
        )
        logs.append(f"update product {prod_id} => {res}")

    return logs

In [61]:
logs_numbers = []

numbers = fixes_df[fixes_df["target_type"] == "pim_catalog_number"]
for attr_code in numbers["code"]:
    log = fix_number(attr_code)
    logs_numbers.append(log)

### Fix Single Selects

In [52]:
def fix_single_select(attr_code: str) -> list[str]:
    logs = []

    options: set[str] = set()
    attr_code_fix = f"{attr_code}_fixed"

    products = client.request("pim_api_product_list", params={"attributes": attr_code})
    for product in products:
        if attr_code not in product["values"]: continue

        prod_id = product["identifier"]
        values = product["values"][attr_code]

        locales = extract_locales(values)
        opt_code = to_code(locales["en_US"])

        if opt_code not in options:
            res = client.request(
                "pim_api_attribute_option_partial_update",
                {"attributeCode": attr_code_fix, "code": opt_code},
                {"labels": locales},
            )
            logs.append(f"Add {opt_code} to {attr_code_fix} => {res}")
            options.add(opt_code)

        for i in range(len(values)):
            values[i]["data"] = opt_code
        res = client.request(
            "pim_api_product_partial_update",
            {"code": prod_id},
            {"values": {attr_code_fix: values}},
        )
        logs.append(f"update product {prod_id} => {res}")

    return logs

In [21]:
logs_single_selects = []

simple_selects = fixes_df[fixes_df["target_type"] == "pim_catalog_simpleselect"]
for attr_code in simple_selects["code"]:
    log = fix_single_select(attr_code)
    logs_single_selects.append(log)

### Fix Multi Selects

In [53]:
def extract_multi_options(attr_code: str):
    map_options: dict[str, list[str]] = {}
    options: dict[str, dict] = {}

    attr_opts = client.request(
        "pim_api_attribute_option_list", {"attributeCode": attr_code}
    )
    for attr_opt in attr_opts:
        code = attr_opt["code"]
        opts = list(map(to_code, (
            attr_opt["labels"]["en_US"].split(",")
        )))
        ens = attr_opt["labels"]["en_US"].split(",")
        des = attr_opt["labels"]["de_DE"].split(",")
        for i in range(len(opts)):
            if opts[i] not in options:
                options[opts[i]] = {
                    "labels": {
                        "en_US": ens[i].strip(),
                        "en_GB": ens[i].strip(),
                        "de_DE": des[i].strip(),
                    }
                }
        map_options[code] = opts

    return map_options, options

extract_multi_options("icecat_898")

({'348497133': ['silicone'],
  '348497265': ['silicone'],
  '348497633': ['polyethylene_terephthalate_pet'],
  '348497805': ['leather'],
  '348497814': ['leather'],
  '348832151': ['tempered_glass', 'thermoplastic_polyurethane_tpu'],
  '348832844': ['tempered_glass', 'thermoplastic_polyurethane_tpu'],
  '367992077': ['silicone', 'thermoplastic_polyurethane_tpu'],
  '367992090': ['silicone', 'thermoplastic_polyurethane_tpu'],
  '409367243': ['rubber'],
  '409368369': ['rubber'],
  '409601264': ['microfiber', 'silicone'],
  '409601297': ['microfibre', 'silicone'],
  '428874343': ['thermoplastic_polyurethane_tpu', 'silicone'],
  '428874357': ['thermoplastic_polyurethane_tpu', 'silicone'],
  '428875574': ['polycarbonate_pc'],
  '442880730': ['polycarbonate_pc', 'thermoplastic_polyurethane_tpu'],
  '442880733': ['polycarbonate_pc', 'thermoplastic_polyurethane_tpu'],
  '460421316': ['thermoplastic_polyurethane_tpu'],
  '460421840': ['microfiber', 'silicone'],
  'leather': ['leather'],
  'mic

In [54]:
def add_multi_options(attr_code: str, options: dict[str, dict]) -> list[str]:
    logs = []
    for opt_code, body in options.items():
        res = client.request(
            "pim_api_attribute_option_partial_update",
            {"attributeCode": attr_code, "code": opt_code},
            body,
        )
        logs.append(f"Add {opt_code} to {attr_code} => {res}")
    return logs

In [55]:
def add_multi_options_to_products(attr_code: str, map_options: dict[str, list[str]]) -> list[str]:
    logs = []
    products = client.request("pim_api_product_list", params={"attributes": attr_code})
    for product in products:
        prod_id = product["identifier"]
        if attr_code in product["values"]:
            values = product["values"][attr_code]
            for i in range(len(values)):
                values[i]["data"] = map_options[values[i]["data"]]
            res = client.request(
                "pim_api_product_partial_update",
                {"code": prod_id},
                {"values": {f"{attr_code}_fixed": values}},
            )
            logs.append(f"update product {prod_id} => {res}")
    return logs

In [56]:
def fix_multi_select(attr_code: str) -> list[str]:
    logs = []
    
    mapping, options = extract_multi_options(attr_code)
    logs.append(f"mapping: {mapping}, options: {options}")

    logs += add_multi_options(f"{attr_code}_fixed", options)
    logs += add_multi_options_to_products(attr_code, mapping)

    return logs

In [57]:
logs_multi_selects = []

multi_selects = fixes_df[fixes_df["target_type"] == "pim_catalog_multiselect"]
for attr_code in multi_selects["code"]:
    log = fix_multi_select(attr_code)
    logs_multi_selects.append(log)