# Adding the Categories and Products

In [1]:
import pandas as pd

from src import akeneo, config

In [2]:
# Folder holding the CSV inputs read by this notebook (categories.csv, products.csv).
data_dir = config.dir_data / "dataset"

In [3]:
# Shared API client used for every PIM request in this notebook.
# NOTE(review): presumably reads its connection settings from environment
# variables (going by the helper's name) — confirm in src.akeneo.
client = akeneo.create_client_from_env()

## Categories

### Load Categories from File

In [4]:
# Category tree: one row per category with its parent code and EN/DE labels.
categories_df = pd.read_csv(filepath_or_buffer=data_dir / "categories.csv")
categories_df

Unnamed: 0,parent,code,en,de
0,,master,Master,Master
1,master,samsung,Samsung,Samsung
2,samsung,s20_series,S20 Series,S20-Reihe
3,s20_series,s20,S20,S20
4,s20_series,s20_plus,S20+,S20+
5,s20_series,s20_ultra,S20 Ultra,S20 Ultra
6,s20_series,s20_fe,S20 FE,S20 FE
7,samsung,s21_series,S21 Series,S21-Reihe
8,s21_series,s21,S21,S21
9,s21_series,s21_plus,S21+,S21+


### Insert Categories into PIM

In [5]:
# Send one partial-update request per category row and echo the response.
# NOTE(review): rows are processed in file order; the sample rows list each
# parent before its children — presumably the PIM requires that, confirm.
for parent_code, category_code, label_en, label_de in categories_df.itertuples(
    index=False, name=None
):
    category_payload = {
        # A missing parent (NaN in the CSV) is sent as an explicit null.
        "parent": None if pd.isna(parent_code) else parent_code,
        # The English label is reused for both English locales.
        "labels": {
            "en_US": label_en,
            "en_GB": label_en,
            "de_DE": label_de,
        },
    }
    response = client.request(
        "pim_api_category_partial_update",
        {"code": category_code},
        category_payload,
    )
    print(category_code, "=>", response)

master => {'status': 204, 'message': 'No Content'}
samsung => {'status': 204, 'message': 'No Content'}
s20_series => {'status': 204, 'message': 'No Content'}
s20 => {'status': 204, 'message': 'No Content'}
s20_plus => {'status': 204, 'message': 'No Content'}
s20_ultra => {'status': 204, 'message': 'No Content'}
s20_fe => {'status': 204, 'message': 'No Content'}
s21_series => {'status': 204, 'message': 'No Content'}
s21 => {'status': 204, 'message': 'No Content'}
s21_plus => {'status': 204, 'message': 'No Content'}
s21_ultra => {'status': 204, 'message': 'No Content'}
s21_fe => {'status': 204, 'message': 'No Content'}
s22_series => {'status': 204, 'message': 'No Content'}
s22 => {'status': 204, 'message': 'No Content'}
s22_plus => {'status': 204, 'message': 'No Content'}
s22_ultra => {'status': 204, 'message': 'No Content'}
duplicates => {'status': 204, 'message': 'No Content'}
dup_s20 => {'status': 204, 'message': 'No Content'}
dup_s20_ultra => {'status': 204, 'message': 'No Content'}


## Products

### Load Products from File

In [6]:
# Product list: family, brand, comma-separated category codes and the Icecat
# URL of each product.
products_file_df = pd.read_csv(filepath_or_buffer=data_dir / "products.csv")
products_file_df

Unnamed: 0,family,brand,categories,url
0,mobile_phone_cases,Mobiparts,s20,https://icecat.biz/en/p/mobiparts/104889/mobil...
1,mobile_phone_cases,Mobiparts,s20,https://icecat.biz/en/p/mobiparts/mp-104927/mo...
2,mobile_phone_cases,Mobiparts,s20,https://icecat.biz/en/p/mobiparts/mp-104925/mo...
3,mobile_phone_cases,Mobiparts,s20,https://icecat.biz/en/p/mobiparts/mp-108422/mo...
4,mobile_phone_cases,Mobiparts,s20,https://icecat.biz/en/p/mobiparts/mp-104926/mo...
...,...,...,...,...
117,smartphones,Samsung,s21_fe,https://icecat.biz/de/p/samsung/sm-g990bzageud...
118,smartphones,Samsung,s22,https://icecat.biz/de/p/samsung/sm-s901bzadeue...
119,smartphones,Samsung,s22,https://icecat.biz/de/p/samsung/sm-s901bzkdeue...
120,smartphones,Samsung,s22_plus,https://icecat.biz/de/p/samsung/sm-s906bzgdeue...


In [7]:
# Quick sanity check of the product file: per-column count / unique / top / freq.
products_file_df.describe()

Unnamed: 0,family,brand,categories,url
count,122,122,122,122
unique,2,4,17,122
top,mobile_phone_cases,Samsung,s20,https://icecat.biz/en/p/mobiparts/104889/mobil...
freq,80,75,21,1


### Extract SKU and EAN from the Icecat URLs

In [8]:
def get_sku_and_ean(url: str) -> tuple:
    """Extract the SKU and the EAN embedded in an Icecat product URL.

    The URL is split on "/": segment 6 is the product code (returned
    upper-cased as the SKU) and segment 7 is a dash-separated slug that may
    contain the EAN.

    Only slug tokens made of ASCII digits with a valid GTIN length
    (8, 12, 13 or 14) are accepted as the EAN. Shorter or longer numeric
    tokens (e.g. a numeric product code or a storage size such as "128") are
    skipped instead of being mistaken for the EAN.

    Args:
        url: Icecat product URL of the form
            ``https://icecat.biz/<lang>/p/<brand>/<sku>/<slug>.html``.

    Returns:
        A ``(url, sku, ean)`` tuple of strings; ``ean`` is ``""`` when the
        slug contains no EAN-like token.

    Raises:
        IndexError: if the URL has fewer path segments than expected.
    """
    gtin_lengths = (8, 12, 13, 14)

    segments = url.split("/")
    sku = segments[6].upper()

    # First token that looks like a GTIN wins; fall back to an empty string.
    ean = next(
        (
            token
            for token in segments[7].split("-")
            if len(token) in gtin_lengths and token.isascii() and token.isdigit()
        ),
        "",
    )

    return (url, sku, ean)

# Smoke-check the parser on one phone-case URL and one smartphone URL.
example_urls = (
    "https://icecat.biz/en/p/samsung/ef-pg980tbegeu/mobile+phone+cases-8806090267246-ef-pg980-77657600.html",
    "https://icecat.biz/de/p/samsung/sm-s901bzadeue/galaxy-smartphones-8806094306170-sm-s901b-ds-97302928.html",
)
for example_url in example_urls:
    print(get_sku_and_ean(example_url))

('https://icecat.biz/en/p/samsung/ef-pg980tbegeu/mobile+phone+cases-8806090267246-ef-pg980-77657600.html', 'EF-PG980TBEGEU', '8806090267246')
('https://icecat.biz/de/p/samsung/sm-s901bzadeue/galaxy-smartphones-8806094306170-sm-s901b-ds-97302928.html', 'SM-S901BZADEUE', '8806094306170')


In [9]:
# Parse SKU/EAN once per row and keep the row index of products_file_df, so the
# parsed columns can be attached row-by-row instead of by matching URL text.
sku_and_ean_df = pd.DataFrame(
    [get_sku_and_ean(url) for url in products_file_df["url"]],
    columns=["url", "sku", "ean"],
    index=products_file_df.index,
)

# Joining on the index yields exactly one output row per input row; a merge on
# "url" would multiply rows whenever the same URL appears more than once.
products_df = products_file_df.join(sku_and_ean_df[["sku", "ean"]]).assign(
    # Turn the comma-separated category codes into a list per product.
    categories=lambda frame: frame["categories"].str.split(",")
)
products_df

Unnamed: 0,family,brand,categories,url,sku,ean
0,mobile_phone_cases,Mobiparts,[s20],https://icecat.biz/en/p/mobiparts/104889/mobil...,104889,8718066381026
1,mobile_phone_cases,Mobiparts,[s20],https://icecat.biz/en/p/mobiparts/mp-104927/mo...,MP-104927,8718066381170
2,mobile_phone_cases,Mobiparts,[s20],https://icecat.biz/en/p/mobiparts/mp-104925/mo...,MP-104925,8718066381156
3,mobile_phone_cases,Mobiparts,[s20],https://icecat.biz/en/p/mobiparts/mp-108422/mo...,MP-108422,8718066383501
4,mobile_phone_cases,Mobiparts,[s20],https://icecat.biz/en/p/mobiparts/mp-104926/mo...,MP-104926,8718066381163
...,...,...,...,...,...,...
117,smartphones,Samsung,[s21_fe],https://icecat.biz/de/p/samsung/sm-g990bzageud...,SM-G990BZAGEUD,
118,smartphones,Samsung,[s22],https://icecat.biz/de/p/samsung/sm-s901bzadeue...,SM-S901BZADEUE,8806094306170
119,smartphones,Samsung,[s22],https://icecat.biz/de/p/samsung/sm-s901bzkdeue...,SM-S901BZKDEUE,8806092878617
120,smartphones,Samsung,[s22_plus],https://icecat.biz/de/p/samsung/sm-s906bzgdeue...,SM-S906BZGDEUE,8806092980266


### Insert Products into PIM

In [10]:
# Send one partial-update request per product and keep each response (tagged
# with the SKU) so the outcomes can be inspected afterwards.
# NOTE(review): `ean` is "" for products whose URL carried no EAN — confirm the
# PIM treats an empty string the way you expect.
responses = []
for family, brand, categories, _url, sku, ean in products_df.itertuples(
    index=False, name=None
):
    # The brand is not translated: the same value is stored for every locale.
    brand_values = [
        {"locale": locale, "scope": "default", "data": brand}
        for locale in ("en_US", "en_GB", "de_DE")
    ]
    product_payload = {
        "family": family,
        "categories": categories,
        "values": {
            "ean": [{"locale": None, "scope": None, "data": ean}],
            "icecat_brand": brand_values,
        },
    }
    result = client.request(
        "pim_api_product_partial_update",
        {"code": sku},
        product_payload,
    )
    responses.append({"sku": sku, **result})

In [11]:
# One row per request: the product SKU plus the fields of the client's response.
df = pd.DataFrame(data=responses)
df.head(n=5)

Unnamed: 0,sku,status,message
0,104889,201,Created
1,MP-104927,201,Created
2,MP-104925,201,Created
3,MP-108422,201,Created
4,MP-104926,201,Created


In [15]:
# List every request whose status is neither 201 nor 204 (the two codes seen
# for accepted requests in this notebook); an empty frame means none failed.
df.loc[~df["status"].isin([201, 204])]

Unnamed: 0,sku,status,message


### Check for Not Imported Products

In [12]:
# Fetch the products known to the PIM and show the input rows whose SKU is not
# among the returned identifiers; an empty frame means nothing is missing.
# NOTE(review): assumes the client returns the complete product list (all
# pages) from this single call — confirm in src.akeneo.
products_in_pim_df = pd.DataFrame(data=client.request("pim_api_product_list"))
products_df.loc[~products_df["sku"].isin(products_in_pim_df["identifier"])]

Unnamed: 0,family,brand,categories,url,sku,ean
