# Importación de librerias y archivos .csv

In [4873]:
import pandas as pd
import numpy as np

In [4874]:
df_mercadona = pd.read_csv('/content/limpio_mercadona.csv')
df_mercadona.head(2)

Unnamed: 0,id,name,description,price,reference_price,reference_unit,subcategory_2_nivel_id,subcategory_2_nivel_name,category_id,category_name,subcategory_id,subcategory_name,price_corrected
0,4241.0,"Aceite de oliva 0,4º Hacendado",Garrafa 5.0 l,19.95,3.99,L,420,Aceite de oliva,12,"Aceite, especias y salsas",112,"Aceite, vinagre y sal",False
1,4240.0,"Aceite de oliva 0,4º Hacendado",Botella 1.0 l,4.45,4.45,L,420,Aceite de oliva,12,"Aceite, especias y salsas",112,"Aceite, vinagre y sal",False


In [4875]:
df_mercadona.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        4803 non-null   float64
 1   name                      4803 non-null   object 
 2   description               4803 non-null   object 
 3   price                     4803 non-null   float64
 4   reference_price           4803 non-null   float64
 5   reference_unit            4803 non-null   object 
 6   subcategory_2_nivel_id    4803 non-null   int64  
 7   subcategory_2_nivel_name  4803 non-null   object 
 8   category_id               4803 non-null   int64  
 9   category_name             4803 non-null   object 
 10  subcategory_id            4803 non-null   int64  
 11  subcategory_name          4803 non-null   object 
 12  price_corrected           4803 non-null   bool   
dtypes: bool(1), float64(3), int64(3), object(6)
memory usage: 455.1

In [4876]:
df_datamarket = pd.read_csv('/content/limpio_datamarket.csv')
df_datamarket.head(2)

Unnamed: 0,id,supermarket,category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected
0,25869112,mercadona.es,huevos_leche_y_mantequilla|mantequilla_y_margarina,Margarina Flora Original,Tarrina,otras marcas,False,3.45,8.625,kg,2023-03-15,False,False
1,25855500,carrefour.es,productos_frescos|platos_preparados|fritos,Nuggets de pechuga de pollo 250 g,,otras marcas,False,3.1,12.4,kg,2023-03-15,False,False


In [4877]:
df_datamarket.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5000 non-null   int64  
 1   supermarket                5000 non-null   object 
 2   category                   5000 non-null   object 
 3   name                       4999 non-null   object 
 4   description                1832 non-null   object 
 5   trademark                  4921 non-null   object 
 6   trademark_propietary_flag  4921 non-null   object 
 7   price                      5000 non-null   float64
 8   reference_price            5000 non-null   float64
 9   reference_unit             5000 non-null   object 
 10  insert_date                5000 non-null   object 
 11  price_corrected            5000 non-null   bool   
 12  reference_price_corrected  5000 non-null   bool   
dtypes: bool(2), float64(2), int64(1), object(8)
memo

# Creación del diccionario de categorias

En esta etapa preparamos el dataframe df_mercadona para construir un diccionario de categorías. Eliminamos las columnas con identificadores ('id', 'subcategory_2_nivel_id', 'subcategory_id', 'category_id'), ya que no son necesarias para el emparejamiento con otros supermercados y podrían causar confusión durante el análisis conjunto. Nuestro diccionario se basará exclusivamente en los campos descriptivos que contienen los nombres de las categorías y subcategorías, ya que son los que utilizaremos para realizar la unificación de estructuras.

In [4878]:
df_mercadona = df_mercadona.drop(columns=['id', 'subcategory_2_nivel_id', 'subcategory_id', 'category_id'])
df_mercadona.head()

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected
0,"Aceite de oliva 0,4º Hacendado",Garrafa 5.0 l,19.95,3.99,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False
1,"Aceite de oliva 0,4º Hacendado",Botella 1.0 l,4.45,4.45,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False
2,Aceite de oliva virgen extra Hacendado,Garrafa 3.0 l,15.85,5.284,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False
3,Aceite de oliva virgen extra Hacendado,Botella 1.0 l,5.55,5.55,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False
4,Aceite de oliva virgen extra Hacendado Gran Selección,Botella 0.75 l,6.55,8.734,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False


Creamos el diccionario de categorias basado en sistema de categorias de Mercadona

In [4879]:
df_category = df_mercadona[['category_name', 'subcategory_name', 'subcategory_2_nivel_name']].drop_duplicates().reset_index(drop=True)
df_category['category_id'] = df_category.index + 1
df_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
0,"Aceite, especias y salsas","Aceite, vinagre y sal",Aceite de oliva,1
1,"Aceite, especias y salsas","Aceite, vinagre y sal","Aceite de girasol, semillas y maíz",2
2,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos,3
3,"Aceite, especias y salsas","Aceite, vinagre y sal",Sal y bicarbonato,4
4,"Aceite, especias y salsas",Especias,Hierbas,5
...,...,...,...,...
438,Zumos,Fruta variada,Smoothie,439
439,Zumos,Melocotón y piña,Melocotón,440
440,Zumos,Melocotón y piña,Piña,441
441,Zumos,Naranja,Naranja,442


In [4880]:
df_category.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 443 entries, 0 to 442
Data columns (total 4 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   category_name             443 non-null    object
 1   subcategory_name          443 non-null    object
 2   subcategory_2_nivel_name  443 non-null    object
 3   category_id               443 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 14.0+ KB


In [4881]:
df_category.to_csv('category.csv', index=False, encoding='utf-8-sig')

Realizamos una unión con el diccionario df_category para añadir el identificador único category_id a cada producto de Mercadona.

In [4882]:
df_mercadona = df_mercadona.merge(df_category, on=['category_name', 'subcategory_name', 'subcategory_2_nivel_name'], how='left')
df_mercadona.head()

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
0,"Aceite de oliva 0,4º Hacendado",Garrafa 5.0 l,19.95,3.99,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False,1
1,"Aceite de oliva 0,4º Hacendado",Botella 1.0 l,4.45,4.45,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False,1
2,Aceite de oliva virgen extra Hacendado,Garrafa 3.0 l,15.85,5.284,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False,1
3,Aceite de oliva virgen extra Hacendado,Botella 1.0 l,5.55,5.55,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False,1
4,Aceite de oliva virgen extra Hacendado Gran Selección,Botella 0.75 l,6.55,8.734,L,Aceite de oliva,"Aceite, especias y salsas","Aceite, vinagre y sal",False,1


Antes de realizar la correspondencia con la estructura de Mercadona, exploramos el sistema de categorías utilizado por Datamarket

In [4883]:
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', None)

In [4884]:
df_datamarket['category'].drop_duplicates()


Unnamed: 0,category
0,huevos_leche_y_mantequilla|mantequilla_y_margarina
1,productos_frescos|platos_preparados|fritos
2,conservas_caldos_y_cremas|berberechos_y_mejillones
3,despensa|conservas|conservas_vegetales
4,despensa|desayunos_y_dulces|caramelos_chicles_y_golosinas
...,...
4867,limpieza_y_hogar|bazar|automovil
4869,productos_frescos|charcuteria|tablas_y_surtidos
4871,cuidado_personal|cuidado_corporal|crema_solar
4931,parafarmacia|botiquin|geles_hidroalcoholicos


La columna 'category' contiene las categorías completas en un solo string, separadas por el símbolo |. Para trabajar con esta información de forma más ordenada y hacer el mapeo con el sistema de categorías de Mercadona, vamos a desglosar esta columna en tres niveles: categoría, subcategoría y sub-subcategoría. Esto hará que sea más fácil agrupar, comparar y analizar.

In [4885]:
# Dividimos la columna 'category' en tres columnas separadas
df_categorias_datamarket = df_datamarket['category'].drop_duplicates().str.split('|', expand=True)
df_categorias_datamarket.columns = ['category', 'subcategory', 'subsubcategory']

In [4886]:
df_categorias_datamarket.head(10)

Unnamed: 0,category,subcategory,subsubcategory
0,huevos_leche_y_mantequilla,mantequilla_y_margarina,
1,productos_frescos,platos_preparados,fritos
2,conservas_caldos_y_cremas,berberechos_y_mejillones,
3,despensa,conservas,conservas_vegetales
4,despensa,desayunos_y_dulces,caramelos_chicles_y_golosinas
5,bebidas,licores_y_cremas,pacharan
6,charcuteria_y_quesos,queso_lonchas_rallado_y_en_porciones,
7,cuidado_del_hogar,utensilios_de_limpieza,
8,zumos,melocoton_y_pina,
9,despensa,salsas,mayonesa_y_otras_salsas


In [4887]:
# Agrupamos por categoría y vamos a ver qué subcategorías existen para cada una
agrupados = df_categorias_datamarket.groupby('category')['subcategory'].unique().reset_index()
agrupados

Unnamed: 0,category,subcategory
0,aceite_especias_y_salsas,"[otras_salsas, especias, aceite_vinagre_y_sal, mayonesa_ketchup_y_mostaza]"
1,agua_y_refrescos,"[agua, refresco_de_te_y_sin_gas, tonica_y_bitter, isotonico_y_energetico, refresco_de_naranja_y_de_limon, refresco_de_cola]"
2,aperitivos,"[aceitunas_y_encurtidos, frutos_secos_y_fruta_desecada, patatas_fritas_y_snacks]"
3,arroz_legumbres_y_pasta,"[pasta_y_fideos, legumbres, arroz]"
4,azucar_caramelos_y_chocolate,"[chocolate, golosinas, azucar_y_edulcorante, chicles_y_caramelos, mermelada_y_miel]"
5,bebe,"[panales, toallitas_y_panales, leche_para_bebes, cuidado_del_bebe, alimentacion_infantil, higiene_y_cuidado, papillas, biberon_chupete_y_menaje, potitos_y_tarritos, bebidas_galletas_y_yogures, toallitas]"
6,bebidas,"[licores_y_cremas, alcoholes, aguas_y_zumos, refrescos, aguas, vinos, cervezas, zumos, sidra, bebidas_isotonicas_y_energeticas, cerveza, cava_y_champagne, batidos_y_horchata]"
7,bodega,"[cerveza, vino, alcoholes, sidra_y_cava, vino_tinto, espumosos, vino_blanco, licores, tinto_de_verano_y_sangria, vino_rosado, cerveza_sin_alcohol, vinos_de_mesa_sangrias_y_tintos_de_verano, vino_lambrusco_y_espumoso]"
8,cacao_cafe_e_infusiones,"[te_e_infusiones, cafe_capsula_y_monodosis, cafe_molido_y_en_grano, cafe_soluble_y_otras_bebidas, cacao_soluble_y_chocolate_a_la_taza]"
9,carne,"[cerdo, aves_y_pollo, vacuno, hamburguesas_y_picadas, arreglos, carne_congelada, embutido, empanados_y_elaborados, conejo_y_cordero]"


Esta clasificación presenta agrupaciones lógicas. Para integrarlo con la estructura de Mercadona, será necesario realizar un trabajo de mapeo que permita unificar ambas taxonomías.

# Procesamiento de la categoría "aceite_especias_y_salsas"

Analizamos la categoría de Datamarket "aceite_especias_y_salsas" para identificar sus subcategorías. Observamos subcategorías como otras_salsas, especias, aceite_vinagre_y_sal, mayonesa_ketchup_y_mostaza. Posteriormente, extraemos todas las subcategorías existentes en el diccionario de Mercadona bajo la categoría "Aceite, especias y salsas". Estas subcategorías servirán como base para realizar el mapeo de los productos de Datamarket al sistema de categorías de Mercadona.

In [4888]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'aceite_especias_y_salsas']

Unnamed: 0,category,subcategory,subsubcategory
83,aceite_especias_y_salsas,otras_salsas,
92,aceite_especias_y_salsas,especias,
456,aceite_especias_y_salsas,aceite_vinagre_y_sal,
1090,aceite_especias_y_salsas,mayonesa_ketchup_y_mostaza,


In [4889]:
current_category = df_category[df_category["category_name"] == "Aceite, especias y salsas"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
0,"Aceite, especias y salsas","Aceite, vinagre y sal",Aceite de oliva,1
1,"Aceite, especias y salsas","Aceite, vinagre y sal","Aceite de girasol, semillas y maíz",2
2,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos,3
3,"Aceite, especias y salsas","Aceite, vinagre y sal",Sal y bicarbonato,4
4,"Aceite, especias y salsas",Especias,Hierbas,5
5,"Aceite, especias y salsas",Especias,Colorante y pimentón,6
6,"Aceite, especias y salsas",Especias,Pimienta,7
7,"Aceite, especias y salsas",Especias,Otras especias,8
8,"Aceite, especias y salsas",Especias,Sazonadores,9
9,"Aceite, especias y salsas","Mayonesa, ketchup y mostaza",Mayonesa,10


In [4890]:
# Creamos tres columnas con valores nulos para preparar la nueva categorización
df_datamarket['category_name'] = pd.NA
df_datamarket['subcategory_name'] = pd.NA
df_datamarket['subcategory_2_nivel_name'] = pd.NA

In [4891]:
df_datamarket.rename(columns={'category': 'brand_category'}, inplace=True)

In [4892]:
df_datamarket.columns

Index(['id', 'supermarket', 'brand_category', 'name', 'description', 'trademark', 'trademark_propietary_flag', 'price', 'reference_price', 'reference_unit', 'insert_date', 'price_corrected', 'reference_price_corrected', 'category_name', 'subcategory_name', 'subcategory_2_nivel_name'], dtype='object')

In [4893]:
df_datamarket[df_datamarket['brand_category'] == 'aceite_especias_y_salsas|aceite_vinagre_y_sal']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
456,25865372,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Limón exprimido Hacendado,Botella,hacendado,True,0.95,3.393,l,2023-03-15,False,False,,,
1186,25865366,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,"Aceite de girasol refinado 0,2º Hacendado",Garrafa,hacendado,True,8.95,1.79,l,2023-03-15,False,False,,,
1517,25865373,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Crema de vinagre balsámico de Módena Hacendado,Botella,hacendado,True,1.8,7.2,kg,2023-03-15,False,False,,,
1641,25865371,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Vinagre de manzana Hacendado,Botella,hacendado,True,0.82,0.82,l,2023-03-15,False,False,,,
1877,25865374,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Vinagre de Jerez reserva Hacendado,Botella,hacendado,True,1.8,7.2,l,2023-03-15,False,False,,,
2245,25865367,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,"Aceite de girasol refinado 0,2º Hacendado",Botella,hacendado,True,1.95,1.95,l,2023-03-15,False,False,,,
2585,25865378,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Crema de vinagre balsámico de manzana Hacendado,Botella,hacendado,True,2.7,10.8,kg,2023-03-15,False,False,,,
3650,25865382,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,Sal gruesa Hacendado,Paquete,hacendado,True,0.3,0.3,kg,2023-03-15,False,False,,,
4059,25865356,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,"Aceite de oliva 0,4º Hacendado",Botella,hacendado,True,4.77,4.77,l,2023-03-15,False,False,,,
4129,25865377,mercadona.es,aceite_especias_y_salsas|aceite_vinagre_y_sal,"Aceite de oliva, vinagre y sal Merry",Paquete,otras marcas,False,2.0,20.0,l,2023-03-15,False,False,,,


Creamos una función que va a clasificar los productos de la categoría 'aceite_especias_y_salsas|aceite_vinagre_y_sal' en niveles jerárquicos según su nombre. Por ejemplo, si el nombre contiene la palabra 'oliva', se considera como aceite de oliva. La función devuelve una tupla con category_name,
subcategory_name y subcategory_2_nivel_name.

In [4894]:
def clasificar_aceites(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'aceite_especias_y_salsas|aceite_vinagre_y_sal':
        if 'oliva' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de oliva')
        elif 'girasol' in name or 'semillas' in name or 'maíz' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de girasol, semillas y maíz')
        elif 'vinagre' in name or 'limón' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Vinagre y otros aderezos')
        elif 'sal' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Sal y bicarbonato')

    return (pd.NA, pd.NA, pd.NA)


Creamos una función datamarket_update que permite aplicar la clasificación a los productos de una subcategoría específica del DataFrame df_datamarket. Esta función crea una máscara para filtrar solo las filas correspondientes a la subcategoría indicada, genera una copia temporal del DataFrame (df_temp) y le aplica la función de clasificación clasificar_category fila por fila (axis=1).
Finalmente, se actualiza el DataFrame original (df_datamarket) con las nuevas columnas generadas.

In [4895]:
def datamarket_update (category, clasificar_category):
    mask = df_datamarket['brand_category'] == category
    df_temp = df_datamarket.loc[mask].copy()
    df_temp[['category_name', 'subcategory_name', 'subcategory_2_nivel_name']] = df_temp.apply(clasificar_category, axis=1, result_type='expand')
    df_datamarket.update(df_temp)
    df_datamarket[df_datamarket['brand_category'] == category].info()

In [4896]:
datamarket_update('aceite_especias_y_salsas|aceite_vinagre_y_sal', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 456 to 4948
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [4897]:
# Verificación de los resultados
df_datamarket[df_datamarket['brand_category'] == 'aceite_especias_y_salsas|aceite_vinagre_y_sal'][['name', 'category_name', 'subcategory_name', 'subcategory_2_nivel_name']]

Unnamed: 0,name,category_name,subcategory_name,subcategory_2_nivel_name
456,Limón exprimido Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos
1186,"Aceite de girasol refinado 0,2º Hacendado","Aceite, especias y salsas","Aceite, vinagre y sal","Aceite de girasol, semillas y maíz"
1517,Crema de vinagre balsámico de Módena Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos
1641,Vinagre de manzana Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos
1877,Vinagre de Jerez reserva Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos
2245,"Aceite de girasol refinado 0,2º Hacendado","Aceite, especias y salsas","Aceite, vinagre y sal","Aceite de girasol, semillas y maíz"
2585,Crema de vinagre balsámico de manzana Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos
3650,Sal gruesa Hacendado,"Aceite, especias y salsas","Aceite, vinagre y sal",Sal y bicarbonato
4059,"Aceite de oliva 0,4º Hacendado","Aceite, especias y salsas","Aceite, vinagre y sal",Aceite de oliva
4129,"Aceite de oliva, vinagre y sal Merry","Aceite, especias y salsas","Aceite, vinagre y sal",Aceite de oliva


Podemos ver que ya 12 productos ya han sido clasificados y vinculados con la nueva estructura jerárquica de categorías de Mercadona. Esto se refleja en las columnas category_name, subcategory_name y subcategory_2_nivel_name, que contienen 12 valores no nulos cada una.

Seguimos en la misma línea con el resto de categorías del catálogo de Datamarket.

In [4898]:
def clasificar_salsas(row):
    subcat = row['brand_category']
    name = row['name'].lower()

    if subcat == 'aceite_especias_y_salsas|mayonesa_ketchup_y_mostaza' or 'aceite_especias_y_salsas|otras_salsas':
        if 'mayonesa' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mayonesa')
        elif 'ketchup' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Ketchup')
        elif 'mostaza' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mostaza')
        elif 'allioli' in name or 'ali-oli' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Allioli')
        elif 'soja' in name or 'teriyaki' in name or 'agridulce' in name or 'chili' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas orientales')
        elif 'barbacoa' in name or 'piri piri' in name or 'burger' in name or 'curry' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para carnes')
        elif 'fresca' in name or 'pesto' in name or 'boloñesa' in name or 'carbonara' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para pasta')
        elif 'tomate frito' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Tomate frito')
        else:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Otras salsas')

    if subcat == 'aceite_especias_y_salsas|especias':
        if 'sal' in name or 'bicarbonato' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Sal y bicarbonato')
        elif 'pimienta' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Pimienta')
        elif 'pimentón' in name or 'colorante' in name or 'azafrán' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Colorante y pimentón')
        elif 'sazonador' in name or 'mezcla' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Sazonadores')
        elif any(x in name for x in ['orégano', 'perejil', 'romero', 'laurel', 'tomillo', 'cilantro', 'eneldo', 'hierbas']):
            return ('Aceite, especias y salsas', 'Especias', 'Hierbas')
        else:
            return ('Aceite, especias y salsas', 'Especias', 'Otras especias')


    return (np.nan, np.nan, np.nan)

In [4899]:
datamarket_update('aceite_especias_y_salsas|mayonesa_ketchup_y_mostaza', clasificar_salsas)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1090 to 4681
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

In [4900]:
datamarket_update('aceite_especias_y_salsas|otras_salsas', clasificar_salsas)

<class 'pandas.core.frame.DataFrame'>
Index: 21 entries, 83 to 4758
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         21 non-null     int64  
 1   supermarket                21 non-null     object 
 2   brand_category             21 non-null     object 
 3   name                       21 non-null     object 
 4   description                21 non-null     object 
 5   trademark                  21 non-null     object 
 6   trademark_propietary_flag  21 non-null     object 
 7   price                      21 non-null     float64
 8   reference_price            21 non-null     float64
 9   reference_unit             21 non-null     object 
 10  insert_date                21 non-null     object 
 11  price_corrected            21 non-null     bool   
 12  reference_price_corrected  21 non-null     bool   
 13  category_name              21 non-null     object 
 14

In [4901]:
datamarket_update('aceite_especias_y_salsas|especias', clasificar_salsas)

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 92 to 4991
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         30 non-null     int64  
 1   supermarket                30 non-null     object 
 2   brand_category             30 non-null     object 
 3   name                       30 non-null     object 
 4   description                30 non-null     object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      30 non-null     float64
 8   reference_price            30 non-null     float64
 9   reference_unit             30 non-null     object 
 10  insert_date                30 non-null     object 
 11  price_corrected            30 non-null     bool   
 12  reference_price_corrected  30 non-null     bool   
 13  category_name              30 non-null     object 
 14

Vamos a ver todas las categorias de Mercadona

In [4902]:
df_category['category_name'].unique()

array(['Aceite, especias y salsas', 'Agua y refrescos', 'Aperitivos',
       'Arroz, legumbres y pasta', 'Azúcar, caramelos y chocolate',
       'Bebé', 'Bodega', 'Cacao, café e infusiones', 'Carne',
       'Cereales y galletas', 'Charcutería y quesos', 'Congelados',
       'Conservas, caldos y cremas', 'Cuidado del cabello',
       'Cuidado facial y corporal', 'Fitoterapia y parafarmacia',
       'Fruta y verdura', 'Huevos, leche y mantequilla',
       'Limpieza y hogar', 'Maquillaje', 'Marisco y pescado', 'Mascotas',
       'Panadería y pastelería', 'Pizzas y platos preparados',
       'Postres y yogures', 'Zumos'], dtype=object)

# Procesamiento de la categoría "agua_y_refrescos"

Vamos a procesar la proxima categoría

In [4903]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'agua_y_refrescos']

Unnamed: 0,category,subcategory,subsubcategory
86,agua_y_refrescos,agua,
182,agua_y_refrescos,refresco_de_te_y_sin_gas,
265,agua_y_refrescos,tonica_y_bitter,
334,agua_y_refrescos,isotonico_y_energetico,
358,agua_y_refrescos,refresco_de_naranja_y_de_limon,
720,agua_y_refrescos,refresco_de_cola,


In [4904]:
current_category = df_category[df_category["category_name"] == "Agua y refrescos"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
18,Agua y refrescos,Agua,Agua sin gas,19
19,Agua y refrescos,Agua,Agua con gas,20
20,Agua y refrescos,Agua,Gaseosa,21
21,Agua y refrescos,Isotónico y energético,Isotónico,22
22,Agua y refrescos,Isotónico y energético,Energético,23
23,Agua y refrescos,Refresco de cola,Cola clásica,24
24,Agua y refrescos,Refresco de cola,Cola zero,25
25,Agua y refrescos,Refresco de cola,Cola sin cafeína,26
26,Agua y refrescos,Refresco de naranja y de limón,Limón,27
27,Agua y refrescos,Refresco de naranja y de limón,Lima limón,28


In [4905]:
def clasificar_category_aguas(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'agua_y_refrescos|refresco_de_naranja_y_de_limon':
        if 'naranja' in name:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Naranja')
        elif 'limón' in name:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Limón')
        else:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Lima limón')

    if subcat == 'agua_y_refrescos|refresco_de_te_y_sin_gas':
        if 'té' in name:
            return ('Agua y refrescos', 'Refresco de té y sin gas', 'Té')
        else:
            return ('Agua y refrescos', 'Refresco de té y sin gas', 'Otros refrescos sin gas')

    if subcat == 'agua_y_refrescos|tonica_y_bitter':
        return ('Agua y refrescos', 'Tónica y bitter', 'Tónica y bitter')

    if subcat == 'agua_y_refrescos|isotonico_y_energetico':
        if 'energético' in name:
            return ('Agua y refrescos', 'Isotónico y energético', 'Energético')
        else:
            return ('Agua y refrescos', 'Isotónico y energético', 'Isotónico')

    if subcat == 'agua_y_refrescos|agua':
        if 'gas' in name:
            return ('Agua y refrescos', 'Agua', 'Agua con gas')
        elif 'gaseosa' in name:
            return ('Agua y refrescos', 'Agua', 'Gaseosa')
        else:
            return ('Agua y refrescos', 'Agua', 'Agua sin gas')

    if subcat == 'agua_y_refrescos|refresco_de_cola':
        if 'zero cafeína' in name or 'zero zero' in name or 'sin cafeína' in name:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola sin cafeína')
        elif 'zero azúcar' in name or 'azúcar' in name:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola zero')
        else:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola clásica')


    return (pd.NA, pd.NA, pd.NA)


In [4906]:
datamarket_update('agua_y_refrescos|refresco_de_naranja_y_de_limon', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 358 to 4927
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [4907]:
datamarket_update('agua_y_refrescos|refresco_de_te_y_sin_gas', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 182 to 4148
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [4908]:
datamarket_update('agua_y_refrescos|tonica_y_bitter', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 265 to 3471
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [4909]:
datamarket_update('agua_y_refrescos|isotonico_y_energetico', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 334 to 4856
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [4910]:
datamarket_update('agua_y_refrescos|agua', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 21 entries, 86 to 4946
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         21 non-null     int64  
 1   supermarket                21 non-null     object 
 2   brand_category             21 non-null     object 
 3   name                       21 non-null     object 
 4   description                21 non-null     object 
 5   trademark                  21 non-null     object 
 6   trademark_propietary_flag  21 non-null     object 
 7   price                      21 non-null     float64
 8   reference_price            21 non-null     float64
 9   reference_unit             21 non-null     object 
 10  insert_date                21 non-null     object 
 11  price_corrected            21 non-null     bool   
 12  reference_price_corrected  21 non-null     bool   
 13  category_name              21 non-null     object 
 14

In [4911]:
datamarket_update('agua_y_refrescos|refresco_de_cola', clasificar_category_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 720 to 4779
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         14 non-null     int64  
 1   supermarket                14 non-null     object 
 2   brand_category             14 non-null     object 
 3   name                       14 non-null     object 
 4   description                14 non-null     object 
 5   trademark                  14 non-null     object 
 6   trademark_propietary_flag  14 non-null     object 
 7   price                      14 non-null     float64
 8   reference_price            14 non-null     float64
 9   reference_unit             14 non-null     object 
 10  insert_date                14 non-null     object 
 11  price_corrected            14 non-null     bool   
 12  reference_price_corrected  14 non-null     bool   
 13  category_name              14 non-null     object 
 1

# Procesamiento de la categoría "zumos"

In [4912]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'zumos']

Unnamed: 0,category,subcategory,subsubcategory
8,zumos,melocoton_y_pina,
391,zumos,fruta_variada,
482,zumos,naranja,
775,zumos,tomate_y_otros_sabores,


In [4913]:
current_category = df_category[df_category["category_name"] == "Zumos"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
436,Zumos,Fruta variada,Fruta + leche,437
437,Zumos,Fruta variada,Fruta variada y otros sabores,438
438,Zumos,Fruta variada,Smoothie,439
439,Zumos,Melocotón y piña,Melocotón,440
440,Zumos,Melocotón y piña,Piña,441
441,Zumos,Naranja,Naranja,442
442,Zumos,Tomate y otros sabores,Otros sabores,443


In [4914]:
def clasificar_category_zumos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'zumos|fruta_variada':
        if 'leche' in name:
            return ('Zumos', 'Fruta variada', 'Fruta + leche')
        elif 'smoothie' in name:
            return ('Zumos', 'Fruta variada', 'Smoothie')
        else:
            return ('Zumos', 'Fruta variada', 'Fruta variada y otros sabores')

    if subcat == 'zumos|melocoton_y_pina':
        if 'melocotón' in name:
            return ('Zumos', 'Melocotón y piña', 'Melocotón')
        else:
            return ('Zumos', 'Melocotón y piña', 'Piña')

    if subcat == 'zumos|naranja':
        return ('Zumos', 'Naranja', 'Naranja')

    if subcat == 'zumos|tomate_y_otros_sabores':
        return ('Zumos', 'Tomate y otros sabores', 'Otros sabores')

    return (pd.NA, pd.NA, pd.NA)

In [4915]:
datamarket_update('zumos|fruta_variada', clasificar_category_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 391 to 4968
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [4916]:
datamarket_update('zumos|melocoton_y_pina', clasificar_category_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 8 to 4142
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14  

In [4917]:
datamarket_update('zumos|naranja', clasificar_category_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 482 to 3096
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [4918]:
datamarket_update('zumos|tomate_y_otros_sabores', clasificar_category_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 775 to 4837
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

# Procesamiento de la categoría "aperitivos"

In [4919]:
current_category = df_category[df_category["category_name"] == "Aperitivos"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
32,Aperitivos,Aceitunas y encurtidos,Aceitunas verdes,33
33,Aperitivos,Aceitunas y encurtidos,Aceitunas negras,34
34,Aperitivos,Aceitunas y encurtidos,Cóctel y banderillas,35
35,Aperitivos,Aceitunas y encurtidos,Pepinillos y otros encurtidos,36
36,Aperitivos,Frutos secos y fruta desecada,Frutos secos,37
37,Aperitivos,Frutos secos y fruta desecada,Cocktails,38
38,Aperitivos,Frutos secos y fruta desecada,Fruta desecada,39
39,Aperitivos,Patatas fritas y snacks,Patatas fritas,40
40,Aperitivos,Patatas fritas y snacks,Snacks,41


In [4920]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'aperitivos']

Unnamed: 0,category,subcategory,subsubcategory
39,aperitivos,aceitunas_y_encurtidos,
128,aperitivos,frutos_secos_y_fruta_desecada,
303,aperitivos,patatas_fritas_y_snacks,


In [4921]:
def clasificar_category_aperitivos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'aperitivos|aceitunas_y_encurtidos':
        if 'aceitunas verdes' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas verdes')
        elif 'aceitunas negras' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas negras')
        elif 'abanderillas' in name or 'mix' in name or 'cóctel' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Cóctel y banderillas')
        else:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Pepinillos y otros encurtidos')

    if subcat == 'aperitivos|frutos_secos_y_fruta_desecada':
        if 'cocktail' in name or 'combinado' in name:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Cocktails')
        elif 'dátiles' in name or 'pasas' in name or 'desecados' in name or 'deshidratado' in name or 'albaricoque' in name or 'arándanos' in name:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Fruta desecada')
        else:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')

    if subcat == 'aperitivos|patatas_fritas_y_snacks':
        if 'patata' in name or 'patatas' in name or 'patatinas' in name:
            return ('Aperitivos', 'Patatas fritas y snacks', 'Patatas fritas')
        else:
            return ('Aperitivos', 'Patatas fritas y snacks', 'Snacks')

    return (pd.NA, pd.NA, pd.NA)

In [4922]:
datamarket_update('aperitivos|aceitunas_y_encurtidos', clasificar_category_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 39 to 4727
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                15 non-null     object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 14

In [4923]:
datamarket_update('aperitivos|frutos_secos_y_fruta_desecada', clasificar_category_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, 128 to 4955
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         28 non-null     int64  
 1   supermarket                28 non-null     object 
 2   brand_category             28 non-null     object 
 3   name                       28 non-null     object 
 4   description                28 non-null     object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      28 non-null     float64
 8   reference_price            28 non-null     float64
 9   reference_unit             28 non-null     object 
 10  insert_date                28 non-null     object 
 11  price_corrected            28 non-null     bool   
 12  reference_price_corrected  28 non-null     bool   
 13  category_name              28 non-null     object 
 1

In [4924]:
datamarket_update('aperitivos|patatas_fritas_y_snacks', clasificar_category_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 303 to 4540
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                19 non-null     object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

# Procesamiento de la categoría "arroz_legumbres_y_pasta"

In [4925]:
current_category = df_category[df_category["category_name"] == "Arroz, legumbres y pasta"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
41,"Arroz, legumbres y pasta",Arroz,Arroz,42
42,"Arroz, legumbres y pasta",Legumbres,Garbanzos,43
43,"Arroz, legumbres y pasta",Legumbres,Alubias,44
44,"Arroz, legumbres y pasta",Legumbres,Lentejas y otros,45
45,"Arroz, legumbres y pasta",Pasta y fideos,Fideos,46
46,"Arroz, legumbres y pasta",Pasta y fideos,"Macarrones, pajaritas y hélices",47
47,"Arroz, legumbres y pasta",Pasta y fideos,Spaghetti y tallarines,48
48,"Arroz, legumbres y pasta",Pasta y fideos,Pasta rellena,49
49,"Arroz, legumbres y pasta",Pasta y fideos,Fideos orientales,50
50,"Arroz, legumbres y pasta",Pasta y fideos,Lasaña y canelones,51


In [4926]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'arroz_legumbres_y_pasta']

Unnamed: 0,category,subcategory,subsubcategory
166,arroz_legumbres_y_pasta,pasta_y_fideos,
173,arroz_legumbres_y_pasta,legumbres,
728,arroz_legumbres_y_pasta,arroz,


In [4927]:
def clasificar_category_legumbres(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'arroz_legumbres_y_pasta|arroz':
        return ('Arroz, legumbres y pasta', 'Arroz', 'Arroz')

    if subcat == 'arroz_legumbres_y_pasta|legumbres':
        if 'garbanzos' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Garbanzos')
        elif 'alubias' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Alubias')
        else:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Lentejas y otros')
    if subcat == 'arroz_legumbres_y_pasta|pasta_y_fideos':
        if any(x in name for x in ['fideuá', 'fideo', 'estrellas', 'maravilla', 'piñones']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos')
        elif any(x in name for x in ['pajaritas', 'penne', 'tortiglioni', 'hélices', 'macarrón', 'fusilli', 'trottole', 'tiburón']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')
        elif any(x in name for x in ['tallarines', 'spaghetti', 'nidos', 'noodles', 'tagliatelle']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Spaghetti y tallarines')
        elif any(x in name for x in ['tortellini', 'ravioli', 'gnocchi', 'girasoles', 'medialunas']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Pasta rellena')
        elif 'orientales' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos orientales')
        elif 'canelones' in name or 'lazaña' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Lasaña y canelones')
        else:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')

    return (pd.NA, pd.NA, pd.NA)

In [4928]:
datamarket_update('arroz_legumbres_y_pasta|arroz', clasificar_category_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 728 to 4194
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [4929]:
datamarket_update('arroz_legumbres_y_pasta|legumbres', clasificar_category_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 173 to 4087
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [4930]:
datamarket_update('arroz_legumbres_y_pasta|pasta_y_fideos', clasificar_category_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 25 entries, 166 to 4929
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         25 non-null     int64  
 1   supermarket                25 non-null     object 
 2   brand_category             25 non-null     object 
 3   name                       25 non-null     object 
 4   description                25 non-null     object 
 5   trademark                  25 non-null     object 
 6   trademark_propietary_flag  25 non-null     object 
 7   price                      25 non-null     float64
 8   reference_price            25 non-null     float64
 9   reference_unit             25 non-null     object 
 10  insert_date                25 non-null     object 
 11  price_corrected            25 non-null     bool   
 12  reference_price_corrected  25 non-null     bool   
 13  category_name              25 non-null     object 
 1

# Procesamiento de la categoría "azucar_caramelos_y_chocolate"

In [4931]:
current_category = df_category[df_category["category_name"] == "Azúcar, caramelos y chocolate"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
51,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Azúcar,52
52,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Edulcorante y otros,53
53,"Azúcar, caramelos y chocolate",Chicles y caramelos,Chicles,54
54,"Azúcar, caramelos y chocolate",Chicles y caramelos,Caramelos,55
55,"Azúcar, caramelos y chocolate",Chocolate,Chocolate negro,56
56,"Azúcar, caramelos y chocolate",Chocolate,Chocolate con leche,57
57,"Azúcar, caramelos y chocolate",Chocolate,Chocolate blanco,58
58,"Azúcar, caramelos y chocolate",Chocolate,Chocolatinas,59
59,"Azúcar, caramelos y chocolate",Chocolate,Bombones,60
60,"Azúcar, caramelos y chocolate",Chocolate,Cremas de untar,61


In [4932]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'azucar_caramelos_y_chocolate']

Unnamed: 0,category,subcategory,subsubcategory
70,azucar_caramelos_y_chocolate,chocolate,
220,azucar_caramelos_y_chocolate,golosinas,
314,azucar_caramelos_y_chocolate,azucar_y_edulcorante,
715,azucar_caramelos_y_chocolate,chicles_y_caramelos,
929,azucar_caramelos_y_chocolate,mermelada_y_miel,


In [4933]:
def clasificar_category_azucar(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'azucar_caramelos_y_chocolate|mermelada_y_miel':
        if 'miel' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Miel')
        elif 'mermelada' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Mermelada')
        else:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Confitura y otros')

    if subcat == 'azucar_caramelos_y_chocolate|azucar_y_edulcorante':
        if 'azúcar' in name:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Azúcar')
        else:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Edulcorante y otros')

    if subcat == 'azucar_caramelos_y_chocolate|chocolate':
        if 'bombones' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Bombones')
        elif any(x in name for x in ['barritas', 'huevos', 'pasqua', 'disquitos', 'cacahuetes', 'bolas', 'huevo', 'figuras', 'figura']):
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolatinas')
        elif 'chocolate negro' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate negro')
        elif 'chocolate blanco' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate blanco')
        elif 'chocolate con leche' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate con leche')
        else:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Cremas de untar')

    if subcat == 'azucar_caramelos_y_chocolate|chicles_y_caramelos':
        if 'chicles' in name:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Chicles')
        else:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Caramelos')

    if subcat == 'azucar_caramelos_y_chocolate|golosinas':
        return ('Azúcar, caramelos y chocolate', 'Golosinas', 'Golosinas')

    return (pd.NA, pd.NA, pd.NA)

In [4934]:
datamarket_update('azucar_caramelos_y_chocolate|mermelada_y_miel', clasificar_category_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 929 to 4884
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [4935]:
datamarket_update('azucar_caramelos_y_chocolate|azucar_y_edulcorante', clasificar_category_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 314 to 4745
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [4936]:
datamarket_update('azucar_caramelos_y_chocolate|chocolate', clasificar_category_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 70 to 4813
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                28 non-null     object 
 5   trademark                  31 non-null     object 
 6   trademark_propietary_flag  31 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 14

In [4937]:
datamarket_update('azucar_caramelos_y_chocolate|chicles_y_caramelos', clasificar_category_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 715 to 4760
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

In [4938]:
datamarket_update('azucar_caramelos_y_chocolate|golosinas', clasificar_category_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 220 to 4633
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

# Procesamiento de la categoría 'bebe'

In [4939]:
current_category = df_category[df_category["category_name"] == 'Bebé']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
65,Bebé,Alimentación infantil,Tarritos salados,66
66,Bebé,Alimentación infantil,Tarritos de fruta,67
67,Bebé,Alimentación infantil,Yogures y postres,68
68,Bebé,Alimentación infantil,Leche,69
69,Bebé,Alimentación infantil,Leche en polvo,70
70,Bebé,Alimentación infantil,Papillas,71
71,Bebé,Biberón y chupete,Biberón,72
72,Bebé,Biberón y chupete,Chupete,73
73,Bebé,Higiene y cuidado,Champú y jabón,74
74,Bebé,Higiene y cuidado,Aceite y crema,75


In [4940]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'bebe']

Unnamed: 0,category,subcategory,subsubcategory
23,bebe,panales,
63,bebe,toallitas_y_panales,
89,bebe,leche_para_bebes,
169,bebe,cuidado_del_bebe,
231,bebe,alimentacion_infantil,
278,bebe,higiene_y_cuidado,
387,bebe,papillas,
791,bebe,biberon_chupete_y_menaje,
942,bebe,potitos_y_tarritos,
1000,bebe,bebidas_galletas_y_yogures,


In [4941]:
def clasificar_category_bebe(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'bebe|papillas':
        return ('Bebé', 'Alimentación infantil', 'Papillas')

    if subcat == 'bebe|bebidas_galletas_y_yogures':
        return ('Bebé', 'Alimentación infantil', 'Yogures y postres')

    if subcat == 'bebe|leche_para_bebes':
        if 'polvo' in name:
            return ('Bebé', 'Alimentación infantil', 'Leche en polvo')
        else:
            return ('Bebé', 'Alimentación infantil', 'Leche')

    if subcat == 'bebe|potitos_y_tarritos':
        if any(x in name for x in ['fruta', 'frutas', 'plátano', 'fresa', 'pera', 'manzana', 'mandarina', 'arándanos']):
            return ('Bebé', 'Alimentación infantil', 'Tarritos de fruta')
        else:
            return ('Bebé', 'Alimentación infantil', 'Tarritos salados')

    if subcat == 'bebe|alimentacion_infantil':
       if 'polvo' in name and 'leche' in name:
           return ('Bebé', 'Alimentación infantil', 'Leche en polvo')
       elif 'leche' in name:
           return ('Bebé', 'Alimentación infantil', 'Leche')
       elif 'bebida' in name or 'galletas' in name or 'yogur' in name:
           return ('Bebé', 'Alimentación infantil', 'Yogures y postres')
       elif 'papilla' in name:
           return ('Bebé', 'Alimentación infantil', 'Papillas')
       elif any(x in name for x in ['fruta', 'frutas', 'plátano', 'fresa', 'pera', 'manzana', 'mandarina', 'arándanos']):
            return ('Bebé', 'Alimentación infantil', 'Tarritos de fruta')
       else:
            return ('Bebé', 'Alimentación infantil', 'Tarritos salados')

    if subcat in ['bebe|toallitas_y_panales', 'bebe|panales', 'bebe|toallitas']:
        if 'toallitas' in name:
            return ('Bebé', 'Toallitas y pañales', 'Toallitas')
        elif 'braguita' in name or 'braguitas' in name or 'bañador' in name or 'cambiador' in name:
            return ('Bebé', 'Toallitas y pañales', 'Bañador y braguita')
        elif 'talla 0' in name or 'talla 1' in name or 'talla 2' in name or 'talla 3' in name:
            return ('Bebé', 'Toallitas y pañales', 'Pañal talla de 0 a 3')
        else:
            return ('Bebé', 'Toallitas y pañales', 'Pañal talla de 4 a XL')

    if subcat in ['bebe|cuidado_del_bebe', 'bebe|higiene_y_cuidado', 'bebe|biberon_chupete_y_menaje']:
        if 'biberón' in name:
            return ('Bebé', 'Biberón y chupete', 'Biberón')
        elif 'chupete' in name:
            return ('Bebé', 'Biberón y chupete', 'Chupete')
        elif 'champú' in name or 'gel corporal' in name or 'jabón' in name:
            return ('Bebé', 'Higiene y cuidado', 'Champú y jabón')
        elif 'crema' in name or 'pomada' in name or 'aceite' in name or 'loción' in name or 'bálsamo' in name:
            return ('Bebé', 'Higiene y cuidado', 'Aceite y crema')
        elif 'colonia' in name or 'agua perfumada' in name:
            return ('Bebé', 'Higiene y cuidado', 'Colonia')
        else:
            return ('Bebé', 'Higiene y cuidado', 'Accesorios')

    return (pd.NA, pd.NA, pd.NA)

In [4942]:
datamarket_update('bebe|papillas', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 387 to 3120
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [4943]:
datamarket_update('bebe|bebidas_galletas_y_yogures', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1000 to 3295
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [4944]:
datamarket_update('bebe|leche_para_bebes', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 89 to 4028
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14 

In [4945]:
datamarket_update('bebe|potitos_y_tarritos', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 942 to 4701
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

In [4946]:
datamarket_update('bebe|alimentacion_infantil', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 231 to 4917
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         30 non-null     int64  
 1   supermarket                30 non-null     object 
 2   brand_category             30 non-null     object 
 3   name                       30 non-null     object 
 4   description                27 non-null     object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      30 non-null     float64
 8   reference_price            30 non-null     float64
 9   reference_unit             30 non-null     object 
 10  insert_date                30 non-null     object 
 11  price_corrected            30 non-null     bool   
 12  reference_price_corrected  30 non-null     bool   
 13  category_name              30 non-null     object 
 1

In [4947]:
datamarket_update('bebe|toallitas_y_panales', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 63 to 4591
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 14

In [4948]:
datamarket_update('bebe|panales', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 23 to 4769
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 14

In [4949]:
datamarket_update('bebe|toallitas', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1332 to 1590
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4950]:
datamarket_update('bebe|cuidado_del_bebe', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 169 to 4990
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [4951]:
datamarket_update('bebe|higiene_y_cuidado', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 278 to 3680
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [4952]:
datamarket_update('bebe|biberon_chupete_y_menaje', clasificar_category_bebe)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 791 to 4578
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                8 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

# Procesamiento de la categoría "bebidas"

In [4953]:
current_category = df_category[df_category["category_name"] == "Agua y refrescos"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
18,Agua y refrescos,Agua,Agua sin gas,19
19,Agua y refrescos,Agua,Agua con gas,20
20,Agua y refrescos,Agua,Gaseosa,21
21,Agua y refrescos,Isotónico y energético,Isotónico,22
22,Agua y refrescos,Isotónico y energético,Energético,23
23,Agua y refrescos,Refresco de cola,Cola clásica,24
24,Agua y refrescos,Refresco de cola,Cola zero,25
25,Agua y refrescos,Refresco de cola,Cola sin cafeína,26
26,Agua y refrescos,Refresco de naranja y de limón,Limón,27
27,Agua y refrescos,Refresco de naranja y de limón,Lima limón,28


In [4954]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'bebidas']

Unnamed: 0,category,subcategory,subsubcategory
5,bebidas,licores_y_cremas,pacharan
10,bebidas,alcoholes,brandy_y_conac
15,bebidas,aguas_y_zumos,zumos_no_refrigerados
25,bebidas,refrescos,
26,bebidas,aguas,
55,bebidas,refrescos,sabores_con_gas
61,bebidas,refrescos,te
68,bebidas,vinos,de_mesa
127,bebidas,cervezas,con_limon
132,bebidas,zumos,


In [4955]:
def clasificar_category_bebidas_aguas(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['bebidas|refrescos|sabores_con_gas', 'bebidas|refrescos', 'bebidas|refrescos|sabores_sin_gas', 'bebidas|aguas_y_zumos|agua_con_sabor']:
        if 'naranja' in name:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Naranja')
        elif 'limón' in name:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Limón')
        else:
            return ('Agua y refrescos', 'Refresco de naranja y de limón', 'Lima limón')

    if subcat in ['bebidas|refrescos|sabores_sin_gas', 'bebidas|refrescos|te', 'bebidas|batidos_y_horchata']:
        if 'té' in name:
            return ('Agua y refrescos', 'Refresco de té y sin gas', 'Té')
        else:
            return ('Agua y refrescos', 'Refresco de té y sin gas', 'Otros refrescos sin gas')

    if subcat in ['bebidas|refrescos|tonica', 'bebidas|refrescos|bitter_y_ginger_ale']:
        return ('Agua y refrescos', 'Tónica y bitter', 'Tónica y bitter')

    if subcat in ['bebidas|bebidas_isotonicas_y_energeticas', 'bebidas|refrescos|isotonicas', 'bebidas|refrescos|energeticas']:
        if 'energético' in name:
            return ('Agua y refrescos', 'Isotónico y energético', 'Energético')
        else:
            return ('Agua y refrescos', 'Isotónico y energético', 'Isotónico')

    if subcat in ['bebidas|aguas', 'bebidas|aguas_y_zumos|agua_hasta_075_litros', 'bebidas|aguas_y_zumos|agua_con_gas', 'bebidas|aguas_y_zumos|agua_de_mas_de_2_litros']:
        if 'gas' in name:
            return ('Agua y refrescos', 'Agua', 'Agua con gas')
        elif 'gaseosa' in name:
            return ('Agua y refrescos', 'Agua', 'Gaseosa')
        else:
            return ('Agua y refrescos', 'Agua', 'Agua sin gas')

    if subcat == 'bebidas|refrescos|gaseosa':
        return ('Agua y refrescos', 'Agua', 'Gaseosa')

    if subcat == 'bebidas|refrescos|colas':
        if 'zero cafeína' in name or 'zero zero' in name or 'sin cafeína' in name:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola sin cafeína')
        elif 'zero azúcar' in name or 'azúcar' in name:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola zero')
        else:
            return ('Agua y refrescos', 'Refresco de cola', 'Cola clásica')


    return (pd.NA, pd.NA, pd.NA)

In [4956]:
datamarket_update('bebidas|refrescos|sabores_con_gas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 55 to 3672
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [4957]:
datamarket_update('bebidas|refrescos', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 43 entries, 25 to 4724
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         43 non-null     int64  
 1   supermarket                43 non-null     object 
 2   brand_category             43 non-null     object 
 3   name                       43 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  43 non-null     object 
 6   trademark_propietary_flag  43 non-null     object 
 7   price                      43 non-null     float64
 8   reference_price            43 non-null     float64
 9   reference_unit             43 non-null     object 
 10  insert_date                43 non-null     object 
 11  price_corrected            43 non-null     bool   
 12  reference_price_corrected  43 non-null     bool   
 13  category_name              43 non-null     object 
 14

In [4958]:
datamarket_update('bebidas|refrescos|sabores_sin_gas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2072 to 4620
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [4959]:
datamarket_update('bebidas|aguas_y_zumos|agua_con_sabor', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1201 to 4631
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [4960]:
datamarket_update('bebidas|refrescos|te', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 61 to 3038
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [4961]:
datamarket_update('bebidas|batidos_y_horchata', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 896 to 4226
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [4962]:
datamarket_update('bebidas|refrescos|tonica', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1294 to 3909
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4963]:
datamarket_update('bebidas|refrescos|bitter_y_ginger_ale', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 623 to 3824
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [4964]:
datamarket_update('bebidas|bebidas_isotonicas_y_energeticas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 673 to 4670
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [4965]:
datamarket_update('bebidas|refrescos|isotonicas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2465 to 2465
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [4966]:
datamarket_update('bebidas|refrescos|energeticas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1957 to 4478
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4967]:
datamarket_update('bebidas|aguas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 26 to 4936
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 14

In [4968]:
datamarket_update('bebidas|aguas_y_zumos|agua_hasta_075_litros', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 194 to 2360
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [4969]:
datamarket_update('bebidas|aguas_y_zumos|agua_con_gas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 506 to 4084
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [4970]:
datamarket_update('bebidas|aguas_y_zumos|agua_de_mas_de_2_litros', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3210 to 3210
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [4971]:
datamarket_update('bebidas|refrescos|colas', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1266 to 4632
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4972]:
datamarket_update('bebidas|refrescos|gaseosa', clasificar_category_bebidas_aguas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1405 to 3008
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4973]:
def clasificar_category_bebidas_zumos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['bebidas|aguas_y_zumos|zumos_no_refrigerados', 'bebidas|aguas_y_zumos|zumos_refrigerados', 'bebidas|zumos']:
        if 'leche' in name:
            return ('Zumos', 'Fruta variada', 'Fruta + leche')
        elif 'smoothie' in name:
            return ('Zumos', 'Fruta variada', 'Smoothie')
        elif 'melocotón' in name:
            return ('Zumos', 'Melocotón y piña', 'Melocotón')
        elif 'piña' in name:
            return ('Zumos', 'Melocotón y piña', 'Piña')
        elif 'naranja' in name:
            return ('Zumos', 'Naranja', 'Naranja')
        elif 'tomate' in name:
            return ('Zumos', 'Tomate y otros sabores', 'Otros sabores')
        else:
            return ('Zumos', 'Fruta variada', 'Fruta variada y otros sabores')

    return (pd.NA, pd.NA, pd.NA)

In [4974]:
datamarket_update('bebidas|aguas_y_zumos|zumos_no_refrigerados', clasificar_category_bebidas_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 15 to 2833
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14 

In [4975]:
datamarket_update('bebidas|aguas_y_zumos|zumos_refrigerados', clasificar_category_bebidas_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 653 to 4069
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [4976]:
datamarket_update('bebidas|zumos', clasificar_category_bebidas_zumos)

<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, 132 to 4573
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         32 non-null     int64  
 1   supermarket                32 non-null     object 
 2   brand_category             32 non-null     object 
 3   name                       32 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  32 non-null     object 
 6   trademark_propietary_flag  32 non-null     object 
 7   price                      32 non-null     float64
 8   reference_price            32 non-null     float64
 9   reference_unit             32 non-null     object 
 10  insert_date                32 non-null     object 
 11  price_corrected            32 non-null     bool   
 12  reference_price_corrected  32 non-null     bool   
 13  category_name              32 non-null     object 
 1

In [4977]:
current_category = df_category[df_category["category_name"] == 'Bodega']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
81,Bodega,Cerveza,Cerveza lata,82
82,Bodega,Cerveza,Cerveza botella y botellín,83
83,Bodega,Cerveza,Combinado de cerveza,84
84,Bodega,Cerveza sin alcohol,Cerveza botella y botellín,85
85,Bodega,Cerveza sin alcohol,Cerveza lata,86
86,Bodega,Licores,Vermouth y aperitivos,87
87,Bodega,Licores,Ginebra,88
88,Bodega,Licores,Brandy,89
89,Bodega,Licores,Whisky,90
90,Bodega,Licores,Ron,91


In [4978]:
def clasificar_category_bebidas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'bebidas|cervezas|con_limon':
        return ('Bodega', 'Cerveza', 'Combinado de cerveza')

    if subcat == 'bebidas|alcoholes|vermouth':
        return ('Bodega', 'Licores', 'Vermouth y aperitivos')

    if subcat == 'bebidas|vinos|rosado':
        return ('Bodega', 'Vino rosado', 'Vino rosado')

    if subcat == 'bebidas|alcoholes|brandy_y_conac':
        return ('Bodega', 'Licores', 'Brandy')

    if subcat == 'bebidas|vinos|sangria_y_tinto_de_verano':
        return ('Bodega', 'Tinto de verano y sangría', 'Tinto de verano y sangría')

    if subcat in ['bebidas|cervezas|con_limon', 'bebidas|cervezas|especiales', 'bebidas|cervezas|nacionales', 'bebidas|cerveza|negra',
                  'bebidas|cerveza|radler_clara', 'bebidas|cerveza|rubia', 'bebidas|cerveza|tostada__roja', 'bebidas|cerveza|artesanas']:
        if 'lata' in name:
            return ('Bodega', 'Cerveza', 'Cerveza lata')
        else:
            return ('Bodega', 'Cerveza', 'Cerveza botella y botellín')

    if subcat in ['bebidas|cervezas|sin_alcohol', 'bebidas|cerveza|sin_alcohol']:
        if 'lata' in name:
            return ('Bodega', 'Cerveza sin alcohol', 'Cerveza lata')
        else:
            return ('Bodega', 'Cerveza sin alcohol', 'Cerveza botella y botellín')

    if subcat == 'bebidas|vinos|tinto':
        if 'rioja' in name:
            return ('Bodega', 'Vino tinto', 'Rioja')
        elif 'castilla la mancha' in name:
            return ('Bodega', 'Vino tinto', 'Castilla la Mancha')
        elif 'ribera del duero' in name:
            return ('Bodega', 'Vino tinto', 'Ribera del Duero')
        elif 'de mesa' in name:
            return ('Bodega', 'Vino tinto', 'Vino tinto de mesa')
        else:
            return ('Bodega', 'Vino tinto', 'Otros vinos tintos')

    if subcat == 'bebidas|vinos|blanco':
        if 'rueda' in name:
            return ('Bodega', 'Vino blanco', 'Rueda')
        elif 'semidulce' in name:
            return ('Bodega', 'Vino blanco', 'Vinos semidulces')
        elif 'dulce' in name or 'mosto' in name or 'mistela' in name:
            return ('Bodega', 'Vino blanco', 'Vinos dulces y mosto')
        elif 'de mesa' in name:
            return ('Bodega', 'Vino blanco', 'Vino blanco de mesa')
        else:
            return ('Bodega', 'Vino blanco', 'Rioja y otras denominaciones')

    if subcat == 'bebidas|vinos|de_mesa':
        if 'blanco' in name:
            return ('Bodega', 'Vino blanco', 'Vino blanco de mesa')
        else:
            return ('Bodega', 'Vino tinto', 'Vino tinto de mesa')

    if subcat == 'bebidas|vinos|generosos_y_dulces':
        return ('Bodega', 'Vino blanco', 'Vinos dulces y mosto')

    if subcat in ['bebidas|vinos|estuches_de_vino', 'bebidas|vinos|vinos_internacionales']:
        if 'blanco' in name:
            if 'rueda' in name:
                return ('Bodega', 'Vino blanco', 'Rueda')
            elif 'semidulce' in name:
                return ('Bodega', 'Vino blanco', 'Vinos semidulces')
            elif 'dulce' in name or 'mosto' in name or 'mistela' in name:
                return ('Bodega', 'Vino blanco', 'Vinos dulces y mosto')
            elif 'de mesa' in name:
                return ('Bodega', 'Vino blanco', 'Vino blanco de mesa')
            else:
                return ('Bodega', 'Vino blanco', 'Rioja y otras denominaciones')
        elif 'tinto' in name:
            if 'rioja' in name:
                return ('Bodega', 'Vino tinto', 'Rioja')
            elif 'castilla la mancha' in name:
                return ('Bodega', 'Vino tinto', 'Castilla la Mancha')
            elif 'ribera del duero' in name:
                return ('Bodega', 'Vino tinto', 'Ribera del Duero')
            elif 'de mesa' in name:
                return ('Bodega', 'Vino tinto', 'Vino tinto de mesa')
            else:
                return ('Bodega', 'Vino tinto', 'Otros vinos tintos')
        else:
            return ('Bodega', 'Vino rosado', 'Vino rosado')

    if subcat == 'bebidas|cava_y_champagne|cavas':
        if 'cava brut' in name:
            return ('Bodega', 'Sidra y cava', 'Cava brut')
        else:
            return ('Bodega', 'Sidra y cava', 'Cava semi seco')

    if subcat == 'bebidas|cava_y_champagne|champagne':
        return ('Bodega', 'Vino lambrusco y espumoso', 'Vino lambrusco y espumoso')

    if subcat in ['bebidas|sidra|achampanada', 'bebidas|sidra|cider', 'bebidas|sidra|natural']:
        return ('Bodega', 'Sidra y cava', 'Sidra')

    if subcat == 'bebidas|alcoholes|ginebra':
        return ('Bodega', 'Licores', 'Ginebra')

    if subcat == 'bebidas|alcoholes|vodka':
        return ('Bodega', 'Licores', 'Vodka')

    if subcat == 'bebidas|alcoholes|ron':
        return ('Bodega', 'Licores', 'Ron')

    if subcat == 'bebidas|alcoholes|whisky_y_bourbon':
        return ('Bodega', 'Licores', 'Whisky')

    if subcat == 'bebidas|licores_y_cremas|cremas':
        return ('Bodega', 'Licores', 'Cremas')

    if subcat in ['bebidas|licores_y_cremas|licores_sin_alcohol', 'bebidas|alcoholes|sin_alcohol']:
        return ('Bodega', 'Licores', 'Licores sin alcohol')

    if subcat == 'bebidas|licores_y_cremas|anis_y_chinchon':
        return ('Bodega', 'Licores', 'Anís')

    if subcat in ['bebidas|licores_y_cremas|pacharan', 'bebidas|licores_y_cremas|licores_y_orujo', 'bebidas|alcoholes|tequila', 'bebidas|alcoholes|cockteles_y_combinados']:
        return ('Bodega', 'Licores', 'Otros licores')

    return (pd.NA, pd.NA, pd.NA)

In [4979]:
datamarket_update('bebidas|cervezas|con_limon', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 127 to 2735
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [4980]:
datamarket_update('bebidas|alcoholes|vermouth', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1564 to 4233
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4981]:
datamarket_update('bebidas|vinos|rosado', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1780 to 2175
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4982]:
datamarket_update('bebidas|alcoholes|brandy_y_conac', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 10 to 4907
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14 

In [4983]:
datamarket_update('bebidas|vinos|sangria_y_tinto_de_verano', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 741 to 4988
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [4984]:
datamarket_update('bebidas|cervezas|especiales', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 336 to 4996
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [4985]:
datamarket_update('bebidas|cervezas|nacionales', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 1237 to 4901
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         14 non-null     int64  
 1   supermarket                14 non-null     object 
 2   brand_category             14 non-null     object 
 3   name                       14 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  14 non-null     object 
 6   trademark_propietary_flag  14 non-null     object 
 7   price                      14 non-null     float64
 8   reference_price            14 non-null     float64
 9   reference_unit             14 non-null     object 
 10  insert_date                14 non-null     object 
 11  price_corrected            14 non-null     bool   
 12  reference_price_corrected  14 non-null     bool   
 13  category_name              14 non-null     object 
 

In [4986]:
datamarket_update('bebidas|cerveza|negra', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 995 to 1480
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [4987]:
datamarket_update('bebidas|cerveza|radler_clara', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2507 to 3000
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4988]:
datamarket_update('bebidas|cerveza|rubia', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3798 to 4272
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4989]:
datamarket_update('bebidas|cerveza|tostada__roja', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1451 to 1451
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [4990]:
datamarket_update('bebidas|cerveza|artesanas', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1214 to 4892
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [4991]:
datamarket_update('bebidas|cervezas|sin_alcohol', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 687 to 4943
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [4992]:
datamarket_update('bebidas|cerveza|sin_alcohol', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 803 to 4177
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [4993]:
datamarket_update('bebidas|vinos|tinto', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1285 to 3863
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [4994]:
datamarket_update('bebidas|vinos|blanco', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3048 to 3114
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4995]:
datamarket_update('bebidas|vinos|de_mesa', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 68 to 1790
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14 

In [4996]:
datamarket_update('bebidas|vinos|generosos_y_dulces', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1162 to 3514
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4997]:
datamarket_update('bebidas|vinos|estuches_de_vino', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2383 to 2931
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [4998]:
datamarket_update('bebidas|vinos|vinos_internacionales', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2870 to 2870
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [4999]:
datamarket_update('bebidas|cava_y_champagne|cavas', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3357 to 4508
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5000]:
datamarket_update('bebidas|cava_y_champagne|champagne', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 890 to 4963
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5001]:
datamarket_update('bebidas|sidra|achampanada', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 480 to 3520
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5002]:
datamarket_update('bebidas|sidra|cider', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 470 to 4893
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5003]:
datamarket_update('bebidas|sidra|natural', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 698 to 2579
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5004]:
datamarket_update('bebidas|alcoholes|ginebra', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1113 to 2214
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5005]:
datamarket_update('bebidas|alcoholes|vodka', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 1421 to 4773
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 1

In [5006]:
datamarket_update('bebidas|alcoholes|ron', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1576 to 2886
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5007]:
datamarket_update('bebidas|alcoholes|whisky_y_bourbon', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 989 to 4960
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5008]:
datamarket_update('bebidas|licores_y_cremas|cremas', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1429 to 4305
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5009]:
datamarket_update('bebidas|licores_y_cremas|licores_sin_alcohol', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2021 to 2021
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5010]:
datamarket_update('bebidas|alcoholes|sin_alcohol', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3597 to 4176
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5011]:
datamarket_update('bebidas|licores_y_cremas|anis_y_chinchon', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 798 to 3675
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5012]:
datamarket_update('bebidas|licores_y_cremas|pacharan', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 5 to 674
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14  s

In [5013]:
datamarket_update('bebidas|licores_y_cremas|licores_y_orujo', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1922 to 3277
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5014]:
datamarket_update('bebidas|alcoholes|tequila', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2466 to 4333
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5015]:
datamarket_update('bebidas|alcoholes|cockteles_y_combinados', clasificar_category_bebidas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1131 to 3770
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

# Procesamiento de la categoría "bodega"

In [5016]:
current_category = df_category[df_category["category_name"] == 'Bodega']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
81,Bodega,Cerveza,Cerveza lata,82
82,Bodega,Cerveza,Cerveza botella y botellín,83
83,Bodega,Cerveza,Combinado de cerveza,84
84,Bodega,Cerveza sin alcohol,Cerveza botella y botellín,85
85,Bodega,Cerveza sin alcohol,Cerveza lata,86
86,Bodega,Licores,Vermouth y aperitivos,87
87,Bodega,Licores,Ginebra,88
88,Bodega,Licores,Brandy,89
89,Bodega,Licores,Whisky,90
90,Bodega,Licores,Ron,91


In [5017]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'bodega']

Unnamed: 0,category,subcategory,subsubcategory
11,bodega,cerveza,
42,bodega,vino,tinto
95,bodega,vino,blanco
104,bodega,alcoholes,whisky
129,bodega,sidra_y_cava,
159,bodega,vino,rosado
172,bodega,vino_tinto,
193,bodega,alcoholes,vermouth
270,bodega,alcoholes,ginebra
309,bodega,espumosos,


In [5018]:
def clasificar_category_bodega(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'bodega|cerveza':
        if 'limón' in name:
           return ('Bodega', 'Cerveza', 'Combinado de cerveza')

    if subcat == 'bodega|alcoholes|vermouth':
        return ('Bodega', 'Licores', 'Vermouth y aperitivos')

    if subcat in ['bodega|vino|rosado', 'bodega|vino_rosado']:
        return ('Bodega', 'Vino rosado', 'Vino rosado')

    if subcat == 'bodega|alcoholes|brandy':
        return ('Bodega', 'Licores', 'Brandy')

    if subcat in ['bodega|tinto_de_verano_y_sangria', 'bodega|vinos_de_mesa_sangrias_y_tintos_de_verano']:
        if 'sangría' in name or 'verano' in name:
            return ('Bodega', 'Tinto de verano y sangría', 'Tinto de verano y sangría')

    if subcat in ['bodega|cerveza', 'bodega|cerveza_sin_alcohol']:
        if 'lata' in name:
            return ('Bodega', 'Cerveza', 'Cerveza lata')
        else:
            return ('Bodega', 'Cerveza', 'Cerveza botella y botellín')

    if subcat == 'bodega|cerveza_sin_alcohol':
        if 'lata' in name:
            return ('Bodega', 'Cerveza sin alcohol', 'Cerveza lata')
        else:
            return ('Bodega', 'Cerveza sin alcohol', 'Cerveza botella y botellín')

    if subcat in ['bodega|vino|tinto', 'bodega|vino_tinto']:
        if 'rioja' in name:
            return ('Bodega', 'Vino tinto', 'Rioja')
        elif 'castilla la mancha' in name:
            return ('Bodega', 'Vino tinto', 'Castilla la Mancha')
        elif 'ribera del duero' in name:
            return ('Bodega', 'Vino tinto', 'Ribera del Duero')
        elif 'de mesa' in name:
            return ('Bodega', 'Vino tinto', 'Vino tinto de mesa')
        else:
            return ('Bodega', 'Vino tinto', 'Otros vinos tintos')

    if subcat in ['bodega|vino|blanco', 'bodega|vino_blanco']:
        if 'rueda' in name:
            return ('Bodega', 'Vino blanco', 'Rueda')
        elif 'semidulce' in name:
            return ('Bodega', 'Vino blanco', 'Vinos semidulces')
        elif 'dulce' in name or 'mosto' in name or 'mistela' in name:
            return ('Bodega', 'Vino blanco', 'Vinos dulces y mosto')
        elif 'de mesa' in name:
            return ('Bodega', 'Vino blanco', 'Vino blanco de mesa')
        else:
            return ('Bodega', 'Vino blanco', 'Rioja y otras denominaciones')

    if subcat in ['bodega|sidra_y_cava', 'bodega|vino_lambrusco_y_espumoso', 'bodega|espumosos']:
        if 'sidra' in name:
            return ('Bodega', 'Sidra y cava', 'Sidra')
        elif 'cava brut' in name:
            return ('Bodega', 'Sidra y cava', 'Cava brut')
        elif 'espumoso' in name or 'lambrusco' in name:
            return ('Bodega', 'Vino lambrusco y espumoso', 'Vino lambrusco y espumoso')
        else:
            return ('Bodega', 'Sidra y cava', 'Cava semi seco')

    if subcat == 'bodega|vino|generoso_y_dulce':
        return ('Bodega', 'Vino blanco', 'Vinos dulces y mosto')

    if subcat == 'bodega|vinos_de_mesa_sangrias_y_tintos_de_verano':
        if 'blanco' in name and 'vino' in name:
            return ('Bodega', 'Vino blanco', 'Vino blanco de mesa')
        elif 'vino' in name:
            return ('Bodega', 'Vino tinto', 'Vino tinto de mesa')

    if subcat == 'bodega|alcoholes|ginebra':
        return ('Bodega', 'Licores', 'Ginebra')

    if subcat == 'bodega|alcoholes|vodka':
        return ('Bodega', 'Licores', 'Vodka')

    if subcat == 'bodega|alcoholes|ron':
        return ('Bodega', 'Licores', 'Ron')

    if subcat == 'bodega|alcoholes|whisky':
        return ('Bodega', 'Licores', 'Whisky')

    if subcat in ['bodega|licores', 'bodega|licores|sin_alcohol', 'bodega|licores|anis']:
        if 'crema' in name:
            return ('Bodega', 'Licores', 'Cremas')
        elif 'sin alcohol' in name:
            return ('Bodega', 'Licores', 'Licores sin alcohol')
        elif 'anís' in name:
            return ('Bodega', 'Licores', 'Anís')
        else:
            return ('Bodega', 'Licores', 'Otros licores')

    if subcat in ['bodega|licores|de_hierbas', 'bodega|licores|otros_licores', 'bodega|licores|pacharan']:
        return ('Bodega', 'Licores', 'Otros licores')

    return (pd.NA, pd.NA, pd.NA)

In [5019]:
datamarket_update('bodega|cerveza', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 11 to 4863
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                30 non-null     object 
 5   trademark                  31 non-null     object 
 6   trademark_propietary_flag  31 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 14

In [5020]:
datamarket_update('bodega|licores|de_hierbas', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4855 to 4855
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5021]:
datamarket_update('bodega|licores|otros_licores', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2421 to 3759
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5022]:
datamarket_update('bodega|licores|pacharan', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1194 to 1194
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5023]:
datamarket_update('bodega|licores', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 416 to 4735
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                20 non-null     object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [5024]:
datamarket_update('bodega|licores|sin_alcohol', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3637 to 3637
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5025]:
datamarket_update('bodega|licores|anis', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4783 to 4783
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5026]:
datamarket_update('bodega|alcoholes|whisky', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 104 to 2726
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5027]:
datamarket_update('bodega|alcoholes|ron', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 880 to 2832
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5028]:
datamarket_update('bodega|alcoholes|vodka', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1041 to 1041
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5029]:
datamarket_update('bodega|alcoholes|ginebra', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 270 to 3890
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5030]:
datamarket_update('bodega|vinos_de_mesa_sangrias_y_tintos_de_verano', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 2996 to 4687
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5031]:
datamarket_update('bodega|vino|generoso_y_dulce', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1170 to 2115
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5032]:
datamarket_update('bodega|espumosos', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 309 to 4644
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [5033]:
datamarket_update('bodega|vino_lambrusco_y_espumoso', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3389 to 3389
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                1 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5034]:
datamarket_update('bodega|sidra_y_cava', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 129 to 4603
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [5035]:
datamarket_update('bodega|vino_blanco', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 17 entries, 324 to 4241
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         17 non-null     int64  
 1   supermarket                17 non-null     object 
 2   brand_category             17 non-null     object 
 3   name                       17 non-null     object 
 4   description                17 non-null     object 
 5   trademark                  17 non-null     object 
 6   trademark_propietary_flag  17 non-null     object 
 7   price                      17 non-null     float64
 8   reference_price            17 non-null     float64
 9   reference_unit             17 non-null     object 
 10  insert_date                17 non-null     object 
 11  price_corrected            17 non-null     bool   
 12  reference_price_corrected  17 non-null     bool   
 13  category_name              17 non-null     object 
 1

In [5036]:
datamarket_update('bodega|vino|blanco', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 95 to 4720
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 14

In [5037]:
datamarket_update('bodega|vino_tinto', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 172 to 4739
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

In [5038]:
datamarket_update('bodega|vino|tinto', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 47 entries, 42 to 4962
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         47 non-null     int64  
 1   supermarket                47 non-null     object 
 2   brand_category             47 non-null     object 
 3   name                       47 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  47 non-null     object 
 6   trademark_propietary_flag  47 non-null     object 
 7   price                      47 non-null     float64
 8   reference_price            47 non-null     float64
 9   reference_unit             47 non-null     object 
 10  insert_date                47 non-null     object 
 11  price_corrected            47 non-null     bool   
 12  reference_price_corrected  47 non-null     bool   
 13  category_name              47 non-null     object 
 14

In [5039]:
datamarket_update('bodega|cerveza_sin_alcohol', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1955 to 3840
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                4 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5040]:
datamarket_update('bodega|tinto_de_verano_y_sangria', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 758 to 4935
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                3 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5041]:
datamarket_update('bodega|alcoholes|brandy', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 785 to 4064
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5042]:
datamarket_update('bodega|vino_rosado', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1314 to 1314
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                1 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5043]:
datamarket_update('bodega|vino|rosado', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 159 to 159
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5044]:
datamarket_update('bodega|alcoholes|vermouth', clasificar_category_bodega)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 193 to 4094
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

# Procesamiento de la categoría "cacao_cafe_e_infusiones"

In [5045]:
current_category = df_category[df_category["category_name"] == 'Cacao, café e infusiones']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
112,"Cacao, café e infusiones",Cacao soluble y chocolate a la taza,Cacao soluble,113
113,"Cacao, café e infusiones",Cacao soluble y chocolate a la taza,Chocolate a la taza,114
114,"Cacao, café e infusiones",Café cápsula y monodosis,Cápsulas compatibles Nespresso,115
115,"Cacao, café e infusiones",Café cápsula y monodosis,Cápsulas compatibles Dolce gusto,116
116,"Cacao, café e infusiones",Café cápsula y monodosis,Cápsulas compatibles Tassimo,117
117,"Cacao, café e infusiones",Café cápsula y monodosis,Monodosis,118
118,"Cacao, café e infusiones",Café molido y en grano,Café molido,119
119,"Cacao, café e infusiones",Café molido y en grano,Café en grano,120
120,"Cacao, café e infusiones",Café soluble y otras bebidas,Café soluble,121
121,"Cacao, café e infusiones",Café soluble y otras bebidas,Bebidas frías,122


In [5046]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cacao_cafe_e_infusiones']

Unnamed: 0,category,subcategory,subsubcategory
32,cacao_cafe_e_infusiones,te_e_infusiones,
212,cacao_cafe_e_infusiones,cafe_capsula_y_monodosis,
271,cacao_cafe_e_infusiones,cafe_molido_y_en_grano,
667,cacao_cafe_e_infusiones,cafe_soluble_y_otras_bebidas,
800,cacao_cafe_e_infusiones,cacao_soluble_y_chocolate_a_la_taza,


In [5047]:
def clasificar_category_cacao(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cacao_cafe_e_infusiones|cafe_capsula_y_monodosis':
        if 'cápsulas' in name and 'nespresso' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Nespresso')
        elif 'cápsulas' in name and 'dolce gusto' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Dolce gusto')
        elif 'cápsulas' in name and 'tassimo' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Tassimo')
        else:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Monodosis')

    if subcat == 'cacao_cafe_e_infusiones|cafe_molido_y_en_grano':
        if 'molido' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café molido')
        elif 'en grano' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café en grano')
        else:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Otros')

    if subcat == 'cacao_cafe_e_infusiones|cafe_soluble_y_otras_bebidas':
        if 'soluble' in name:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Café soluble')
        else:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Otros')

    if subcat == 'cacao_cafe_e_infusiones|cacao_soluble_y_chocolate_a_la_taza':
        if 'chocolate' in name:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Chocolate a la taza')
        else:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Cacao soluble')

    if subcat == 'cacao_cafe_e_infusiones|te_e_infusiones':
        if 'té' in name:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Té')
        else:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Infusiones')

    return (pd.NA, pd.NA, pd.NA)

In [5048]:
datamarket_update('cacao_cafe_e_infusiones|te_e_infusiones', clasificar_category_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 32 to 4904
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14 

In [5049]:
datamarket_update('cacao_cafe_e_infusiones|cacao_soluble_y_chocolate_a_la_taza', clasificar_category_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 800 to 3358
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5050]:
datamarket_update('cacao_cafe_e_infusiones|cafe_soluble_y_otras_bebidas', clasificar_category_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 667 to 4818
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5051]:
datamarket_update('cacao_cafe_e_infusiones|cafe_molido_y_en_grano', clasificar_category_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 271 to 4278
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5052]:
datamarket_update('cacao_cafe_e_infusiones|cafe_capsula_y_monodosis', clasificar_category_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 212 to 4982
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

# Procesamiento de la categoría "carne"

In [5053]:
current_category = df_category[df_category["category_name"] == 'Carne']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
125,Carne,Arreglos,Arreglos,126
126,Carne,Aves y pollo,Pavo y otras aves,127
127,Carne,Aves y pollo,Pollo,128
128,Carne,Carne congelada,Carne congelada,129
129,Carne,Cerdo,Cerdo,130
130,Carne,Conejo y cordero,Conejo,131
131,Carne,Conejo y cordero,Cordero,132
132,Carne,Embutido,Embutido,133
133,Carne,Hamburguesas y picadas,Hamburguesas,134
134,Carne,Hamburguesas y picadas,Picadas y otros,135


In [5054]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'carne']

Unnamed: 0,category,subcategory,subsubcategory
14,carne,cerdo,
53,carne,aves_y_pollo,
77,carne,vacuno,
191,carne,hamburguesas_y_picadas,
318,carne,arreglos,
349,carne,carne_congelada,
661,carne,embutido,
898,carne,empanados_y_elaborados,
941,carne,conejo_y_cordero,


In [5055]:
def clasificar_category_carne(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'carne|aves_y_pollo':
        if 'pollo' in name:
            return ('Carne', 'Aves y pollo', 'Pollo')
        else:
            return ('Carne', 'Aves y pollo', 'Pavo y otras aves')

    if subcat == 'carne|vacuno':
        return ('Carne', 'Vacuno', 'Vacuno')

    if subcat == 'carne|cerdo':
        return ('Carne', 'Cerdo', 'Cerdo')

    if subcat == 'carne|arreglos':
        return ('Carne', 'Arreglos', 'Arreglos')

    if subcat == 'carne|carne_congelada':
        return ('Carne', 'Carne congelada', 'Carne congelada')

    if subcat == 'carne|embutido':
        return ('Carne', 'Embutido', 'Embutido')

    if subcat == 'carne|conejo_y_cordero':
        if 'conejo' in name:
            return ('Carne', 'Conejo y cordero', 'Conejo')
        else:
            return ('Carne', 'Conejo y cordero', 'Cordero')

    if subcat == 'carne|hamburguesas_y_picadas':
        if 'burger' in name or 'hamburguesa' in name or 'burgers' in name:
            return ('Carne', 'Hamburguesas y picadas', 'Hamburguesas')
        else:
            return ('Carne', 'Hamburguesas y picadas', 'Picadas y otros')

    if subcat == 'carne|empanados_y_elaborados':
        if 'congelado' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y rebozados congelados')
        else:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')


    return (pd.NA, pd.NA, pd.NA)

In [5056]:
datamarket_update('carne|aves_y_pollo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, 53 to 4814
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         28 non-null     int64  
 1   supermarket                28 non-null     object 
 2   brand_category             28 non-null     object 
 3   name                       28 non-null     object 
 4   description                28 non-null     object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      28 non-null     float64
 8   reference_price            28 non-null     float64
 9   reference_unit             28 non-null     object 
 10  insert_date                28 non-null     object 
 11  price_corrected            28 non-null     bool   
 12  reference_price_corrected  28 non-null     bool   
 13  category_name              28 non-null     object 
 14

In [5057]:
datamarket_update('carne|vacuno', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 77 to 4387
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 14

In [5058]:
datamarket_update('carne|cerdo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 14 to 4886
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                18 non-null     object 
 5   trademark                  18 non-null     object 
 6   trademark_propietary_flag  18 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 14

In [5059]:
datamarket_update('carne|arreglos', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 318 to 4180
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5060]:
datamarket_update('carne|carne_congelada', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 349 to 3778
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                4 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5061]:
datamarket_update('carne|embutido', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 661 to 4777
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5062]:
datamarket_update('carne|conejo_y_cordero', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 941 to 4086
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [5063]:
datamarket_update('carne|hamburguesas_y_picadas', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 191 to 4833
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [5064]:
datamarket_update('carne|empanados_y_elaborados', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 898 to 3598
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

# Procesamiento de la categoría "mascotas"

In [5065]:
current_category = df_category[df_category["category_name"] == 'Mascotas']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
371,Mascotas,Gato,Alimentación húmeda,372
372,Mascotas,Gato,Alimentación seca,373
373,Mascotas,Gato,Aseo y cuidado,374
374,Mascotas,Gato,Snacks,375
375,Mascotas,Perro,Alimentación húmeda,376
376,Mascotas,Perro,Alimentación seca,377
377,Mascotas,Perro,Aseo y cuidado,378
378,Mascotas,Perro,Snacks,379
379,Mascotas,Otros,Pájaro,380
380,Mascotas,Otros,Otros,381


In [5066]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'mascotas']

Unnamed: 0,category,subcategory,subsubcategory
54,mascotas,gatos,
90,mascotas,gatos,accesorios_e_higiene
98,mascotas,gato,
187,mascotas,peces_y_tortugas,tortugas
230,mascotas,perro,
275,mascotas,perros,
338,mascotas,gatos,premios_y_snacks
465,mascotas,perros,collares_y_correas
529,mascotas,conejos_y_roedores,pienso_para_conejos_y_rodeores
531,mascotas,otros_animales,


In [5067]:
def clasificar_category_mascotas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['mascotas|perros', 'mascotas|perro']:
        if 'paté' in name or 'salsa' in name or 'gelatina' in name or 'salchicha' in name:
            return ('Mascotas', 'Perro', 'Alimentación húmeda')
        elif 'aritos' in name or 'comida' in name:
            return ('Mascotas', 'Perro', 'Alimentación seca')
        elif 'snack' in name or 'premio' in name:
            return ('Mascotas', 'Perro', 'Snacks')
        else:
            return ('Mascotas', 'Perro', 'Aseo y cuidado')

    if subcat == 'mascotas|perros|collares_y_correas':
            return ('Mascotas', 'Perro', 'Aseo y cuidado')

    if subcat in ['mascotas|gatos', 'mascotas|gato']:
        if 'paté' in name or 'salsa' in name or 'gelatina' in name or 'mousse' in name:
            return ('Mascotas', 'Gato', 'Alimentación húmeda')
        elif 'comida' in name:
            return ('Mascotas', 'Gato', 'Alimentación seca')
        elif 'snack' in name or 'snacks' in name:
            return ('Mascotas', 'Gato', 'Snacks')
        else:
            return ('Mascotas', 'Gato', 'Aseo y cuidado')

    if subcat == 'mascotas|gatos|premios_y_snacks':
            return ('Mascotas', 'Gato', 'Snacks')

    if subcat == 'mascotas|gatos|accesorios_e_higiene':
            return ('Mascotas', 'Gato', 'Aseo y cuidado')

    if subcat in ['mascotas|otros', 'mascotas|otros_animales']:
        if 'periquitos' in name or 'canarios' in name or 'ninfa' in name or 'loros' in name or 'cotorras' in name:
            return ('Mascotas', 'Otros', 'Pájaro')
        else:
            return ('Mascotas', 'Otros', 'Otros')

    if subcat in 'mascotas|pajaros|pienso_para_pajaros':
        return ('Mascotas', 'Otros', 'Pájaro')

    if subcat in ['mascotas|peces_y_tortugas|tortugas', 'mascotas|peces_y_tortugas|peces', 'mascotas|conejos_y_roedores|pienso_para_conejos_y_rodeores', 'mascotas|conejos_y_roedores|accesorios_e_higiene']:
        return ('Mascotas', 'Otros', 'Otros')

    return (pd.NA, pd.NA, pd.NA)

In [5068]:
datamarket_update('mascotas|perros', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 275 to 4908
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         30 non-null     int64  
 1   supermarket                30 non-null     object 
 2   brand_category             30 non-null     object 
 3   name                       30 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      30 non-null     float64
 8   reference_price            30 non-null     float64
 9   reference_unit             30 non-null     object 
 10  insert_date                30 non-null     object 
 11  price_corrected            30 non-null     bool   
 12  reference_price_corrected  30 non-null     bool   
 13  category_name              30 non-null     object 
 1

In [5069]:
datamarket_update('mascotas|perro', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 230 to 4916
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         23 non-null     int64  
 1   supermarket                23 non-null     object 
 2   brand_category             23 non-null     object 
 3   name                       23 non-null     object 
 4   description                22 non-null     object 
 5   trademark                  23 non-null     object 
 6   trademark_propietary_flag  23 non-null     object 
 7   price                      23 non-null     float64
 8   reference_price            23 non-null     float64
 9   reference_unit             23 non-null     object 
 10  insert_date                23 non-null     object 
 11  price_corrected            23 non-null     bool   
 12  reference_price_corrected  23 non-null     bool   
 13  category_name              23 non-null     object 
 1

In [5070]:
datamarket_update('mascotas|gatos', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, 54 to 4843
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         28 non-null     int64  
 1   supermarket                28 non-null     object 
 2   brand_category             28 non-null     object 
 3   name                       28 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      28 non-null     float64
 8   reference_price            28 non-null     float64
 9   reference_unit             28 non-null     object 
 10  insert_date                28 non-null     object 
 11  price_corrected            28 non-null     bool   
 12  reference_price_corrected  28 non-null     bool   
 13  category_name              28 non-null     object 
 14

In [5071]:
datamarket_update('mascotas|gato', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 98 to 4956
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 14

In [5072]:
datamarket_update('mascotas|perros|collares_y_correas', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 465 to 465
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5073]:
datamarket_update('mascotas|gatos|premios_y_snacks', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 338 to 338
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5074]:
datamarket_update('mascotas|gatos|accesorios_e_higiene', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 90 to 4223
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14 

In [5075]:
datamarket_update('mascotas|otros_animales', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 531 to 4898
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5076]:
datamarket_update('mascotas|otros', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 826 to 3158
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                3 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5077]:
datamarket_update('mascotas|pajaros|pienso_para_pajaros', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 2851 to 4585
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5078]:
datamarket_update('mascotas|peces_y_tortugas|tortugas', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 187 to 187
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5079]:
datamarket_update('mascotas|peces_y_tortugas|peces', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4517 to 4517
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5080]:
datamarket_update('mascotas|conejos_y_roedores|pienso_para_conejos_y_rodeores', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 529 to 4657
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5081]:
datamarket_update('mascotas|conejos_y_roedores|accesorios_e_higiene', clasificar_category_mascotas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 805 to 4699
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

# Procesamiento de la categoría "charcuteria_y_quesos"

In [5082]:
current_category = df_category[df_category["category_name"] == 'Charcutería y quesos']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
146,Charcutería y quesos,Aves y jamón cocido,Pavo y otros,147
147,Charcutería y quesos,Aves y jamón cocido,Jamón cocido,148
148,Charcutería y quesos,Bacón y salchichas,Bacón,149
149,Charcutería y quesos,Bacón y salchichas,Salchichas,150
150,Charcutería y quesos,Chopped y mortadela,Chopped,151
151,Charcutería y quesos,Chopped y mortadela,Mortadela,152
152,Charcutería y quesos,Embutido curado,Salchichón,153
153,Charcutería y quesos,Embutido curado,Chorizo,154
154,Charcutería y quesos,Embutido curado,Lomo y otros,155
155,Charcutería y quesos,Jamón serrano,Jamón serrano,156


In [5083]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'charcuteria_y_quesos']

Unnamed: 0,category,subcategory,subsubcategory
6,charcuteria_y_quesos,queso_lonchas_rallado_y_en_porciones,
94,charcuteria_y_quesos,queso_curado_semicurado_y_tierno,
138,charcuteria_y_quesos,aves_y_jamon_cocido,
165,charcuteria_y_quesos,embutido_curado,
262,charcuteria_y_quesos,queso_untable_y_fresco,
287,charcuteria_y_quesos,bacon_y_salchichas,
403,charcuteria_y_quesos,pate_y_sobrasada,
484,charcuteria_y_quesos,jamon_serrano,
1094,charcuteria_y_quesos,chopped_y_mortadela,


In [5084]:
def clasificar_category_charcuteria(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'charcuteria_y_quesos|queso_untable_y_fresco':
        if 'fresco' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso fresco')
        elif 'roquefort' in name or 'camembert' in name or 'cabra' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso roquefort, camembert y cabra')
        else:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso untable')

    if subcat == 'charcuteria_y_quesos|jamon_serrano':
        return ('Charcutería y quesos', 'Jamón serrano', 'Jamón serrano')

    if subcat == 'charcuteria_y_quesos|aves_y_jamon_cocido':
        if 'jamón' in name:
            return ('Charcutería y quesos', 'Aves y jamón cocido', 'Jamón cocido')
        else:
            return ('Charcutería y quesos', 'Aves y jamón cocido', 'Pavo y otros')

    if subcat == 'charcuteria_y_quesos|chopped_y_mortadela':
        if 'mortadela' in name or 'galantina' in name:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Mortadela')
        else:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Chopped')

    if subcat == 'charcuteria_y_quesos|bacon_y_salchichas':
       if 'bacón' in name or 'panceta' in name:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Bacón')
       else:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Salchichas')

    if subcat == 'charcuteria_y_quesos|embutido_curado':
        if 'salchichón' in name or 'longaniza' in name or 'pepperoni' in name or'salami' in name:
            return ('Charcutería y quesos', 'Embutido curado', 'Salchichón')
        elif 'chorizo' in name:
            return ('Charcutería y quesos', 'Embutido curado', 'Chorizo')
        else:
            return ('Charcutería y quesos', 'Embutido curado', 'Lomo y otros')

    if subcat == 'charcuteria_y_quesos|pate_y_sobrasada':
        if 'sobrasada' in name:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Sobrasada')
        else:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Paté')

    if subcat in ['charcuteria_y_quesos|queso_curado_semicurado_y_tierno','charcuteria_y_quesos|queso_lonchas_rallado_y_en_porciones']:
        if 'añejo' in name or 'curado' in name or 'viejo' in name or'grana padano' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso curado')
        elif 'semicurado' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso semicurado')
        else:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso tierno')

    if subcat in ['charcuteria_y_quesos|queso_curado_semicurado_y_tierno','charcuteria_y_quesos|queso_lonchas_rallado_y_en_porciones']:
        if 'roquefort' in name or 'camembert' in name or 'cabra' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso roquefort, camembert y cabra')

    if subcat == 'charcuteria_y_quesos|queso_lonchas_rallado_y_en_porciones':
        if 'lonchas' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso lonchas')
        elif 'rallado' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso rallado')
        else:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso en porciones')


    return (pd.NA, pd.NA, pd.NA)

In [5085]:
datamarket_update('charcuteria_y_quesos|queso_untable_y_fresco', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 262 to 4709
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         26 non-null     int64  
 1   supermarket                26 non-null     object 
 2   brand_category             26 non-null     object 
 3   name                       26 non-null     object 
 4   description                26 non-null     object 
 5   trademark                  26 non-null     object 
 6   trademark_propietary_flag  26 non-null     object 
 7   price                      26 non-null     float64
 8   reference_price            26 non-null     float64
 9   reference_unit             26 non-null     object 
 10  insert_date                26 non-null     object 
 11  price_corrected            26 non-null     bool   
 12  reference_price_corrected  26 non-null     bool   
 13  category_name              26 non-null     object 
 1

In [5086]:
datamarket_update('charcuteria_y_quesos|queso_lonchas_rallado_y_en_porciones', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 6 to 4347
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 14 

In [5087]:
datamarket_update('charcuteria_y_quesos|queso_curado_semicurado_y_tierno', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 94 to 4291
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         24 non-null     int64  
 1   supermarket                24 non-null     object 
 2   brand_category             24 non-null     object 
 3   name                       24 non-null     object 
 4   description                24 non-null     object 
 5   trademark                  24 non-null     object 
 6   trademark_propietary_flag  24 non-null     object 
 7   price                      24 non-null     float64
 8   reference_price            24 non-null     float64
 9   reference_unit             24 non-null     object 
 10  insert_date                24 non-null     object 
 11  price_corrected            24 non-null     bool   
 12  reference_price_corrected  24 non-null     bool   
 13  category_name              24 non-null     object 
 14

In [5088]:
datamarket_update('charcuteria_y_quesos|pate_y_sobrasada', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 403 to 4327
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [5089]:
datamarket_update('charcuteria_y_quesos|embutido_curado', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 165 to 3806
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 1

In [5090]:
datamarket_update('charcuteria_y_quesos|bacon_y_salchichas', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 287 to 4807
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [5091]:
datamarket_update('charcuteria_y_quesos|chopped_y_mortadela', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1094 to 2787
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                3 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5092]:
datamarket_update('charcuteria_y_quesos|aves_y_jamon_cocido', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 138 to 4900
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                19 non-null     object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

In [5093]:
datamarket_update('charcuteria_y_quesos|jamon_serrano', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 484 to 4340
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

# Procesamiento de la categoría "conservas_caldos_y_cremas"

In [5094]:
current_category = df_category[df_category["category_name"] == 'Conservas, caldos y cremas']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
190,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Atún,191
191,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Bonito,192
192,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Caballa y melva,193
193,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Sardinas,194
194,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Otras conservas de pescado,195
195,"Conservas, caldos y cremas",Berberechos y mejillones,Berberechos y almejas,196
196,"Conservas, caldos y cremas",Berberechos y mejillones,Mejillones,197
197,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas verdura,198
198,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas fruta,199
199,"Conservas, caldos y cremas",Gazpacho y cremas,Gazpacho y salmorejo,200


In [5095]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'conservas_caldos_y_cremas']

Unnamed: 0,category,subcategory,subsubcategory
2,conservas_caldos_y_cremas,berberechos_y_mejillones,
205,conservas_caldos_y_cremas,conservas_de_verdura_y_frutas,
374,conservas_caldos_y_cremas,atun_y_otras_conservas_de_pescado,
955,conservas_caldos_y_cremas,gazpacho_y_cremas,
1230,conservas_caldos_y_cremas,sopa_y_caldo,
2111,conservas_caldos_y_cremas,tomate,


In [5096]:
def clasificar_category_conservas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'conservas_caldos_y_cremas|berberechos_y_mejillones':
        if 'mejillones' in name:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Mejillones')
        else:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Berberechos y almejas')

    if subcat == 'conservas_caldos_y_cremas|atun_y_otras_conservas_de_pescado':
        if 'atún' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Atún')
        elif 'bonito' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Bonito')
        elif 'caballa' in name or 'melva' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Caballa y melva')
        elif 'sardina' in name or 'sardinilla' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Sardinas')
        else:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Otras conservas de pescado')

    if subcat == 'conservas_caldos_y_cremas|conservas_de_verdura_y_frutas':
        if any(x in name for x in ['maíz', 'espárragos', 'champiñones', 'pimientos', 'guisantes', 'judías', 'alcachofa',
                                   'zanahoria', 'remolacha', 'acelgas', 'verduras', 'patata', 'ensalada', 'brotes', 'cebolla']):
            return ('Conservas, caldos y cremas', 'Conservas de verdura y frutas', 'Conservas verdura')
        elif 'gazpacho' in name or 'salmorejo' in name:
            return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Gazpacho y salmorejo')
        elif 'tomate' in name:
            return ('Conservas, caldos y cremas', 'Tomate', 'Tomate')
        else:
            return ('Conservas, caldos y cremas', 'Conservas de verdura y frutas', 'Conservas fruta')

    if subcat == 'conservas_caldos_y_cremas|tomate':
        return ('Conservas, caldos y cremas', 'Tomate', 'Tomate')

    if subcat == 'conservas_caldos_y_cremas|sopa_y_caldo':
        if 'en pastillas' in name:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo en pastillas')
        if 'caldo' in name:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo líquido')
        else:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Sopa')

    if subcat == 'conservas_caldos_y_cremas|gazpacho_y_cremas':
        if 'gazpacho' in name or 'salmorejo' in name:
            return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Gazpacho y salmorejo')
        else:
            return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Cremas y puré')

    return (pd.NA, pd.NA, pd.NA)

In [5097]:
datamarket_update('conservas_caldos_y_cremas|gazpacho_y_cremas', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 955 to 4570
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5098]:
datamarket_update('conservas_caldos_y_cremas|sopa_y_caldo', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1230 to 4984
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

In [5099]:
datamarket_update('conservas_caldos_y_cremas|tomate', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2111 to 2197
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5100]:
datamarket_update('conservas_caldos_y_cremas|conservas_de_verdura_y_frutas', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 205 to 4959
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                31 non-null     object 
 5   trademark                  31 non-null     object 
 6   trademark_propietary_flag  31 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 1

In [5101]:
datamarket_update('conservas_caldos_y_cremas|atun_y_otras_conservas_de_pescado', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 374 to 4798
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                15 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

In [5102]:
datamarket_update('conservas_caldos_y_cremas|berberechos_y_mejillones', clasificar_category_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 2 to 4961
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14  

# Procesamiento de la categoría "huevos_leche_y_mantequilla"

In [5103]:
current_category = df_category[df_category["category_name"] == 'Huevos, leche y mantequilla']
current_category

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
278,"Huevos, leche y mantequilla",Huevos,Huevos,279
279,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche semidesnatada,280
280,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche desnatada,281
281,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche entera,282
282,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Bebidas vegetales,283
283,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Batidos,284
284,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche Infantil,285
285,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche condensada y otros,286
286,"Huevos, leche y mantequilla",Mantequilla y margarina,Mantequilla,287
287,"Huevos, leche y mantequilla",Mantequilla y margarina,Margarina,288


In [5104]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'huevos_leche_y_mantequilla']

Unnamed: 0,category,subcategory,subsubcategory
0,huevos_leche_y_mantequilla,mantequilla_y_margarina,
69,huevos_leche_y_mantequilla,leche_y_bebidas_vegetales,
2561,huevos_leche_y_mantequilla,huevos,


In [5105]:
def clasificar_category_leche(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'huevos_leche_y_mantequilla|huevos':
        return ('Huevos, leche y mantequilla', 'Huevos', 'Huevos')

    if subcat == 'Nata':
        return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Nata')

    if subcat == 'huevos_leche_y_mantequilla|leche_y_bebidas_vegetales':
        if 'semidesnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche semidesnatada')
        elif 'desnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche desnatada')
        elif 'infantil' in name or 'preparado lácteo' in name or 'bebida láctea' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche Infantil')
        elif 'entera' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche entera')
        elif 'nata' in name:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Nata')
        elif 'batido' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Batidos')
        elif any(x in name for x in ['almendra', 'coco', 'arroz', 'soja', 'chufa', 'bebida', 'avellana', 'nueses', 'vegetal']):
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Bebidas vegetales')
        else:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche condensada y otros')

    if subcat == 'huevos_leche_y_mantequilla|mantequilla_y_margarina':
        if 'mantequilla' in name:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Mantequilla')
        else:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Margarina')

    return (pd.NA, pd.NA, pd.NA)

In [5106]:
datamarket_update('huevos_leche_y_mantequilla|mantequilla_y_margarina', clasificar_category_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 0 to 3715
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14  

In [5107]:
datamarket_update('huevos_leche_y_mantequilla|leche_y_bebidas_vegetales', clasificar_category_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 69 to 4876
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         38 non-null     int64  
 1   supermarket                38 non-null     object 
 2   brand_category             38 non-null     object 
 3   name                       38 non-null     object 
 4   description                38 non-null     object 
 5   trademark                  38 non-null     object 
 6   trademark_propietary_flag  38 non-null     object 
 7   price                      38 non-null     float64
 8   reference_price            38 non-null     float64
 9   reference_unit             38 non-null     object 
 10  insert_date                38 non-null     object 
 11  price_corrected            38 non-null     bool   
 12  reference_price_corrected  38 non-null     bool   
 13  category_name              38 non-null     object 
 14

In [5108]:
datamarket_update('huevos_leche_y_mantequilla|huevos', clasificar_category_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 2561 to 4941
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

# Procesamiento de la categoría "perfumeria_e_higiene"

In [5109]:
current_category = df_category[df_category["category_name"] == 'Cuidado facial y corporal']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
220,Cuidado facial y corporal,Afeitado y cuidado para hombre,Espuma de afeitar,221
221,Cuidado facial y corporal,Afeitado y cuidado para hombre,After shave,222
222,Cuidado facial y corporal,Afeitado y cuidado para hombre,Maquinillas de afeitar,223
223,Cuidado facial y corporal,Afeitado y cuidado para hombre,Recambios maquinilla de afeitar,224
224,Cuidado facial y corporal,Afeitado y cuidado para hombre,Crema y gel de cara,225
225,Cuidado facial y corporal,Cuidado corporal,Crema y aceite para el cuerpo,226
226,Cuidado facial y corporal,Cuidado corporal,Crema manos,227
227,Cuidado facial y corporal,Cuidado corporal,Crema pies,228
228,Cuidado facial y corporal,Cuidado corporal,Toallitas,229
229,Cuidado facial y corporal,Cuidado e higiene facial,Limpieza de cara,230


In [5110]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'perfumeria_e_higiene']

Unnamed: 0,category,subcategory,subsubcategory
217,perfumeria_e_higiene,bano_e_higiene_corporal,colonias
440,perfumeria_e_higiene,bano_e_higiene_corporal,geles_de_bano
551,perfumeria_e_higiene,bano_e_higiene_corporal,jabon_de_manos
686,perfumeria_e_higiene,bano_e_higiene_corporal,desodorantes
716,perfumeria_e_higiene,bano_e_higiene_corporal,esponjas_manoplas_y_cepillos_de_bano
1464,perfumeria_e_higiene,bano_e_higiene_corporal,ojos_y_oreja


In [5111]:
def clasificar_category_higiene(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|jabon_de_manos':
        return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Jabón de manos')

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|ojos_y_oreja':
        return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Contorno de ojos')

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|geles_de_bano':
        return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Gel')

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|esponjas_manoplas_y_cepillos_de_bano':
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Esponjas')

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|desodorantes':
        if 'roll' in name or 'stick'in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante roll on y stick')
        elif 'spray' in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante Spray')
        else:
            return ('Cuidado facial y corporal', 'Desodorante', 'Otros desodorantes')

    if subcat == 'perfumeria_e_higiene|bano_e_higiene_corporal|colonias':
        if 'eau de toilette mujer' in name or 'colonia mujer' in name or 'eau de parfum mujer' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia mujer')
        elif 'lote mujer' in name or 'neceser' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes mujer')
        elif 'eau de toilette hombre' in name or 'colonia hombre' in name or 'eau de parfum hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia hombre')
        elif 'lote hombre' in name or 'neceser hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes hombres')
        else:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Colonia infantil')


    return (pd.NA, pd.NA, pd.NA)

In [5112]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|colonias', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 217 to 1583
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5113]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|desodorantes', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 686 to 2861
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5114]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|esponjas_manoplas_y_cepillos_de_bano', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 716 to 2125
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5115]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|geles_de_bano', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 440 to 3905
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5116]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|ojos_y_oreja', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1464 to 1464
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5117]:
datamarket_update('perfumeria_e_higiene|bano_e_higiene_corporal|jabon_de_manos', clasificar_category_higiene)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 551 to 4888
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

# Procesamiento de la categoría "cuidado_personal"

In [5118]:
current_category = df_category[df_category["category_name"] == 'Cuidado facial y corporal']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
220,Cuidado facial y corporal,Afeitado y cuidado para hombre,Espuma de afeitar,221
221,Cuidado facial y corporal,Afeitado y cuidado para hombre,After shave,222
222,Cuidado facial y corporal,Afeitado y cuidado para hombre,Maquinillas de afeitar,223
223,Cuidado facial y corporal,Afeitado y cuidado para hombre,Recambios maquinilla de afeitar,224
224,Cuidado facial y corporal,Afeitado y cuidado para hombre,Crema y gel de cara,225
225,Cuidado facial y corporal,Cuidado corporal,Crema y aceite para el cuerpo,226
226,Cuidado facial y corporal,Cuidado corporal,Crema manos,227
227,Cuidado facial y corporal,Cuidado corporal,Crema pies,228
228,Cuidado facial y corporal,Cuidado corporal,Toallitas,229
229,Cuidado facial y corporal,Cuidado e higiene facial,Limpieza de cara,230


In [5119]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cuidado_personal']

Unnamed: 0,category,subcategory,subsubcategory
20,cuidado_personal,cuidado_bucal,
27,cuidado_personal,bano_e_higiene_personal,geles_de_duchas_y_esponjas
45,cuidado_personal,cuidado_corporal,cuidado_de_manos_y_pies
46,cuidado_personal,cuidado_corporal,limpieza_facial
100,cuidado_personal,cuidado_intimo,
118,cuidado_personal,cuidado_corporal,crema_facial
240,cuidado_personal,afeitado,
274,cuidado_personal,bano_e_higiene_personal,cuidado_del_cabello
302,cuidado_personal,bano_e_higiene_personal,desodorantes
313,cuidado_personal,bano_e_higiene_personal,champus


In [5120]:
df_datamarket[df_datamarket['brand_category'] == 'cuidado_personal|cuidado_corporal|cosmetica']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
2002,25864365,dia.es,cuidado_personal|cuidado_corporal|cosmetica,CUTEX quitaesmalte extrahidratante botella 200 ml,,cutex,False,3.31,16.55,l,2023-03-15,False,False,,,
3691,25864368,dia.es,cuidado_personal|cuidado_corporal|cosmetica,CUTEX quitaesmalte ultra efectivo bote 200 ml,,cutex,False,3.31,16.55,l,2023-03-15,False,False,,,


In [5121]:
def clasificar_category_cuidado_corporal(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cuidado_personal|cuidado_corporal|crema_solar':
        return ('Cuidado facial y corporal', 'Protector solar y aftersun', 'Protector solar y aftersun')

    if subcat == 'cuidado_personal|cuidado_corporal|cosmetica':
        return ('Cuidado facial y corporal', 'Manicura y pedicura', 'Cuidado de uñas y complementos')

    if subcat == 'cuidado_personal|bano_e_higiene_personal|jabon':
        return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Jabón de manos')

    if subcat == 'cuidado_personal|bano_e_higiene_personal|geles_de_duchas_y_esponjas':
        if 'gel' in name or 'gel-champú':
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Gel')
        else:
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Esponjas')

    if subcat == 'cuidado_personal|afeitado':
        if 'gel de afeitar' in name or 'espuma' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Espuma de afeitar')
        elif 'after shave' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'After shave')
        elif 'maquinilla' in name or 'maquinillas' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Maquinillas de afeitar')
        elif 'recambios' in name or 'recambio' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Recambios maquinilla de afeitar')
        else:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Crema y gel de cara')

    if subcat == 'cuidado_personal|cuidado_corporal|limpieza_facial':
        return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Limpieza de cara')

    if subcat == 'cuidado_personal|cuidado_corporal|crema_facial':
        if 'crema' in name or 'hidratante' in name or 'gel' in name:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Crema de cara')
        elif 'de ojos' in name or 'para ojos' in name:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Contorno de ojos')
        else:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Sérum y ampollas')

    if subcat == 'cuidado_personal|cuidado_corporal|hidratacion_corporal':
        if 'corporal' in name or 'anticelulítico' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema y aceite para el cuerpo')
        elif 'manos' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema manos')
        elif 'pies' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema pies')
        else:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Toallitas')

    if subcat == 'cuidado_personal|cuidado_corporal|cuidado_de_manos_y_pies':
        if 'manos' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema manos')
        else:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema pies')

    if subcat == 'cuidado_personal|cuidado_bucal':
        if 'pasta' in name or 'dentífrico' in name:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Pasta de dientes')
        elif 'sepillo' in name:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Cepillo de dientes')
        else:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Colutorio e hilo dental')

    if subcat == 'cuidado_personal|bano_e_higiene_personal|desodorantes':
        if 'roll' in name or 'stick'in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante roll on y stick')
        elif 'spray' in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante Spray')
        else:
            return ('Cuidado facial y corporal', 'Desodorante', 'Otros desodorantes')

    if subcat == 'cuidado_personal|cuidado_intimo':
        if 'compresas' in name or 'compresa' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Compresas')
        elif 'protegeslips' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Protegeslips')
        elif 'tampones' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Tampones')
        elif 'pañales' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Pañales para adulto')
        else:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Toallitas y gel')

    if subcat == 'cuidado_personal|bano_e_higiene_personal|colonias':
        if 'eau de toilette mujer' in name or 'colonia mujer' in name or 'eau de parfum mujer' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia mujer')
        elif 'lote mujer' in name or 'neceser' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes mujer')
        elif 'eau de toilette hombre' in name or 'colonia hombre' in name or 'eau de parfum hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia hombre')
        elif 'lote hombre' in name or 'neceser hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes hombres')
        else:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Colonia infantil')

    if subcat == 'cuidado_personal|depilacion':
        if 'bandas' in name or 'crema' in name or 'pinza' in name or 'cera' in name or 'gel' in name:
            return ('Cuidado facial y corporal', 'Depilación', 'Bandas, cera y crema')
        else:
            return ('Cuidado facial y corporal', 'Depilación', 'Cuchilla')

    return (pd.NA, pd.NA, pd.NA)

In [5122]:
datamarket_update('cuidado_personal|depilacion', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 535 to 4664
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5123]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|colonias', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 329 to 4640
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         14 non-null     int64  
 1   supermarket                14 non-null     object 
 2   brand_category             14 non-null     object 
 3   name                       14 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  14 non-null     object 
 6   trademark_propietary_flag  14 non-null     object 
 7   price                      14 non-null     float64
 8   reference_price            14 non-null     float64
 9   reference_unit             14 non-null     object 
 10  insert_date                14 non-null     object 
 11  price_corrected            14 non-null     bool   
 12  reference_price_corrected  14 non-null     bool   
 13  category_name              14 non-null     object 
 1

In [5124]:
datamarket_update('cuidado_personal|cuidado_intimo', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 29 entries, 100 to 4939
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         29 non-null     int64  
 1   supermarket                29 non-null     object 
 2   brand_category             29 non-null     object 
 3   name                       29 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      29 non-null     float64
 8   reference_price            29 non-null     float64
 9   reference_unit             29 non-null     object 
 10  insert_date                29 non-null     object 
 11  price_corrected            29 non-null     bool   
 12  reference_price_corrected  29 non-null     bool   
 13  category_name              29 non-null     object 
 1

In [5125]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|desodorantes', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 30 entries, 302 to 4972
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         30 non-null     int64  
 1   supermarket                30 non-null     object 
 2   brand_category             30 non-null     object 
 3   name                       30 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      30 non-null     float64
 8   reference_price            30 non-null     float64
 9   reference_unit             30 non-null     object 
 10  insert_date                30 non-null     object 
 11  price_corrected            30 non-null     bool   
 12  reference_price_corrected  30 non-null     bool   
 13  category_name              30 non-null     object 
 1

In [5126]:
datamarket_update('cuidado_personal|cuidado_bucal', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 20 to 4822
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         38 non-null     int64  
 1   supermarket                38 non-null     object 
 2   brand_category             38 non-null     object 
 3   name                       38 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  38 non-null     object 
 6   trademark_propietary_flag  38 non-null     object 
 7   price                      38 non-null     float64
 8   reference_price            38 non-null     float64
 9   reference_unit             38 non-null     object 
 10  insert_date                38 non-null     object 
 11  price_corrected            38 non-null     bool   
 12  reference_price_corrected  38 non-null     bool   
 13  category_name              38 non-null     object 
 14

In [5127]:
datamarket_update('cuidado_personal|cuidado_corporal|cuidado_de_manos_y_pies', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 45 to 4970
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [5128]:
datamarket_update('cuidado_personal|cuidado_corporal|hidratacion_corporal', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 323 to 4788
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         24 non-null     int64  
 1   supermarket                24 non-null     object 
 2   brand_category             24 non-null     object 
 3   name                       24 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  24 non-null     object 
 6   trademark_propietary_flag  24 non-null     object 
 7   price                      24 non-null     float64
 8   reference_price            24 non-null     float64
 9   reference_unit             24 non-null     object 
 10  insert_date                24 non-null     object 
 11  price_corrected            24 non-null     bool   
 12  reference_price_corrected  24 non-null     bool   
 13  category_name              24 non-null     object 
 1

In [5129]:
datamarket_update('cuidado_personal|cuidado_corporal|crema_facial', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 22 entries, 118 to 4598
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         22 non-null     int64  
 1   supermarket                22 non-null     object 
 2   brand_category             22 non-null     object 
 3   name                       22 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      22 non-null     float64
 8   reference_price            22 non-null     float64
 9   reference_unit             22 non-null     object 
 10  insert_date                22 non-null     object 
 11  price_corrected            22 non-null     bool   
 12  reference_price_corrected  22 non-null     bool   
 13  category_name              22 non-null     object 
 1

In [5130]:
datamarket_update('cuidado_personal|cuidado_corporal|limpieza_facial', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 46 to 4695
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 14

In [5131]:
datamarket_update('cuidado_personal|afeitado', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 240 to 4828
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5132]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|geles_de_duchas_y_esponjas', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 34 entries, 27 to 4915
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         34 non-null     int64  
 1   supermarket                34 non-null     object 
 2   brand_category             34 non-null     object 
 3   name                       34 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  34 non-null     object 
 6   trademark_propietary_flag  34 non-null     object 
 7   price                      34 non-null     float64
 8   reference_price            34 non-null     float64
 9   reference_unit             34 non-null     object 
 10  insert_date                34 non-null     object 
 11  price_corrected            34 non-null     bool   
 12  reference_price_corrected  34 non-null     bool   
 13  category_name              34 non-null     object 
 14

In [5133]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|jabon', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 1915 to 3491
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 1

In [5134]:
datamarket_update('cuidado_personal|cuidado_corporal|cosmetica', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2002 to 3691
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5135]:
datamarket_update('cuidado_personal|cuidado_corporal|crema_solar', clasificar_category_cuidado_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4871 to 4871
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5136]:
current_category = df_category[df_category["category_name"] == 'Cuidado del cabello']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
205,Cuidado del cabello,Acondicionador y mascarilla,Acondicionador,206
206,Cuidado del cabello,Acondicionador y mascarilla,Mascarilla,207
207,Cuidado del cabello,Acondicionador y mascarilla,Sérum y otros,208
208,Cuidado del cabello,Champú,Champú,209
209,Cuidado del cabello,Champú,Champú anticaspa,210
210,Cuidado del cabello,Champú,Champú infantil,211
211,Cuidado del cabello,Coloración cabello,Coloración color moreno,212
212,Cuidado del cabello,Coloración cabello,Coloración color castaño,213
213,Cuidado del cabello,Coloración cabello,Coloración color caoba,214
214,Cuidado del cabello,Coloración cabello,Coloración color rubio,215


In [5137]:
current_category = df_category[df_category["category_name"] == 'Fitoterapia y parafarmacia']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
257,Fitoterapia y parafarmacia,Fitoterapia,Fitoterapia,258
258,Fitoterapia y parafarmacia,Parafarmacia,Botiquín,259
259,Fitoterapia y parafarmacia,Parafarmacia,Preservativos,260


In [5138]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cuidado_personal']

Unnamed: 0,category,subcategory,subsubcategory
20,cuidado_personal,cuidado_bucal,
27,cuidado_personal,bano_e_higiene_personal,geles_de_duchas_y_esponjas
45,cuidado_personal,cuidado_corporal,cuidado_de_manos_y_pies
46,cuidado_personal,cuidado_corporal,limpieza_facial
100,cuidado_personal,cuidado_intimo,
118,cuidado_personal,cuidado_corporal,crema_facial
240,cuidado_personal,afeitado,
274,cuidado_personal,bano_e_higiene_personal,cuidado_del_cabello
302,cuidado_personal,bano_e_higiene_personal,desodorantes
313,cuidado_personal,bano_e_higiene_personal,champus


In [5139]:
def clasificar_category_cabello(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cuidado_personal|bano_e_higiene_personal|champus':
        if 'infantil' in name or 'bebé':
            return ('Cuidado del cabello', 'Champú', 'Champú infantil')
        elif 'anticaspa' in name:
            return ('Cuidado del cabello', 'Champú', 'Champú anticaspa')
        else:
            return ('Cuidado del cabello', 'Champú', 'Champú')

    if subcat == 'cuidado_personal|bano_e_higiene_personal|cuidado_del_cabello':
        if 'acondicionador' in name:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Acondicionador')
        elif 'mascarilla' in name:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Mascarilla')
        elif 'espuma' in name or 'laca' in name:
            return ('Cuidado del cabello', 'Fijación cabello', 'Espuma y laca')
        elif 'tintes' in name:
            if 'moreno' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color moreno')
            elif 'castaño' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color castaño')
            elif 'permanente' in name or 'rubio' in name or 'aclarante' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color rubio')
            elif 'hombre' in name or 'hombres' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Coloración hombre')
            elif 'caobae' in name or 'rojo' in name or 'cobre' in name or 'violín' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color caoba')
            elif 'retoca raíces' in name:
                return ('Cuidado del cabello', 'Coloración cabello', 'Retoca raíces')
            else:
                return ('Cuidado del cabello', 'Coloración cabello', 'Accesorios')
        elif 'gomina' in name or 'cera' in name:
            return ('Cuidado del cabello', 'Fijación cabello', 'Gomina y cera')
        else:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Sérum y otros')

    if subcat == 'cuidado_personal|salud_sexual':
       return ('Fitoterapia y parafarmacia', 'Parafarmacia', 'Preservativos')

    if subcat == 'cuidado_personal|botiquin':
        return ('Fitoterapia y parafarmacia', 'Parafarmacia', 'Botiquín')

    return (pd.NA, pd.NA, pd.NA)

In [5140]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|champus', clasificar_category_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 313 to 4979
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         24 non-null     int64  
 1   supermarket                24 non-null     object 
 2   brand_category             24 non-null     object 
 3   name                       24 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  24 non-null     object 
 6   trademark_propietary_flag  24 non-null     object 
 7   price                      24 non-null     float64
 8   reference_price            24 non-null     float64
 9   reference_unit             24 non-null     object 
 10  insert_date                24 non-null     object 
 11  price_corrected            24 non-null     bool   
 12  reference_price_corrected  24 non-null     bool   
 13  category_name              24 non-null     object 
 1

In [5141]:
datamarket_update('cuidado_personal|botiquin', clasificar_category_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 795 to 4952
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5142]:
datamarket_update('cuidado_personal|salud_sexual', clasificar_category_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2228 to 3477
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5143]:
datamarket_update('cuidado_personal|bano_e_higiene_personal|cuidado_del_cabello', clasificar_category_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 43 entries, 274 to 4985
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         43 non-null     int64  
 1   supermarket                43 non-null     object 
 2   brand_category             43 non-null     object 
 3   name                       43 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  43 non-null     object 
 6   trademark_propietary_flag  43 non-null     object 
 7   price                      43 non-null     float64
 8   reference_price            43 non-null     float64
 9   reference_unit             43 non-null     object 
 10  insert_date                43 non-null     object 
 11  price_corrected            43 non-null     bool   
 12  reference_price_corrected  43 non-null     bool   
 13  category_name              43 non-null     object 
 1

# Procesamiento de la categoría "fitoterapia_y_parafarmacia" y "parafarmacia"

In [5144]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'parafarmacia']

Unnamed: 0,category,subcategory,subsubcategory
17,parafarmacia,higiene_bucal,cuidado_y_fijacion_protesis_dentales
156,parafarmacia,bebe,champu
170,parafarmacia,bebe,papillas_y_galletas
237,parafarmacia,bebe,puericultura
288,parafarmacia,botiquin,gafas_presbicia
392,parafarmacia,botiquin,alivio_del_dolor
458,parafarmacia,bebe,anti_irritacion
609,parafarmacia,botiquin,tos_y_garganta
680,parafarmacia,higiene_bucal,ortodoncia
699,parafarmacia,higiene_bucal,pasta_de_dientes


In [5145]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'fitoterapia_y_parafarmacia']

Unnamed: 0,category,subcategory,subsubcategory
215,fitoterapia_y_parafarmacia,parafarmacia,
277,fitoterapia_y_parafarmacia,fitoterapia,


In [5146]:
current_category = df_category[df_category["category_name"] == 'Fitoterapia y parafarmacia']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
257,Fitoterapia y parafarmacia,Fitoterapia,Fitoterapia,258
258,Fitoterapia y parafarmacia,Parafarmacia,Botiquín,259
259,Fitoterapia y parafarmacia,Parafarmacia,Preservativos,260


In [5147]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'bebe']

Unnamed: 0,category,subcategory,subsubcategory
156,parafarmacia,bebe,champu
170,parafarmacia,bebe,papillas_y_galletas
237,parafarmacia,bebe,puericultura
458,parafarmacia,bebe,anti_irritacion
913,parafarmacia,bebe,accesorios_bano
1208,parafarmacia,bebe,complementos_alimenticios_e_infusiones_para_bebe
1493,parafarmacia,bebe,potitos
2260,parafarmacia,bebe,embarazo_y_lactancia
3425,parafarmacia,bebe,hidratantes_y_aceites_corporales
4940,parafarmacia,bebe,toallitas_bebe


In [5148]:
current_category = df_category[df_category["category_name"] == 'Bebé']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
65,Bebé,Alimentación infantil,Tarritos salados,66
66,Bebé,Alimentación infantil,Tarritos de fruta,67
67,Bebé,Alimentación infantil,Yogures y postres,68
68,Bebé,Alimentación infantil,Leche,69
69,Bebé,Alimentación infantil,Leche en polvo,70
70,Bebé,Alimentación infantil,Papillas,71
71,Bebé,Biberón y chupete,Biberón,72
72,Bebé,Biberón y chupete,Chupete,73
73,Bebé,Higiene y cuidado,Champú y jabón,74
74,Bebé,Higiene y cuidado,Aceite y crema,75


In [5149]:
df_mercadona[df_mercadona['name'].str.contains('Absorbente', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
3620,Papel hogar Compacto Absorbente Bosque Verde,Paquete 3.0 ud,2.35,0.784,ud,Rollo cocina,Limpieza y hogar,Papel higiénico y celulosa,False,325
3622,Papel cocina Gigante Absorbente Bosque Verde,Paquete 1.0 ud,2.95,2.95,ud,Rollo cocina,Limpieza y hogar,Papel higiénico y celulosa,False,325
3649,Fregona Microfibra Absorbente Bosque Verde,Paquete 2.0 ud,2.8,1.4,ud,"Fregonas, escobas y mopas",Limpieza y hogar,Utensilios de limpieza y calzado,False,331
3650,Fregona Microfibra Absorbente Bosque Verde,Paquete 1.0 ud,1.5,1.5,ud,"Fregonas, escobas y mopas",Limpieza y hogar,Utensilios de limpieza y calzado,False,331
3652,Fregona Resistente y Absorbente Bosque Verde,Paquete 2.0 ud,1.75,0.875,ud,"Fregonas, escobas y mopas",Limpieza y hogar,Utensilios de limpieza y calzado,False,331


In [5150]:
def clasificar_category_parafarmacia(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'fitoterapia_y_parafarmacia|fitoterapia':
        return ('Fitoterapia y parafarmacia', 'Fitoterapia', 'Fitoterapia')

    if subcat == 'fitoterapia_y_parafarmacia|parafarmacia':
        if 'preservativo' in name:
            return ('Fitoterapia y parafarmacia', 'Parafarmacia', 'Preservativos')
        else:
            return ('Fitoterapia y parafarmacia', 'Parafarmacia', 'Botiquín')

    if subcat in ['parafarmacia|botiquin|geles_hidroalcoholicos', 'parafarmacia|botiquin|antimosquitos',
                  'parafarmacia|botiquin|antisepticos_y_talcos', 'parafarmacia|botiquin|higiene_y_tiras_nasales',
                  'parafarmacia|botiquin|lentillas_y_sueros', 'parafarmacia|botiquin|apositos_y_gasas',
                  'parafarmacia|botiquin|mascarillas', 'parafarmacia|botiquin|oido_y_protectores',
                  'parafarmacia|botiquin|tos_y_garganta', 'parafarmacia|botiquin|alivio_del_dolor',
                  'parafarmacia|higiene_bucal|cuidado_y_fijacion_protesis_dentales', 'parafarmacia|botiquin|gafas_presbicia',
                  'parafarmacia|higiene_bucal|ortodoncia', 'parafarmacia|botiquin|higiene_y_tiras_nasales']:
            return ('Fitoterapia y parafarmacia', 'Parafarmacia', 'Botiquín')


    if subcat == 'parafarmacia|bebe|embarazo_y_lactancia':
	      return ('Cuidado facial y corporal',	'Higiene íntima',	'Compresas')

    if subcat == 'parafarmacia|higiene_bucal|pasta_de_dientes':
        return ('Cuidado facial y corporal', 'Higiene bucal', 'Pasta de dientes')

    if subcat == 'parafarmacia|higiene_bucal|cepillos_y_seda':
        return ('Cuidado facial y corporal', 'Higiene bucal', 'Cepillo de dientes')

    if subcat == 'parafarmacia|higiene_bucal|colutorio':
        return ('Cuidado facial y corporal', 'Higiene bucal', 'Colutorio e hilo dental')

    if subcat == 'parafarmacia|bebe|papillas_y_galletas':
        if 'papilla' in name:
            return ('Bebé', 'Alimentación infantil', 'Papillas')
        else:
            return ('Bebé', 'Alimentación infantil', 'Yogures y postres')


    if subcat == 'parafarmacia|bebe|potitos':
        if any(x in name for x in ['fruta', 'frutas', 'plátano', 'fresa', 'pera', 'manzana', 'mandarina', 'arándanos']):
            return ('Bebé', 'Alimentación infantil', 'Tarritos de fruta')
        else:
            return ('Bebé', 'Alimentación infantil', 'Tarritos salados')

    if subcat == 'parafarmacia|bebe|toallitas_bebe':
        return ('Bebé', 'Toallitas y pañales', 'Toallitas')

    if subcat == 'parafarmacia|bebe|hidratantes_y_aceites_corporales':
        return ('Bebé', 'Higiene y cuidado', 'Aceite y crema')

    if subcat == 'parafarmacia|bebe|champu':
        return ('Bebé', 'Higiene y cuidado', 'Champú y jabón')

    if subcat == 'parafarmacia|bebe|puericultura':
        if 'biberón' in name:
            return ('Bebé', 'Biberón y chupete', 'Biberón')
        elif 'chupete' in name:
            return ('Bebé', 'Biberón y chupete', 'Chupete')
        else:
            return ('Bebé', 'Higiene y cuidado', 'Accesorios')

    if subcat == 'parafarmacia|bebe|anti_irritacion':
        return ('Bebé', 'Higiene y cuidado', 'Aceite y crema')


    if subcat == 'parafarmacia|bebe|accesorios_bano':
        return ('Bebé', 'Higiene y cuidado', 'Accesorios')


    return (pd.NA, pd.NA, pd.NA)

In [5151]:
datamarket_update('parafarmacia|botiquin|higiene_y_tiras_nasales', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3445 to 4524
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5152]:
datamarket_update('fitoterapia_y_parafarmacia|parafarmacia', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 215 to 4913
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [5153]:
datamarket_update('fitoterapia_y_parafarmacia|fitoterapia', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 277 to 4965
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [5154]:
datamarket_update('parafarmacia|botiquin|geles_hidroalcoholicos', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4931 to 4931
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5155]:
datamarket_update('parafarmacia|botiquin|antimosquitos', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4349 to 4349
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5156]:
datamarket_update('parafarmacia|botiquin|antisepticos_y_talcos', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3884 to 4487
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5157]:
datamarket_update('parafarmacia|botiquin|lentillas_y_sueros', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2553 to 4408
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5158]:
datamarket_update('parafarmacia|botiquin|apositos_y_gasas', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1870 to 4562
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5159]:
datamarket_update('parafarmacia|botiquin|mascarillas', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1146 to 3329
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5160]:
datamarket_update('parafarmacia|botiquin|oido_y_protectores', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 818 to 1738
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5161]:
datamarket_update('parafarmacia|botiquin|tos_y_garganta', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 609 to 4009
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5162]:
datamarket_update('parafarmacia|botiquin|alivio_del_dolor', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 392 to 827
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14 

In [5163]:
datamarket_update('parafarmacia|higiene_bucal|cuidado_y_fijacion_protesis_dentales', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 17 to 3603
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14 

In [5164]:
datamarket_update('parafarmacia|botiquin|gafas_presbicia', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 288 to 3465
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5165]:

df_datamarket.loc[df_datamarket['brand_category'] == 'parafarmacia|bebe|complementos_alimenticios_e_infusiones_para_bebe', ['category_name', 'subcategory_name', 'subcategory_2_nivel_name']] = ['Fitoterapia y parafarmacia', 'Fitoterapia', 'Fitoterapia']


In [5166]:
df_datamarket[df_datamarket['brand_category'] == 'parafarmacia|bebe|complementos_alimenticios_e_infusiones_para_bebe']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
1208,25860001,carrefour.es,parafarmacia|bebe|complementos_alimenticios_e_infusiones_para_bebe,,,otras marcas,False,25.29,29.75,kg,2023-03-15,False,False,Fitoterapia y parafarmacia,Fitoterapia,Fitoterapia
2673,25860005,carrefour.es,parafarmacia|bebe|complementos_alimenticios_e_infusiones_para_bebe,Complemento alimenticio de chocolate Blenuten Colacao 800 g.,,otras marcas,False,23.1,28.87,kg,2023-03-15,False,False,Fitoterapia y parafarmacia,Fitoterapia,Fitoterapia
3374,25860003,carrefour.es,parafarmacia|bebe|complementos_alimenticios_e_infusiones_para_bebe,Infusión Infantil Blevit Digest 150 gr,,otras marcas,False,10.45,69.67,kg,2023-03-15,False,False,Fitoterapia y parafarmacia,Fitoterapia,Fitoterapia


In [5167]:
datamarket_update('parafarmacia|higiene_bucal|ortodoncia', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 680 to 3509
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5168]:
datamarket_update('parafarmacia|bebe|embarazo_y_lactancia', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2260 to 4274
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5169]:
datamarket_update('parafarmacia|higiene_bucal|pasta_de_dientes', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 699 to 4841
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5170]:
datamarket_update('parafarmacia|higiene_bucal|cepillos_y_seda', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2155 to 2155
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5171]:
datamarket_update('parafarmacia|higiene_bucal|colutorio', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3823 to 3823
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5172]:
datamarket_update('parafarmacia|bebe|papillas_y_galletas', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 170 to 4474
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5173]:
datamarket_update('parafarmacia|bebe|potitos', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1493 to 4021
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5174]:
datamarket_update('parafarmacia|bebe|toallitas_bebe', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4940 to 4940
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5175]:
datamarket_update('parafarmacia|bebe|hidratantes_y_aceites_corporales', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3425 to 4803
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5176]:
datamarket_update('parafarmacia|bebe|champu', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 156 to 3334
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5177]:
datamarket_update('parafarmacia|bebe|puericultura', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 237 to 2137
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5178]:
datamarket_update('parafarmacia|bebe|anti_irritacion', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 458 to 4683
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5179]:
datamarket_update('parafarmacia|bebe|accesorios_bano', clasificar_category_parafarmacia)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 913 to 4127
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

# Procesamiento de la categoría "cuidado_del_cabello"

In [5180]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cuidado_del_cabello']

Unnamed: 0,category,subcategory,subsubcategory
101,cuidado_del_cabello,coloracion_cabello,
452,cuidado_del_cabello,acondicionador_y_mascarilla,
610,cuidado_del_cabello,fijacion_cabello,
809,cuidado_del_cabello,champu,


In [5181]:
def clasificar_category_cuidado_cabello(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cuidado_del_cabello|champu':
        if 'infantil' in name or 'bebé':
            return ('Cuidado del cabello', 'Champú', 'Champú infantil')
        elif 'anticaspa' in name:
            return ('Cuidado del cabello', 'Champú', 'Champú anticaspa')
        else:
            return ('Cuidado del cabello', 'Champú', 'Champú')

    if subcat == 'cuidado_del_cabello|acondicionador_y_mascarilla':
        if 'acondicionador' in name:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Acondicionador')
        elif 'mascarilla' in name:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Mascarilla')
        else:
            return ('Cuidado del cabello', 'Acondicionador y mascarilla', 'Sérum y otros')

    if subcat == 'cuidado_del_cabello|fijacion_cabello':
        if 'espuma' in name or 'laca' in name:
            return ('Cuidado del cabello', 'Fijación cabello', 'Espuma y laca')
        else:
            return ('Cuidado del cabello', 'Fijación cabello', 'Gomina y cera')

    if subcat == 'cuidado_del_cabello|coloracion_cabello':
        if 'moreno' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color moreno')
        elif 'castaño' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color castaño')
        elif 'permanente' in name or 'rubio' in name or 'aclarante' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color rubio')
        elif 'hombre' in name or 'hombres' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Coloración hombre')
        elif 'caobae' in name or 'rojo' in name or 'cobre' in name or 'violín' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Coloración color caoba')
        elif 'retoca raíces' in name:
            return ('Cuidado del cabello', 'Coloración cabello', 'Retoca raíces')
        else:
            return ('Cuidado del cabello', 'Coloración cabello', 'Accesorios')


    return (pd.NA, pd.NA, pd.NA)

In [5182]:
datamarket_update('cuidado_del_cabello|coloracion_cabello', clasificar_category_cuidado_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 50 entries, 101 to 4973
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         50 non-null     int64  
 1   supermarket                50 non-null     object 
 2   brand_category             50 non-null     object 
 3   name                       50 non-null     object 
 4   description                50 non-null     object 
 5   trademark                  50 non-null     object 
 6   trademark_propietary_flag  50 non-null     object 
 7   price                      50 non-null     float64
 8   reference_price            50 non-null     float64
 9   reference_unit             50 non-null     object 
 10  insert_date                50 non-null     object 
 11  price_corrected            50 non-null     bool   
 12  reference_price_corrected  50 non-null     bool   
 13  category_name              50 non-null     object 
 1

In [5183]:
datamarket_update('cuidado_del_cabello|fijacion_cabello', clasificar_category_cuidado_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 610 to 4785
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

In [5184]:
datamarket_update('cuidado_del_cabello|acondicionador_y_mascarilla', clasificar_category_cuidado_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 452 to 4874
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5185]:
datamarket_update('cuidado_del_cabello|champu', clasificar_category_cuidado_cabello)

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 809 to 4722
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         14 non-null     int64  
 1   supermarket                14 non-null     object 
 2   brand_category             14 non-null     object 
 3   name                       14 non-null     object 
 4   description                14 non-null     object 
 5   trademark                  14 non-null     object 
 6   trademark_propietary_flag  14 non-null     object 
 7   price                      14 non-null     float64
 8   reference_price            14 non-null     float64
 9   reference_unit             14 non-null     object 
 10  insert_date                14 non-null     object 
 11  price_corrected            14 non-null     bool   
 12  reference_price_corrected  14 non-null     bool   
 13  category_name              14 non-null     object 
 1

# Procesamiento de la categoría "cuidado_facial_y_corporal"

In [5186]:
current_category = df_category[df_category["category_name"] == 'Cuidado facial y corporal']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
220,Cuidado facial y corporal,Afeitado y cuidado para hombre,Espuma de afeitar,221
221,Cuidado facial y corporal,Afeitado y cuidado para hombre,After shave,222
222,Cuidado facial y corporal,Afeitado y cuidado para hombre,Maquinillas de afeitar,223
223,Cuidado facial y corporal,Afeitado y cuidado para hombre,Recambios maquinilla de afeitar,224
224,Cuidado facial y corporal,Afeitado y cuidado para hombre,Crema y gel de cara,225
225,Cuidado facial y corporal,Cuidado corporal,Crema y aceite para el cuerpo,226
226,Cuidado facial y corporal,Cuidado corporal,Crema manos,227
227,Cuidado facial y corporal,Cuidado corporal,Crema pies,228
228,Cuidado facial y corporal,Cuidado corporal,Toallitas,229
229,Cuidado facial y corporal,Cuidado e higiene facial,Limpieza de cara,230


In [5187]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cuidado_facial_y_corporal']

Unnamed: 0,category,subcategory,subsubcategory
37,cuidado_facial_y_corporal,desodorante,
58,cuidado_facial_y_corporal,afeitado_y_cuidado_para_hombre,
64,cuidado_facial_y_corporal,gel_y_jabon_de_manos,
75,cuidado_facial_y_corporal,perfume_y_colonia,
85,cuidado_facial_y_corporal,higiene_intima,
87,cuidado_facial_y_corporal,depilacion,
108,cuidado_facial_y_corporal,cuidado_e_higiene_facial,
120,cuidado_facial_y_corporal,higiene_bucal,
131,cuidado_facial_y_corporal,manicura_y_pedicura,
150,cuidado_facial_y_corporal,protector_solar_y_aftersun,


In [5188]:
def clasificar_category_corporal(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cuidado_facial_y_corporal|protector_solar_y_aftersun':
        return ('Cuidado facial y corporal', 'Protector solar y aftersun', 'Protector solar y aftersun')

    if subcat == 'cuidado_facial_y_corporal|manicura_y_pedicura':
        if 'laca' in name:
            return ('Cuidado facial y corporal', 'Manicura y pedicura', 'Laca de uñas')
        else:
            return ('Cuidado facial y corporal', 'Manicura y pedicura', 'Cuidado de uñas y complementos')

    if subcat == 'cuidado_facial_y_corporal|gel_y_jabon_de_manos':
        if 'manos' in name:
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Jabón de manos')
        elif 'gel' in name or 'gel-champú':
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Gel')
        else:
            return ('Cuidado facial y corporal', 'Gel y jabón de manos', 'Esponjas')

    if subcat == 'cuidado_facial_y_corporal|afeitado_y_cuidado_para_hombre':
        if 'gel de afeitar' in name or 'espuma' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Espuma de afeitar')
        elif 'after shave' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'After shave')
        elif 'maquinilla' in name or 'maquinillas' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Maquinillas de afeitar')
        elif 'recambios' in name or 'recambio' in name:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Recambios maquinilla de afeitar')
        else:
            return ('Cuidado facial y corporal', 'Afeitado y cuidado para hombre', 'Crema y gel de cara')

    if subcat == 'cuidado_facial_y_corporal|cuidado_e_higiene_facial':
        if any(x in name for x in ['discos', 'toallitas', 'micelar', 'desmaquillador', 'peeling', 'limpiadora', 'esponja', 'limpiador', 'limpia', 'desmaquillante', 'tónico']):
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Limpieza de cara')
        elif 'crema' in name or 'hidratante' in name or 'gel' in name:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Crema de cara')
        elif 'de ojos' in name or 'para ojos' in name:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Contorno de ojos')
        else:
            return ('Cuidado facial y corporal', 'Cuidado e higiene facial', 'Sérum y ampollas')

    if subcat == 'cuidado_facial_y_corporal|cuidado_corporal':
        if 'corporal' in name or 'anticelulítico' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema y aceite para el cuerpo')
        elif 'manos' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema manos')
        elif 'pies' in name:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Crema pies')
        else:
            return ('Cuidado facial y corporal', 'Cuidado corporal', 'Toallitas')

    if subcat == 'cuidado_facial_y_corporal|higiene_bucal':
        if 'pasta' in name or 'dentífrico' in name:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Pasta de dientes')
        elif 'sepillo' in name:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Cepillo de dientes')
        else:
            return ('Cuidado facial y corporal', 'Higiene bucal', 'Colutorio e hilo dental')

    if subcat == 'cuidado_facial_y_corporal|desodorante':
        if 'roll' in name or 'stick'in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante roll on y stick')
        elif 'spray' in name:
            return ('Cuidado facial y corporal', 'Desodorante', 'Desodorante Spray')
        else:
            return ('Cuidado facial y corporal', 'Desodorante', 'Otros desodorantes')

    if subcat == 'cuidado_facial_y_corporal|higiene_intima':
        if 'compresas' in name or 'compresa' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Compresas')
        elif 'protegeslips' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Protegeslips')
        elif 'tampones' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Tampones')
        elif 'pañales' in name:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Pañales para adulto')
        else:
            return ('Cuidado facial y corporal', 'Higiene íntima', 'Toallitas y gel')

    if subcat == 'cuidado_facial_y_corporal|perfume_y_colonia':
        if 'eau de toilette mujer' in name or 'colonia mujer' in name or 'eau de parfum mujer' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia mujer')
        elif 'lote mujer' in name or 'neceser' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes mujer')
        elif 'eau de toilette hombre' in name or 'colonia hombre' in name or 'eau de parfum hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Perfume y colonia hombre')
        elif 'lote hombre' in name or 'neceser hombre' in name:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Lotes hombre')
        else:
            return ('Cuidado facial y corporal', 'Perfume y colonia', 'Colonia infantil')

    if subcat == 'cuidado_facial_y_corporal|depilacion':
        if 'bandas' in name or 'crema' in name or 'pinza' in name or 'cera' in name or 'gel' in name:
            return ('Cuidado facial y corporal', 'Depilación', 'Bandas, cera y crema')
        else:
            return ('Cuidado facial y corporal', 'Depilación', 'Cuchilla')

    return (pd.NA, pd.NA, pd.NA)

In [5189]:
datamarket_update('cuidado_facial_y_corporal|depilacion', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 87 to 4983
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 14

In [5190]:
datamarket_update('cuidado_facial_y_corporal|protector_solar_y_aftersun', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 150 to 4957
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                15 non-null     object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 1

In [5191]:
datamarket_update('cuidado_facial_y_corporal|manicura_y_pedicura', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 131 to 4634
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         24 non-null     int64  
 1   supermarket                24 non-null     object 
 2   brand_category             24 non-null     object 
 3   name                       24 non-null     object 
 4   description                3 non-null      object 
 5   trademark                  24 non-null     object 
 6   trademark_propietary_flag  24 non-null     object 
 7   price                      24 non-null     float64
 8   reference_price            24 non-null     float64
 9   reference_unit             24 non-null     object 
 10  insert_date                24 non-null     object 
 11  price_corrected            24 non-null     bool   
 12  reference_price_corrected  24 non-null     bool   
 13  category_name              24 non-null     object 
 1

In [5192]:
datamarket_update('cuidado_facial_y_corporal|gel_y_jabon_de_manos', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 64 to 4519
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                14 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 14

In [5193]:
datamarket_update('cuidado_facial_y_corporal|afeitado_y_cuidado_para_hombre', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 21 entries, 58 to 4434
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         21 non-null     int64  
 1   supermarket                21 non-null     object 
 2   brand_category             21 non-null     object 
 3   name                       21 non-null     object 
 4   description                19 non-null     object 
 5   trademark                  21 non-null     object 
 6   trademark_propietary_flag  21 non-null     object 
 7   price                      21 non-null     float64
 8   reference_price            21 non-null     float64
 9   reference_unit             21 non-null     object 
 10  insert_date                21 non-null     object 
 11  price_corrected            21 non-null     bool   
 12  reference_price_corrected  21 non-null     bool   
 13  category_name              21 non-null     object 
 14

In [5194]:
datamarket_update('cuidado_facial_y_corporal|cuidado_e_higiene_facial', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 108 to 4922
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                26 non-null     object 
 5   trademark                  31 non-null     object 
 6   trademark_propietary_flag  31 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 1

In [5195]:
datamarket_update('cuidado_facial_y_corporal|cuidado_corporal', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 312 to 3896
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [5196]:
datamarket_update('cuidado_facial_y_corporal|higiene_bucal', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 120 to 4919
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                19 non-null     object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

In [5197]:
datamarket_update('cuidado_facial_y_corporal|desodorante', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 21 entries, 37 to 4891
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         21 non-null     int64  
 1   supermarket                21 non-null     object 
 2   brand_category             21 non-null     object 
 3   name                       21 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  21 non-null     object 
 6   trademark_propietary_flag  21 non-null     object 
 7   price                      21 non-null     float64
 8   reference_price            21 non-null     float64
 9   reference_unit             21 non-null     object 
 10  insert_date                21 non-null     object 
 11  price_corrected            21 non-null     bool   
 12  reference_price_corrected  21 non-null     bool   
 13  category_name              21 non-null     object 
 14

In [5198]:
datamarket_update('cuidado_facial_y_corporal|higiene_intima', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 85 to 4975
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         27 non-null     int64  
 1   supermarket                27 non-null     object 
 2   brand_category             27 non-null     object 
 3   name                       27 non-null     object 
 4   description                27 non-null     object 
 5   trademark                  27 non-null     object 
 6   trademark_propietary_flag  27 non-null     object 
 7   price                      27 non-null     float64
 8   reference_price            27 non-null     float64
 9   reference_unit             27 non-null     object 
 10  insert_date                27 non-null     object 
 11  price_corrected            27 non-null     bool   
 12  reference_price_corrected  27 non-null     bool   
 13  category_name              27 non-null     object 
 14

In [5199]:
datamarket_update('cuidado_facial_y_corporal|perfume_y_colonia', clasificar_category_corporal)

<class 'pandas.core.frame.DataFrame'>
Index: 50 entries, 75 to 4992
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         50 non-null     int64  
 1   supermarket                50 non-null     object 
 2   brand_category             50 non-null     object 
 3   name                       50 non-null     object 
 4   description                47 non-null     object 
 5   trademark                  50 non-null     object 
 6   trademark_propietary_flag  50 non-null     object 
 7   price                      50 non-null     float64
 8   reference_price            50 non-null     float64
 9   reference_unit             50 non-null     object 
 10  insert_date                50 non-null     object 
 11  price_corrected            50 non-null     bool   
 12  reference_price_corrected  50 non-null     bool   
 13  category_name              50 non-null     object 
 14

# Procesamiento de la categoría "congelados"

In [5200]:
current_category = df_category[df_category["category_name"] == 'Congelados']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
167,Congelados,Arroz y pasta,Arroz,168
168,Congelados,Arroz y pasta,Pasta,169
169,Congelados,Carne,Carne,170
170,Congelados,Helados,Bombones,171
171,Congelados,Helados,Cucuruchos,172
172,Congelados,Helados,Granizados y helados de hielo,173
173,Congelados,Helados,Tarrinas,174
174,Congelados,Helados,Barras de helado y barquillos,175
175,Congelados,Hielo,Hielo,176
176,Congelados,Marisco,Marisco,177


In [5201]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'congelados']

Unnamed: 0,category,subcategory,subsubcategory
21,congelados,helados,tarrinas
29,congelados,verduras_y_hortalizas,
41,congelados,pescado_y_marisco,
78,congelados,precocinados,
148,congelados,verdura,
158,congelados,rebozados_y_platos_preparados,verduras_congeladas
164,congelados,arroz_y_pasta,
198,congelados,helados,
204,congelados,pizzas_bases_y_masas,
210,congelados,tartas_y_churros,


In [5202]:
def clasificar_category_congelados(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'congelados|helados':
        if 'hielo' in name:
            return ('Congelados', 'Hielo', 'Hielo')
        elif 'cucurucho' in name:
            return ('Congelados', 'Helados', 'Cucuruchos')
        elif 'sándwich' in name or 'mini' in name or 'bombón' in name:
            return ('Congelados', 'Helados', 'Bombones')
        elif 'tarrina' in name or 'tarta' in name:
            return ('Congelados', 'Helados', 'Tarrinas')
        elif 'barra' in name or 'tubitos' in name or 'barquillo' in name or 'sandwich' in name:
            return ('Congelados', 'Helados', 'Barras de helado y barquillos')
        else:
            return ('Congelados', 'Helados', 'Granizados y helados de hielo')

    if subcat in ['congelados|helados|tartas_heladas', 'congelados|helados|tarrinas']:
        return ('Congelados', 'Helados', 'Tarrinas')

    if subcat in ['congelados|hielo', 'congelados|helados|hielo']:
        return ('Congelados', 'Hielo', 'Hielo')

    if subcat == 'congelados|helados|bombon_helado':
        return ('Congelados', 'Helados', 'Bombones')

    if subcat == 'congelados|helados|conos':
        return ('Congelados', 'Helados', 'Cucuruchos')

    if subcat in ['congelados|helados|polos', 'congelados|helados|sandwiches', 'congelados|helados|barquillos']:
        return ('Congelados', 'Helados', 'Barras de helado y barquillos')

    if subcat in ['congelados|pizzas_bases_y_masas', 'congelados|pizzas', 'congelados|rebozados_y_platos_preparados|pizzas_congeladas']:
        if 'bases' in name or 'masa' in name or 'cocas':
            return ('Congelados', 'Pizzas', 'Base de pizza')
        else:
            return ('Congelados', 'Pizzas', 'Pizzas')

    if subcat == 'congelados|pescado':
        return ('Congelados', 'Pescado', 'Pescado')

    if subcat == 'congelados|marisco':
        if any(x in name for x in ['surimi', 'mejillón', 'mejillónes', 'almeja', 'almejas', 'caracoles', 'vieira', 'zamburiñas']):
            return ('Congelados', 'Marisco', 'Marisco de concha y otros')
        else:
            return ('Congelados', 'Marisco', 'Marisco')

    if subcat == 'congelados|pescado_y_marisco':
        if any(x in name for x in ['merluza', 'emperador', 'bacalao',  'pota', 'sepia', 'salmón', 'lenguado', 'calamar', 'panga', 'pescado', 'atún', 'potón', 'tintorera', 'rape', 'pulpo', 'Índia', 'migas']):
            return ('Congelados', 'Pescado', 'Pescado')
        elif any(x in name for x in ['surimi', 'mejillón', 'mejillónes', 'almeja', 'almejas', 'caracoles', 'vieira', 'zamburiñas']):
            return ('Congelados', 'Marisco', 'Marisco de concha y otros')
        else:
            return ('Congelados', 'Marisco', 'Marisco')

    if subcat == 'congelados|arroz_y_pasta':
        if 'arroz' in name:
            return ('Congelados', 'Arroz y pasta', 'Arroz')
        else:
            return ('Congelados', 'Arroz y pasta', 'Pasta')

    if subcat in ['congelados|tartas_y_churros', 'congelados|reposteria']:
        if 'churros' in name:
            return ('Congelados', 'Tartas y churros', 'Churros')
        elif 'tarta infantil' in name:
            return ('Congelados', 'Tartas y churros', 'Tartas infantiles')
        else:
            return ('Congelados', 'Tartas y churros', 'Tartas')

    if subcat in ['congelados|verdura', 'congelados|verduras_y_hortalizas',
                  'congelados|rebozados_y_platos_preparados|verduras_congeladas']:
        if 'patata' in name:
            return ('Congelados', 'Verdura', 'Patatas')
        if 'frutos' in name or 'fresas' in name or 'arándanos' in name:
            return ('Congelados', 'Verdura', 'Fruta')
        else:
            return ('Congelados', 'Verdura', 'Verdura')

    if subcat in ['congelados|rebozados', 'congelados|rebozados_y_platos_preparados|rebozados_y_platos_preparados',
                  'congelados|rebozados_y_platos_preparados|helados', 'congelados|rebozados_y_platos_preparados|salteados_congelados', 'congelados|precocinados']:
        if 'nuggets' in name or 'croquetas' in name or 'pollo' in name or 'empanados' in name:
            return ('Congelados', 'Rebozados', 'Carne rebozada')
        elif any(x in name for x in ['boquerón', 'bacalao', 'surimi', 'merluza', 'atún', 'rabas', 'pota', 'chipirones', 'langostino']):
            return ('Congelados', 'Rebozados', 'Pescado rebozado')
        else:
            return ('Congelados', 'Rebozados', 'Verdura rebozada y otros')

    if subcat in ['congelados|rebozados_y_platos_preparados|pulpo_calamar_y_sepia_congelados', 'congelados|rebozados_y_platos_preparados|mariscos_congelados',
                  'congelados|rebozados_y_platos_preparados|pescados_congelados', 'congelados|rebozados_y_platos_preparados|gulas_y_surimis_congelados']:
            return ('Congelados', 'Rebozados', 'Pescado rebozado')

    if subcat == 'congelados|rebozados_y_platos_preparados|verduras_congeladas':
        return ('Congelados', 'Rebozados', 'Verdura rebozada y otros')

    return (pd.NA, pd.NA, pd.NA)

In [5203]:
datamarket_update('congelados|helados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 42 entries, 198 to 4964
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         42 non-null     int64  
 1   supermarket                42 non-null     object 
 2   brand_category             42 non-null     object 
 3   name                       42 non-null     object 
 4   description                17 non-null     object 
 5   trademark                  42 non-null     object 
 6   trademark_propietary_flag  42 non-null     object 
 7   price                      42 non-null     float64
 8   reference_price            42 non-null     float64
 9   reference_unit             42 non-null     object 
 10  insert_date                42 non-null     object 
 11  price_corrected            42 non-null     bool   
 12  reference_price_corrected  42 non-null     bool   
 13  category_name              42 non-null     object 
 1

In [5204]:
datamarket_update('congelados|helados|tarrinas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 21 to 3287
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14 

In [5205]:
datamarket_update('congelados|helados|tartas_heladas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3604 to 3604
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5206]:
datamarket_update('congelados|hielo', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 4552 to 4903
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                1 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5207]:
datamarket_update('congelados|helados|hielo', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3613 to 3613
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5208]:
datamarket_update('congelados|helados|bombon_helado', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 311 to 4257
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5209]:
datamarket_update('congelados|helados|conos', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 679 to 1657
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5210]:
datamarket_update('congelados|helados|polos', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 626 to 3440
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5211]:
datamarket_update('congelados|helados|sandwiches', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4618 to 4618
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5212]:
datamarket_update('congelados|helados|barquillos', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2395 to 4812
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5213]:
datamarket_update('congelados|pizzas_bases_y_masas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 204 to 4834
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  18 non-null     object 
 6   trademark_propietary_flag  18 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 1

In [5214]:
datamarket_update('congelados|pizzas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 662 to 4958
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5215]:
datamarket_update('congelados|rebozados_y_platos_preparados|pizzas_congeladas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 997 to 997
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5216]:
datamarket_update('congelados|pescado', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 1264 to 4368
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 

In [5217]:
datamarket_update('congelados|marisco', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 594 to 4635
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5218]:
datamarket_update('congelados|pescado_y_marisco', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 25 entries, 41 to 4849
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         25 non-null     int64  
 1   supermarket                25 non-null     object 
 2   brand_category             25 non-null     object 
 3   name                       25 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  25 non-null     object 
 6   trademark_propietary_flag  25 non-null     object 
 7   price                      25 non-null     float64
 8   reference_price            25 non-null     float64
 9   reference_unit             25 non-null     object 
 10  insert_date                25 non-null     object 
 11  price_corrected            25 non-null     bool   
 12  reference_price_corrected  25 non-null     bool   
 13  category_name              25 non-null     object 
 14

In [5219]:
datamarket_update('congelados|arroz_y_pasta', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 164 to 3005
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5220]:
datamarket_update('congelados|tartas_y_churros', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 210 to 4493
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5221]:
datamarket_update('congelados|reposteria', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 478 to 4416
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5222]:
datamarket_update('congelados|verdura', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 148 to 4899
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         23 non-null     int64  
 1   supermarket                23 non-null     object 
 2   brand_category             23 non-null     object 
 3   name                       23 non-null     object 
 4   description                23 non-null     object 
 5   trademark                  23 non-null     object 
 6   trademark_propietary_flag  23 non-null     object 
 7   price                      23 non-null     float64
 8   reference_price            23 non-null     float64
 9   reference_unit             23 non-null     object 
 10  insert_date                23 non-null     object 
 11  price_corrected            23 non-null     bool   
 12  reference_price_corrected  23 non-null     bool   
 13  category_name              23 non-null     object 
 1

In [5223]:
datamarket_update('congelados|verduras_y_hortalizas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 29 to 4928
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 14

In [5224]:
datamarket_update('congelados|rebozados_y_platos_preparados|verduras_congeladas', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 158 to 774
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14 

In [5225]:
datamarket_update('congelados|rebozados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 721 to 3850
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5226]:
datamarket_update('congelados|rebozados_y_platos_preparados|rebozados_y_platos_preparados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3907 to 3907
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5227]:
datamarket_update('congelados|rebozados_y_platos_preparados|helados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1531 to 4162
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5228]:
datamarket_update('congelados|rebozados_y_platos_preparados|salteados_congelados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3274 to 4971
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5229]:
datamarket_update('congelados|precocinados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 78 to 4747
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 14

In [5230]:
datamarket_update('congelados|rebozados_y_platos_preparados|pulpo_calamar_y_sepia_congelados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 614 to 3586
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5231]:
datamarket_update('congelados|rebozados_y_platos_preparados|mariscos_congelados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 559 to 2953
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5232]:
datamarket_update('congelados|rebozados_y_platos_preparados|pescados_congelados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1109 to 4945
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5233]:
datamarket_update('congelados|rebozados_y_platos_preparados|gulas_y_surimis_congelados', clasificar_category_congelados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1036 to 1036
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

# Procesamiento de la categoría "postres_y_yogures"

In [5234]:
current_category = df_category[df_category["category_name"] == 'Postres y yogures']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
421,Postres y yogures,Bífidus,Bífidus de sabores,422
422,Postres y yogures,Bífidus,Bífidus naturales,423
423,Postres y yogures,Flan y natillas,Flan,424
424,Postres y yogures,Flan y natillas,Natillas,425
425,Postres y yogures,Gelatina y otros postres,Gelatina,426
426,Postres y yogures,Gelatina y otros postres,Otros postres,427
427,Postres y yogures,Postres de soja,Postres de soja,428
428,Postres y yogures,Yogures desnatados,Yogures desnatados,429
429,Postres y yogures,Yogures griegos,Yogures griegos,430
430,Postres y yogures,Yogures líquidos,Yogures líquidos,431


In [5235]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'postres_y_yogures']

Unnamed: 0,category,subcategory,subsubcategory
57,postres_y_yogures,yogures_griegos,
186,postres_y_yogures,bifidus,
222,postres_y_yogures,gelatina_y_otros_postres,
259,postres_y_yogures,yogures_desnatados,
398,postres_y_yogures,postres_de_soja,
848,postres_y_yogures,yogures_liquidos,
1211,postres_y_yogures,yogures_naturales_y_sabores,
1613,postres_y_yogures,flan_y_natillas,
1926,postres_y_yogures,yogures_y_postres_infantiles,


In [5236]:
def clasificar_category_yogures(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'postres_y_yogures|yogures_desnatados':
        return ('Postres y yogures', 'Yogures desnatados', 'Yogures desnatados')

    if subcat == 'postres_y_yogures|postres_de_soja':
        return ('Postres y yogures', 'Postres de soja', 'Postres de soja')

    if subcat == 'postres_y_yogures|yogures_naturales_y_sabores':
        if 'natural' in name:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures naturales')
        else:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures de sabores')

    if subcat == 'postres_y_yogures|yogures_griegos':
        return ('Postres y yogures', 'Yogures griegos', 'Yogures griegos')

    if subcat == 'postres_y_yogures|yogures_y_postres_infantiles':
        return ('Postres y yogures', 'Yogures y postres infantiles', 'Yogures y postres infantiles')

    if subcat == 'postres_y_yogures|yogures_liquidos':
        return ('Postres y yogures', 'Yogures líquidos', 'Yogures líquidos')

    if subcat == 'postres_y_yogures|bifidus':
        if 'natural' in name:
            return ('Postres y yogures', 'Bífidus', 'Bífidus naturales')
        elif 'colesterol' in name:
            return ('Postres y yogures', 'Yogures líquidos', 'Colesterol y otros')
        else:
            return ('Postres y yogures', 'Bífidus', 'Bífidus de sabores')

    if subcat == 'postres_y_yogures|flan_y_natillas':
        if 'flan' in name:
            return ('Postres y yogures', 'Flan y natillas', 'Flan')
        else:
            return ('Postres y yogures', 'Flan y natillas', 'Natillas')

    if subcat == 'postres_y_yogures|gelatina_y_otros_postres':
        if 'gelatina' in name:
            return ('Postres y yogures', 'Gelatina y otros postres', 'Gelatina')
        else:
            return ('Postres y yogures', 'Gelatina y otros postres', 'Otros postres')


    return (pd.NA, pd.NA, pd.NA)

In [5237]:
datamarket_update('postres_y_yogures|yogures_desnatados', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 259 to 4124
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [5238]:
datamarket_update('postres_y_yogures|postres_de_soja', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 398 to 4179
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5239]:
datamarket_update('postres_y_yogures|yogures_naturales_y_sabores', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1211 to 4914
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

In [5240]:
datamarket_update('postres_y_yogures|yogures_griegos', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 57 to 3886
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                4 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14 

In [5241]:
datamarket_update('postres_y_yogures|yogures_y_postres_infantiles', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1926 to 2161
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5242]:
datamarket_update('postres_y_yogures|yogures_liquidos', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 848 to 3578
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [5243]:
datamarket_update('postres_y_yogures|bifidus', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 186 to 3535
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5244]:
datamarket_update('postres_y_yogures|flan_y_natillas', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1613 to 4809
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5245]:
datamarket_update('postres_y_yogures|gelatina_y_otros_postres', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 222 to 4980
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

# Procesamiento de la categoría "panaderia_y_pasteleria"

In [5246]:
current_category = df_category[df_category["category_name"] == 'Panadería y pastelería']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
381,Panadería y pastelería,Bollería de horno,Bollería dulce,382
382,Panadería y pastelería,Bollería de horno,Bollería salada,383
383,Panadería y pastelería,Bollería envasada,Bollería envasada,384
384,Panadería y pastelería,Bollería envasada,Pastelitos surtidos,385
385,Panadería y pastelería,Harina y preparado repostería,Harina,386
386,Panadería y pastelería,Harina y preparado repostería,Levadura y preparado repostería,387
387,Panadería y pastelería,Harina y preparado repostería,Masas,388
388,Panadería y pastelería,Pan de horno,Barra de pan,389
389,Panadería y pastelería,Pan de horno,Pan de bocadillo,390
390,Panadería y pastelería,Pan de horno,Pan rebanado,391


In [5247]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'panaderia_y_pasteleria']

Unnamed: 0,category,subcategory,subsubcategory
24,panaderia_y_pasteleria,picos_rosquilletas_y_picatostes,
33,panaderia_y_pasteleria,pan_de_horno,
102,panaderia_y_pasteleria,bolleria_envasada,
162,panaderia_y_pasteleria,velas_y_decoracion,
196,panaderia_y_pasteleria,bolleria_de_horno,
226,panaderia_y_pasteleria,pan_tostado_y_rallado,
304,panaderia_y_pasteleria,harina_y_preparado_reposteria,
1144,panaderia_y_pasteleria,pan_de_molde_y_otras_especialidades,
3969,panaderia_y_pasteleria,tartas_y_pasteles,


In [5248]:
def clasificar_category_panes(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'panaderia_y_pasteleria|velas_y_decoracion':
        if 'vela' in name:
            return ('Panadería y pastelería', 'Velas y decoración', 'Velas')
        else:
            return ('Panadería y pastelería', 'Velas y decoración', 'Decoración')

    if subcat == 'panaderia_y_pasteleria|pan_de_horno':
        if 'rebanado' in name or 'rebanada' in name or 'rebanadas' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Pan rebanado')
        elif 'barra' in name or 'baguette' in name or 'barras' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Barra de pan')
        else:
            return ('Panadería y pastelería', 'Pan de horno', 'Pan de bocadillo')

    if subcat == 'panaderia_y_pasteleria|pan_de_molde_y_otras_especialidades':
        if 'de molde' in name:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de molde')
        elif any(x in name for x in ['tortillas', 'hot dog', 'hamburguesa', 'pita', 'piadinas', 'bocados']):
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')
        else:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Otros panes')

    if subcat == 'panaderia_y_pasteleria|picos_rosquilletas_y_picatostes':
        if any(x in name for x in ['piquitos', 'picos', 'grissini']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picos')
        elif any(x in name for x in ['rosquilletas', 'palitos', 'panes especiales']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Rosquilletas')
        else:
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picatostes')

    if subcat == 'panaderia_y_pasteleria|pan_tostado_y_rallado':
        if 'tostado' in name:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan tostado')
        else:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan rallado')

    if subcat == 'panaderia_y_pasteleria|harina_y_preparado_reposteria':
        if 'harina' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Harina')
        elif 'masa' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Masas')
        else:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Levadura y preparado repostería')

    if subcat == 'panaderia_y_pasteleria|bolleria_de_horno':
        if any(x in name for x in ['empanadillas', 'saladas', 'empanada']):
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería salada')
        else:
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería dulce')

    if subcat == 'panaderia_y_pasteleria|bolleria_envasada':
        if any(x in name for x in ['paquete', 'pack', 'bolsa']):
            return ('Panadería y pastelería', 'Bollería envasada', 'Bollería envasada')
        else:
            return ('Panadería y pastelería', 'Bollería envasada', 'Pastelitos surtidos')

    if subcat == 'panaderia_y_pasteleria|tartas_y_pasteles':
        if 'infantil' in name:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas infantiles')
        else:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas')


    return (pd.NA, pd.NA, pd.NA)

In [5249]:
datamarket_update('panaderia_y_pasteleria|tartas_y_pasteles', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3969 to 3969
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                1 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5250]:
datamarket_update('panaderia_y_pasteleria|bolleria_envasada', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 102 to 4772
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                14 non-null     object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

In [5251]:
datamarket_update('panaderia_y_pasteleria|bolleria_de_horno', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 22 entries, 196 to 4367
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         22 non-null     int64  
 1   supermarket                22 non-null     object 
 2   brand_category             22 non-null     object 
 3   name                       22 non-null     object 
 4   description                22 non-null     object 
 5   trademark                  22 non-null     object 
 6   trademark_propietary_flag  22 non-null     object 
 7   price                      22 non-null     float64
 8   reference_price            22 non-null     float64
 9   reference_unit             22 non-null     object 
 10  insert_date                22 non-null     object 
 11  price_corrected            22 non-null     bool   
 12  reference_price_corrected  22 non-null     bool   
 13  category_name              22 non-null     object 
 1

In [5252]:
datamarket_update('panaderia_y_pasteleria|harina_y_preparado_reposteria', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 304 to 4999
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                16 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

In [5253]:
datamarket_update('panaderia_y_pasteleria|pan_tostado_y_rallado', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 226 to 4419
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [5254]:
datamarket_update('panaderia_y_pasteleria|picos_rosquilletas_y_picatostes', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 24 to 4995
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14 

In [5255]:
datamarket_update('panaderia_y_pasteleria|pan_de_molde_y_otras_especialidades', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 1144 to 4797
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         14 non-null     int64  
 1   supermarket                14 non-null     object 
 2   brand_category             14 non-null     object 
 3   name                       14 non-null     object 
 4   description                14 non-null     object 
 5   trademark                  14 non-null     object 
 6   trademark_propietary_flag  14 non-null     object 
 7   price                      14 non-null     float64
 8   reference_price            14 non-null     float64
 9   reference_unit             14 non-null     object 
 10  insert_date                14 non-null     object 
 11  price_corrected            14 non-null     bool   
 12  reference_price_corrected  14 non-null     bool   
 13  category_name              14 non-null     object 
 

In [5256]:
datamarket_update('panaderia_y_pasteleria|pan_de_horno', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 33 to 4889
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                3 non-null      object 
 5   trademark                  18 non-null     object 
 6   trademark_propietary_flag  18 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 14

In [5257]:
datamarket_update('panaderia_y_pasteleria|velas_y_decoracion', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 162 to 4752
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

# Procesamiento de la categoría "cereales_y_galletas"

In [5258]:
current_category = df_category[df_category["category_name"] == 'Cereales y galletas']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
138,Cereales y galletas,Cereales,Cereales,139
139,Cereales y galletas,Cereales,Cereales integrales y muesli,140
140,Cereales y galletas,Cereales,Barritas de cereales,141
141,Cereales y galletas,Galletas,Galletas desayuno,142
142,Cereales y galletas,Galletas,Galletas integrales y digestive,143
143,Cereales y galletas,Galletas,Con chocolate y rellenas,144
144,Cereales y galletas,Galletas,Galletas surtidas,145
145,Cereales y galletas,Tortitas,Tortitas,146


In [5259]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cereales_y_galletas']

Unnamed: 0,category,subcategory,subsubcategory
111,cereales_y_galletas,cereales,
250,cereales_y_galletas,galletas,
1719,cereales_y_galletas,tortitas,


In [5260]:
def clasificar_category_galletas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cereales_y_galletas|tortitas':
        return ('Cereales y galletas', 'Tortitas', 'Tortitas')

    if subcat == 'cereales_y_galletas|cereales':
        if 'barrita' in name:
            return ('Cereales y galletas', 'Cereales', 'Barritas de cereales')
        elif any(x in name for x in ['crunchy', 'copos', 'muesli', 'granola', 'integral']):
            return ('Cereales y galletas', 'Cereales', 'Cereales integrales y muesli')
        else:
            return ('Cereales y galletas', 'Cereales', 'Cereales')

    if subcat == 'cereales_y_galletas|galletas':
        if any(x in name for x in ['chocolate', 'oreo', 'rellenos', 'rellenas', 'crema', 'mermelada']):
            return ('Cereales y galletas', 'Galletas', 'Con chocolate y rellenas')
        elif 'integral' in name or 'digestive' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas integrales y digestive')
        elif 'surtido' in name or 'mix' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas surtidas')
        else:
            return ('Cereales y galletas', 'Galletas', 'Galletas desayuno')


    return (pd.NA, pd.NA, pd.NA)

In [5261]:
datamarket_update('cereales_y_galletas|galletas', clasificar_category_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 250 to 4514
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         27 non-null     int64  
 1   supermarket                27 non-null     object 
 2   brand_category             27 non-null     object 
 3   name                       27 non-null     object 
 4   description                27 non-null     object 
 5   trademark                  27 non-null     object 
 6   trademark_propietary_flag  27 non-null     object 
 7   price                      27 non-null     float64
 8   reference_price            27 non-null     float64
 9   reference_unit             27 non-null     object 
 10  insert_date                27 non-null     object 
 11  price_corrected            27 non-null     bool   
 12  reference_price_corrected  27 non-null     bool   
 13  category_name              27 non-null     object 
 1

In [5262]:
datamarket_update('cereales_y_galletas|cereales', clasificar_category_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 111 to 4463
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         23 non-null     int64  
 1   supermarket                23 non-null     object 
 2   brand_category             23 non-null     object 
 3   name                       23 non-null     object 
 4   description                23 non-null     object 
 5   trademark                  23 non-null     object 
 6   trademark_propietary_flag  23 non-null     object 
 7   price                      23 non-null     float64
 8   reference_price            23 non-null     float64
 9   reference_unit             23 non-null     object 
 10  insert_date                23 non-null     object 
 11  price_corrected            23 non-null     bool   
 12  reference_price_corrected  23 non-null     bool   
 13  category_name              23 non-null     object 
 1

In [5263]:
datamarket_update('cereales_y_galletas|tortitas', clasificar_category_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1719 to 1719
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                1 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

# Procesamiento de la categoría "fruta_y_verdura"

In [5264]:
current_category = df_category[df_category["category_name"] == 'Fruta y verdura']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
260,Fruta y verdura,Fruta,Plátano y uva,261
261,Fruta y verdura,Fruta,Manzana y pera,262
262,Fruta y verdura,Fruta,Melón y sandía,263
263,Fruta y verdura,Fruta,Cítricos,264
264,Fruta y verdura,Fruta,Fruta tropical,265
265,Fruta y verdura,Fruta,Otras frutas,266
266,Fruta y verdura,Lechuga y ensalada preparada,Lechuga,267
267,Fruta y verdura,Lechuga y ensalada preparada,Ensalada preparada,268
268,Fruta y verdura,Verdura,Patata,269
269,Fruta y verdura,Verdura,Cebolla y ajo,270


In [5265]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'fruta_y_verdura']

Unnamed: 0,category,subcategory,subsubcategory
56,fruta_y_verdura,verdura,
790,fruta_y_verdura,fruta,
1333,fruta_y_verdura,lechuga_y_ensalada_preparada,


In [5266]:
def clasificar_category_fruta(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'fruta_y_verdura|fruta':
        if any(x in name for x in ['naranja', 'mandarina', 'pomelo', 'lima', 'limón']):
            return ('Fruta y verdura', 'Fruta', 'Cítricos')
        elif any(x in name for x in ['manzana', 'pera']):
            return ('Fruta y verdura', 'Fruta', 'Manzana y pera')
        elif any(x in name for x in ['plátano', 'uva', 'banana']):
            return ('Fruta y verdura', 'Fruta', 'Plátano y uva')
        elif 'melón' in name or 'sandía' in name:
            return ('Fruta y verdura', 'Fruta', 'Melón y sandía')
        elif any(x in name for x in ['mango', 'papaya', 'piña', 'maracuyá', 'guayaba', 'avocado','pitahaya', 'carambola', 'tamarindo', 'chirimoya']):
            return ('Fruta y verdura', 'Fruta', 'Fruta tropical')
        else:
            return ('Fruta y verdura', 'Fruta', 'Otras frutas')

    if subcat == 'fruta_y_verdura|verdura':
        if 'patata' in name:
            return ('Fruta y verdura', 'Verdura', 'Patata')
        elif 'zanahoria' in name or 'pepino' in name:
            return ('Fruta y verdura', 'Verdura', 'Pepino y zanahoria')
        elif 'tomate' in name:
            return ('Fruta y verdura', 'Verdura', 'Tomate')
        elif 'calabacín' in name or 'pimiento' in name:
            return ('Fruta y verdura', 'Verdura', 'Calabacín y pimiento')
        elif any(x in name for x in ['brócoli', 'coliflores', 'col', 'coliflor', 'repollo']):
            return ('Fruta y verdura', 'Verdura', 'Repollo y col')
        elif any(x in name for x in ['ajo', 'cebolla', 'puerro']):
            return ('Fruta y verdura', 'Verdura', 'Cebolla y ajo')
        elif 'setas' in name or 'champiñones' in name:
            return ('Fruta y verdura', 'Verdura', 'Setas y champiñones')
        elif any(x in name for x in ['perejil', 'jengibre', 'cilantro', 'albahaca', 'hierbabuena', 'cebollino']):
            return ('Fruta y verdura', 'Verdura', 'Hierbas aromáticas')
        elif 'al vapor' in name:
            return ('Fruta y verdura', 'Verdura', 'Verduras al vapor')
        else:
            return ('Fruta y verdura', 'Verdura', 'Otras verduras y hortalizas')

    if subcat == 'fruta_y_verdura|lechuga_y_ensalada_preparada':
        if 'lechuga' in name:
            return ('Fruta y verdura', 'Lechuga y ensalada preparada', 'Lechuga')
        else:
            return ('Fruta y verdura', 'Lechuga y ensalada preparada', 'Ensalada preparada')

    return (pd.NA, pd.NA, pd.NA)

In [5267]:
datamarket_update('fruta_y_verdura|fruta', clasificar_category_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 790 to 3874
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 1

In [5268]:
datamarket_update('fruta_y_verdura|verdura', clasificar_category_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 40 entries, 56 to 4894
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         40 non-null     int64  
 1   supermarket                40 non-null     object 
 2   brand_category             40 non-null     object 
 3   name                       40 non-null     object 
 4   description                39 non-null     object 
 5   trademark                  40 non-null     object 
 6   trademark_propietary_flag  40 non-null     object 
 7   price                      40 non-null     float64
 8   reference_price            40 non-null     float64
 9   reference_unit             40 non-null     object 
 10  insert_date                40 non-null     object 
 11  price_corrected            40 non-null     bool   
 12  reference_price_corrected  40 non-null     bool   
 13  category_name              40 non-null     object 
 14

In [5269]:
datamarket_update('fruta_y_verdura|lechuga_y_ensalada_preparada', clasificar_category_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 1333 to 4388
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 

# Procesamiento de la categoría "limpieza_y_hogar"

In [5270]:
current_category = df_category[df_category["category_name"] == 'Limpieza y hogar']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
289,Limpieza y hogar,Detergente y suavizante ropa,Detergente líquido y gel,290
290,Limpieza y hogar,Detergente y suavizante ropa,Detergente en polvo y monodosis,291
291,Limpieza y hogar,Detergente y suavizante ropa,Detergente lavado a mano,292
292,Limpieza y hogar,Detergente y suavizante ropa,Quitamanchas,293
293,Limpieza y hogar,Detergente y suavizante ropa,Activador y antical lavadora,294
294,Limpieza y hogar,Detergente y suavizante ropa,Suavizante,295
295,Limpieza y hogar,Detergente y suavizante ropa,Planchado,296
296,Limpieza y hogar,"Estropajo, bayeta y guantes",Estropajo,297
297,Limpieza y hogar,"Estropajo, bayeta y guantes",Bayeta,298
298,Limpieza y hogar,"Estropajo, bayeta y guantes",Guantes,299


In [5271]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'limpieza_y_hogar'].head(60)

Unnamed: 0,category,subcategory,subsubcategory
22,limpieza_y_hogar,productos_para_toda_la_casa,insecticidas
30,limpieza_y_hogar,productos_para_toda_la_casa,lejias_y_amoniacos
35,limpieza_y_hogar,papel_higienico_y_celulosa,
38,limpieza_y_hogar,ambientadores,electricos
47,limpieza_y_hogar,utensilios_de_limpieza_y_calzado,
48,limpieza_y_hogar,productos_para_toda_la_casa,suelos
67,limpieza_y_hogar,limpieza_muebles_y_multiusos,
74,limpieza_y_hogar,calzado,crema
137,limpieza_y_hogar,pilas_y_bolsas_de_basura,
149,limpieza_y_hogar,detergente_y_suavizante_ropa,


In [5272]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pilas_y_bolsas_de_basura']

Unnamed: 0,category,subcategory,subsubcategory
137,limpieza_y_hogar,pilas_y_bolsas_de_basura,


In [5273]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'estropajo_bayeta_y_guantes']

Unnamed: 0,category,subcategory,subsubcategory
402,limpieza_y_hogar,estropajo_bayeta_y_guantes,


In [5274]:
df_mercadona[df_mercadona['name'].str.contains('bombilla', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id


In [5275]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'limpieza_y_hogar']['subcategory'].unique()

array(['productos_para_toda_la_casa', 'papel_higienico_y_celulosa',
       'ambientadores', 'utensilios_de_limpieza_y_calzado',
       'limpieza_muebles_y_multiusos', 'calzado',
       'pilas_y_bolsas_de_basura', 'detergente_y_suavizante_ropa',
       'bazar', 'menaje_y_conservacion_de_alimentos', 'limpiacristales',
       'utensilios_de_limpieza', 'papeleria', 'papel_y_celulosa',
       'limpieza_vajilla', 'cuidado_de_la_ropa',
       'insecticida_y_ambientador', 'estropajo_bayeta_y_guantes',
       'menaje', 'productos_para_bano', 'productos_para_cocina',
       'lejia_y_liquidos_fuertes', 'limpieza_bano_y_wc',
       'conservacion_de_alimentos', 'limpiahogar_y_friegasuelos',
       'limpieza_cocina'], dtype=object)

In [5276]:
def clasificar_category_limpieza(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat in ['limpieza_y_hogar|conservacion_de_alimentos|bolsas', 'limpieza_y_hogar|conservacion_de_alimentos|papel_de_aluminio',
                  'limpieza_y_hogar|conservacion_de_alimentos|film_transparente']:
        return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Papel y bolsas de conservación')


    if subcat == 'limpieza_y_hogar|menaje|hermeticos':
        return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Herméticos y moldes')

    if subcat in ['limpieza_y_hogar|menaje|vajillas_y_vasos', 'limpieza_y_hogar|menaje|jarras_y_filtros_de_agua']:
        return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Cubiertos, vajilla y mantel')


    if subcat == 'limpieza_y_hogar|menaje|utensilios_de_cocina':
        if 'papel' in name or 'bolsas' in name or 'film' in name:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Papel y bolsas de conservación')
        else:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Encendedores, velas y carbón')


    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|limpiametales', 'limpieza_y_hogar|productos_para_toda_la_casa|limpiador_de_alfombras_y_tapicerias']:
        return ('Limpieza y hogar', 'Limpieza muebles y multiusos', 'Multiusos y otros')


    if subcat == 'limpieza_y_hogar|pilas_y_bolsas_de_basura':
        if 'bolsas' in name:
            return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Bolsas de basura')
        else:
            return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Pilas')

    if subcat == 'limpieza_y_hogar|bazar|pilas':
        return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Pilas')


    if subcat == 'limpieza_y_hogar|menaje_y_conservacion_de_alimentos':
        if 'papel' in name or 'bolsas' in name or 'film' in name:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Papel y bolsas de conservación')
        elif 'hermético' in name or 'molde' in name or 'recipiente' in name:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Herméticos y moldes')
        elif any(x in name for x in ['vasos', 'vaso', 'plato', 'bol', 'cucharas', 'tenedores', 'bandeja', 'mantel', 'palillos', 'pajitas', 'cuchillos']):
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Cubiertos, vajilla y mantel')
        else:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Encendedores, velas y carbón')


    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|limpia_muebles', 'limpieza_y_hogar|limpieza_muebles_y_multiusos']:
        if 'muebles' in name:
            return ('Limpieza y hogar', 'Limpieza muebles y multiusos', 'Limpieza muebles')
        else:
            return ('Limpieza y hogar', 'Limpieza muebles y multiusos', 'Multiusos y otros')


    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|limpiacristales_y_multiusos', 'limpieza_y_hogar|limpiacristales']:
        return ('Limpieza y hogar', 'Limpiacristales', 'Limpiacristales')


    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|suelos', 'limpieza_y_hogar|limpiahogar_y_friegasuelos']:
        if 'friegasuelos' in name or 'suelo' in name:
            return ('Limpieza y hogar', 'Limpiahogar y friegasuelos', 'Friegasuelos')
        else:
            return ('Limpieza y hogar', 'Limpiahogar y friegasuelos', 'Limpiahogar')


    if subcat == 'limpieza_y_hogar|estropajo_bayeta_y_guantes':
        if 'pila' in name or 'pilas' in name:
            return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Pilas')
        elif 'guantes' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Guantes')
        elif 'estropajo' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Estropajo')
        elif 'bayeta' in name or 'paño' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Bayeta')
        else:
          return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Otros utensilios de limpieza')

    return (pd.NA, pd.NA, pd.NA)

In [5277]:
datamarket_update('limpieza_y_hogar|bazar|pilas', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 154 to 4909
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5278]:
datamarket_update('limpieza_y_hogar|conservacion_de_alimentos|bolsas', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 861 to 4292
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5279]:
datamarket_update('limpieza_y_hogar|conservacion_de_alimentos|papel_de_aluminio', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1624 to 4989
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5280]:
datamarket_update('limpieza_y_hogar|conservacion_de_alimentos|film_transparente', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2031 to 4870
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5281]:
datamarket_update('limpieza_y_hogar|menaje|hermeticos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 3868 to 4443
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5282]:
datamarket_update('limpieza_y_hogar|menaje|vajillas_y_vasos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1098 to 4192
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5283]:
datamarket_update('limpieza_y_hogar|menaje|jarras_y_filtros_de_agua', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 565 to 3899
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5284]:
datamarket_update('limpieza_y_hogar|menaje|utensilios_de_cocina', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1669 to 2444
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5285]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|limpiametales', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 490 to 490
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5286]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|limpiador_de_alfombras_y_tapicerias', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3064 to 4023
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5287]:
datamarket_update('limpieza_y_hogar|pilas_y_bolsas_de_basura', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 137 to 2232
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5288]:
datamarket_update('limpieza_y_hogar|menaje_y_conservacion_de_alimentos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 157 to 4692
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                17 non-null     object 
 5   trademark                  18 non-null     object 
 6   trademark_propietary_flag  18 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 1

In [5289]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|limpia_muebles', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2192 to 4796
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5290]:
datamarket_update('limpieza_y_hogar|limpieza_muebles_y_multiusos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 67 to 4804
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         11 non-null     int64  
 1   supermarket                11 non-null     object 
 2   brand_category             11 non-null     object 
 3   name                       11 non-null     object 
 4   description                11 non-null     object 
 5   trademark                  11 non-null     object 
 6   trademark_propietary_flag  11 non-null     object 
 7   price                      11 non-null     float64
 8   reference_price            11 non-null     float64
 9   reference_unit             11 non-null     object 
 10  insert_date                11 non-null     object 
 11  price_corrected            11 non-null     bool   
 12  reference_price_corrected  11 non-null     bool   
 13  category_name              11 non-null     object 
 14

In [5291]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|limpiacristales_y_multiusos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 463 to 4366
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5292]:
datamarket_update('limpieza_y_hogar|limpiacristales', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 185 to 2625
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                2 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5293]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|suelos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 48 to 3346
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [5294]:
datamarket_update('limpieza_y_hogar|limpiahogar_y_friegasuelos', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1253 to 4976
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                8 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

In [5295]:
datamarket_update('limpieza_y_hogar|estropajo_bayeta_y_guantes', clasificar_category_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 402 to 4412
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5296]:
def clasificar_limpieza(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['limpieza_y_hogar|calzado|limpiador_liquido', 'limpieza_y_hogar|calzado|plantillas_de_calzado',
                  'limpieza_y_hogar|calzado|crema', 'limpieza_y_hogar|utensilios_de_limpieza_y_calzado']:
        return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Limpieza de calzado')

    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza|bolsas_de_basura':
        return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Bolsas de basura')

    if subcat in ['limpieza_y_hogar|limpieza_bano_y_wc', 'limpieza_y_hogar|productos_para_bano|wc',
                  'limpieza_y_hogar|productos_para_bano|desatascadores_y_limpia_tuberias', 'limpieza_y_hogar|productos_para_bano|limpiadores_antical_bano',
                  'limpieza_y_hogar|productos_para_bano|limpiadores_bano', 'limpieza_y_hogar|productos_para_bano|limpiajuntas']:
        return ('Limpieza y hogar', 'Limpieza baño y WC', 'Limpieza baño y WC')

    if subcat in ['limpieza_y_hogar|limpieza_cocina', 'limpieza_y_hogar|productos_para_cocina|quitagrasas',
                  'limpieza_y_hogar|productos_para_cocina|aditivos_y_limpiamaquinas', 'limpieza_y_hogar|productos_para_cocina|lavavajillas_a_maquina',
                  'limpieza_y_hogar|productos_para_cocina|vitroceramicas_e_induccion', 'limpieza_y_hogar|productos_para_cocina|limpiadores_electrodomesticos_cocina']:
        return ('Limpieza y hogar', 'Limpieza cocina', 'Limpieza cocina')


    if subcat == 'limpieza_y_hogar|detergente_y_suavizante_ropa':
        if 'manchas' in name or 'eliminador' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Quitamanchas')
        elif 'suavizante' in name or 'perfumador' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Suavizante')
        elif 'planchado' in name or 'plancha' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Planchado')
        elif 'activador' in name or 'antical' in name or 'lavadora' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Activador y antical lavadora')
        elif 'líquido' in name or 'gel' in name and 'detergente' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente líquido y gel')
        elif 'a mano' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente lavado a mano')
        else:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente en polvo y monodosis')

    if subcat in ['limpieza_y_hogar|cuidado_de_la_ropa|aditivos_y_quitamanchas', 'limpieza_y_hogar|cuidado_de_la_ropa|toallitas_atrapacolores',
                  'limpieza_y_hogar|cuidado_de_la_ropa|tinte_para_la_ropa']:
        return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Quitamanchas')

    if subcat == 'limpieza_y_hogar|cuidado_de_la_ropa|suavizantes':
        return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Suavizante')

    if subcat == 'limpieza_y_hogar|cuidado_de_la_ropa|tendido_y_planchado':
        return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Planchado')

    if subcat in ['limpieza_y_hogar|cuidado_de_la_ropa|limpiadores_y_antical_para_lavadora',  'limpieza_y_hogar|cuidado_de_la_ropa|lejias_lavadora']:
        return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Activador y antical lavadora')

    if subcat == 'limpieza_y_hogar|cuidado_de_la_ropa|detergentes':
        if 'líquido' in name or 'gel' in name and 'detergente' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente líquido y gel')
        elif 'a mano' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente lavado a mano')
        else:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente en polvo y monodosis')


    if subcat in ['limpieza_y_hogar|limpieza_vajilla', 'limpieza_y_hogar|productos_para_cocina|lavavajillas_a_maquina']:
        if 'pastillas' in name or 'máquina' in name:
            return ('Limpieza y hogar', 'Limpieza vajilla', 'Limpiavajilla a máquina')
        else:
            return ('Limpieza y hogar', 'Limpieza vajilla', 'Limpiavajilla a mano')


    if subcat == 'limpieza_y_hogar|papel_y_celulosa|panuelos':
        return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Pañuelos')

    if subcat == 'limpieza_y_hogar|papel_y_celulosa|papel_higienico':
        return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Papel higiénico')

    if subcat == 'limpieza_y_hogar|papel_y_celulosa|toallitas_gafas':
        return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Toallitas')

    if subcat == 'limpieza_y_hogar|papel_higienico_y_celulosa':
        if 'servilletas' in name or 'servilleta' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Servilletas')
        elif 'multiusos' in name or 'cocina' in name or 'hogar' in name and 'papel' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Rollo cocina')
        elif 'pañuelos' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Pañuelos')
        elif 'papel higiénico' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Papel higiénico')
        else:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Toallitas')


    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza_y_calzado':
        if 'cubo' in name or 'escurridor' in name or 'barreño' in name:
            return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Cubos y barreños')
        elif 'fregona' in name or 'escoba' in name or 'mopa' in name:
            return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Fregonas, escobas y mopas')
        else:
            return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Otros utensilios de limpieza')

    if subcat in ['limpieza_y_hogar|utensilios_de_limpieza|fregonas', 'limpieza_y_hogar|utensilios_de_limpieza|escobas_mopas_y_recogedores']:
        return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Fregonas, escobas y mopas')

    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza|cubos_de_basura':
        return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Cubos y barreños')

    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza|estropajos':
        return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Estropajo')

    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza|bayetas_microfibra_atrapapolvo':
        return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Bayeta')

    if subcat == 'limpieza_y_hogar|utensilios_de_limpieza|guantes':
        return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Guantes')

    if subcat in ['limpieza_y_hogar|utensilios_de_limpieza|plumeros_rodillos_y_recambios', 'limpieza_y_hogar|utensilios_de_limpieza|otros_utiles',
                  'limpieza_y_hogar|utensilios_de_limpieza|palos_y_sujetapalos']:
        return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Otros utensilios de limpieza')


    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|lejias_y_amoniacos', 'limpieza_y_hogar|lejia_y_liquidos_fuertes']:
        if 'lejía' in name:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Lejía')
        elif 'piscina' in name:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Tratamiento piscina')
        else:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Amoníaco y salfumán')

    if subcat == 'limpieza_y_hogar|ambientadores|aerosol_o_pistola':
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador spray')

    if subcat == 'limpieza_y_hogar|ambientadores|coche':
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador coche')

    if subcat == 'limpieza_y_hogar|ambientadores|electricos':
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador eléctrico')

    if subcat == 'limpieza_y_hogar|ambientadores|automaticos':
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador automático')

    if subcat in ['limpieza_y_hogar|ambientadores|antihumedad', 'limpieza_y_hogar|ambientadores|absorbeolores']:
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Absorbeolores y antihumedad')

    if subcat in ['limpieza_y_hogar|ambientadores|decorativos', 'limpieza_y_hogar|ambientadores|un_toque']:
        return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador decorativo y otros')

    if subcat in ['limpieza_y_hogar|productos_para_toda_la_casa|insecticidas', 'limpieza_y_hogar|insecticida_y_ambientador']:
        if 'spray' in name and 'insecticida' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Insecticida spray')
        elif 'insecticida' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Insecticida eléctrico y otros')
        elif 'spray' in name and 'ambientador' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador spray')
        elif 'coche' in name or 'auto' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador coche')
        elif 'eléctrico' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador eléctrico')
        elif 'automático' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador automático')
        elif 'absorbeolores' in name or 'antihumedad' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Absorbeolores y antihumedad')
        else:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador decorativo y otros')


    return (pd.NA, pd.NA, pd.NA)

In [5297]:
datamarket_update('limpieza_y_hogar|limpieza_bano_y_wc', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 784 to 4986
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5298]:
datamarket_update('limpieza_y_hogar|calzado|limpiador_liquido', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3521 to 3521
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5299]:
datamarket_update('limpieza_y_hogar|calzado|plantillas_de_calzado', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 570 to 3972
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5300]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza_y_calzado', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 47 to 4882
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                10 non-null     object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 14

In [5301]:
datamarket_update('limpieza_y_hogar|calzado|crema', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 74 to 2484
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14 

In [5302]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|bolsas_de_basura', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 613 to 3769
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5303]:
datamarket_update('limpieza_y_hogar|productos_para_bano|wc', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 578 to 4831
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5304]:
datamarket_update('limpieza_y_hogar|productos_para_bano|desatascadores_y_limpia_tuberias', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 681 to 2318
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5305]:
datamarket_update('limpieza_y_hogar|productos_para_bano|limpiadores_antical_bano', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1751 to 4101
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5306]:
datamarket_update('limpieza_y_hogar|productos_para_bano|limpiadores_bano', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1752 to 4217
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5307]:
datamarket_update('limpieza_y_hogar|productos_para_bano|limpiajuntas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3921 to 3921
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5308]:
datamarket_update('limpieza_y_hogar|limpieza_cocina', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 2515 to 4866
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5309]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|quitagrasas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 587 to 2374
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5310]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|aditivos_y_limpiamaquinas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 588 to 4145
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5311]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|lavavajillas_a_maquina', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 794 to 4918
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5312]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|vitroceramicas_e_induccion', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1503 to 4312
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5313]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|limpiadores_electrodomesticos_cocina', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3169 to 3169
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5314]:
datamarket_update('limpieza_y_hogar|detergente_y_suavizante_ropa', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 149 to 4577
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5315]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|aditivos_y_quitamanchas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 538 to 4232
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5316]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|toallitas_atrapacolores', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2229 to 2443
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5317]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|tinte_para_la_ropa', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 3052 to 4612
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5318]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|suavizantes', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2219 to 3523
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5319]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|tendido_y_planchado', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2820 to 4601
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5320]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|limpiadores_y_antical_para_lavadora', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 796 to 2752
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5321]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|lejias_lavadora', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 331 to 3476
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5322]:
datamarket_update('limpieza_y_hogar|cuidado_de_la_ropa|detergentes', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 849 to 4881
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5323]:
datamarket_update('limpieza_y_hogar|limpieza_vajilla', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 306 to 4950
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                9 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5324]:
datamarket_update('limpieza_y_hogar|productos_para_cocina|lavavajillas_a_maquina', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 794 to 4918
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5325]:
datamarket_update('limpieza_y_hogar|papel_y_celulosa|panuelos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 247 to 3914
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5326]:
datamarket_update('limpieza_y_hogar|papel_y_celulosa|papel_higienico', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2106 to 2680
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5327]:
datamarket_update('limpieza_y_hogar|papel_y_celulosa|toallitas_gafas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3916 to 3916
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5328]:
datamarket_update('limpieza_y_hogar|papel_higienico_y_celulosa', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 35 to 4481
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                7 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14 

In [5329]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|fregonas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 2270 to 4617
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5330]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|escobas_mopas_y_recogedores', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 200 to 3253
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5331]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|cubos_de_basura', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 260 to 4452
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5332]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|estropajos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 2580 to 4821
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5333]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|bayetas_microfibra_atrapapolvo', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1638 to 4679
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5334]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|guantes', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 434 to 4525
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5335]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|plumeros_rodillos_y_recambios', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2595 to 4140
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5336]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|otros_utiles', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 426 to 4310
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5337]:
datamarket_update('limpieza_y_hogar|utensilios_de_limpieza|palos_y_sujetapalos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1991 to 1991
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5338]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|lejias_y_amoniacos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 30 to 4336
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14 

In [5339]:
datamarket_update('limpieza_y_hogar|lejia_y_liquidos_fuertes', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 593 to 4656
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                13 non-null     object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

In [5340]:
datamarket_update('limpieza_y_hogar|ambientadores|aerosol_o_pistola', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 548 to 4104
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5341]:
datamarket_update('limpieza_y_hogar|ambientadores|coche', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 286 to 4840
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5342]:
datamarket_update('limpieza_y_hogar|ambientadores|electricos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 38 to 4518
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [5343]:
datamarket_update('limpieza_y_hogar|ambientadores|automaticos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4187 to 4187
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5344]:
datamarket_update('limpieza_y_hogar|ambientadores|antihumedad', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 742 to 742
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5345]:
datamarket_update('limpieza_y_hogar|ambientadores|absorbeolores', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1069 to 4019
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5346]:
datamarket_update('limpieza_y_hogar|ambientadores|decorativos', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1100 to 3065
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5347]:
datamarket_update('limpieza_y_hogar|ambientadores|un_toque', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 562 to 4480
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5348]:
datamarket_update('limpieza_y_hogar|productos_para_toda_la_casa|insecticidas', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 22 to 3870
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14 

In [5349]:
datamarket_update('limpieza_y_hogar|insecticida_y_ambientador', clasificar_limpieza)

<class 'pandas.core.frame.DataFrame'>
Index: 24 entries, 383 to 4719
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         24 non-null     int64  
 1   supermarket                24 non-null     object 
 2   brand_category             24 non-null     object 
 3   name                       24 non-null     object 
 4   description                23 non-null     object 
 5   trademark                  24 non-null     object 
 6   trademark_propietary_flag  24 non-null     object 
 7   price                      24 non-null     float64
 8   reference_price            24 non-null     float64
 9   reference_unit             24 non-null     object 
 10  insert_date                24 non-null     object 
 11  price_corrected            24 non-null     bool   
 12  reference_price_corrected  24 non-null     bool   
 13  category_name              24 non-null     object 
 1

# Procesamiento de la categoría "cuidado_del_hogar"

In [5350]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'cuidado_del_hogar']

Unnamed: 0,category,subcategory,subsubcategory
7,cuidado_del_hogar,utensilios_de_limpieza,
19,cuidado_del_hogar,lavavajillas,
31,cuidado_del_hogar,productos_de_limpieza,bano
117,cuidado_del_hogar,conservacion_de_alimentos,
144,cuidado_del_hogar,cuidado_de_la_ropa,
183,cuidado_del_hogar,ambientadores,
347,cuidado_del_hogar,productos_de_limpieza,hogar
457,cuidado_del_hogar,papel,
541,cuidado_del_hogar,insecticidas,
735,cuidado_del_hogar,productos_de_limpieza,cocina


In [5351]:
df_datamarket[df_datamarket['brand_category'] == 'cuidado_del_hogar|papel']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
457,25864965,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA servilleta suave 2 capas paquete 100 uds,,dia,True,1.69,1.69,ud,2023-03-15,False,False,,,
520,25864961,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA papel higiénico 2 capas paquete 12 uds,,dia,True,2.99,0.25,ud,2023-03-15,False,False,,,
1091,25864981,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA servilleta 100% reciclada 2 capas paquete 50 uds,,dia,True,1.69,1.69,ud,2023-03-15,False,False,,,
1222,25864978,dia.es,cuidado_del_hogar|papel,AMOOS papel multiusos Jumbo 2 capas 1 rollo,,amoos,False,2.99,2.99,ud,2023-03-15,False,False,,,
1876,25864957,dia.es,cuidado_del_hogar|papel,SCOTTEX papel higiénico original paquete 12 uds,,scottex,False,4.29,0.36,ud,2023-03-15,False,False,,,
2428,25864973,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA papel de cocina compacto 2 capas paquete 3 uds,,dia,True,2.49,0.83,ud,2023-03-15,False,False,,,
2511,25864964,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA servilletas blancas 1 capa paquete 200 uds,,dia,True,1.49,1.49,ud,2023-03-15,False,False,,,
3019,25864962,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA papel de cocina multiusos rollo 1 ud,,dia,True,3.59,3.59,ud,2023-03-15,False,False,,,
3607,25864971,dia.es,cuidado_del_hogar|papel,RENOVA papel de cocina olé paquete 2 uds,,renova,False,1.29,0.65,ud,2023-03-15,False,False,,,
4302,25864984,dia.es,cuidado_del_hogar|papel,DIA LA LLAMA papel higiénico reciclado 2 capas paquete 6 uds,,dia,True,3.25,0.54,ud,2023-03-15,False,False,,,


In [5352]:
def clasificar_hogar(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'cuidado_del_hogar|calzado':
        return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Limpieza de calzado')

    if subcat == 'cuidado_del_hogar|productos_de_limpieza|bano':
        return ('Limpieza y hogar', 'Limpieza baño y WC', 'Limpieza baño y WC')

    if subcat == 'cuidado_del_hogar|productos_de_limpieza|cocina':
        return ('Limpieza y hogar', 'Limpieza cocina', 'Limpieza cocina')

    if subcat == 'cuidado_del_hogar|cuidado_de_la_ropa':
        if 'manchas' in name or 'eliminador' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Quitamanchas')
        elif 'suavizante' in name or 'perfumador' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Suavizante')
        elif 'planchado' in name or 'plancha' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Planchado')
        elif 'activador' in name or 'antical' in name or 'lavadora' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Activador y antical lavadora')
        elif 'líquido' in name or 'gel' in name and 'detergente' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente líquido y gel')
        elif 'a mano' in name:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente lavado a mano')
        else:
            return ('Limpieza y hogar', 'Detergente y suavizante ropa', 'Detergente en polvo y monodosis')

    if subcat == 'cuidado_del_hogar|lavavajillas':
        if 'pastillas' in name or 'máquina' in name:
            return ('Limpieza y hogar', 'Limpieza vajilla', 'Limpiavajilla a máquina')
        else:
            return ('Limpieza y hogar', 'Limpieza vajilla', 'Limpiavajilla a mano')

    if subcat == 'cuidado_del_hogar|papel':
        if 'servilletas' in name or 'servilleta' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Servilletas')
        elif 'multiusos' in name or 'cocina' in name or 'hogar' in name and 'papel' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Rollo cocina')
        elif 'pañuelos' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Pañuelos')
        elif 'papel higiénico' in name:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Papel higiénico')
        else:
            return ('Limpieza y hogar', 'Papel higiénico y celulosa', 'Toallitas')

    if subcat == 'cuidado_del_hogar|utensilios_de_limpieza':
        if 'cubo' in name or 'escurridor' in name or 'barreño' in name:
            return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Cubos y barreños')
        elif 'fregona' in name or 'escoba' in name or 'mopa' in name:
            return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Fregonas, escobas y mopas')
        elif 'guantes' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Guantes')
        elif 'estropajo' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Estropajo')
        elif 'bayeta' in name or 'paño' in name:
            return ('Limpieza y hogar', 'Estropajo, bayeta y guantes', 'Bayeta')
        else:
          return ('Limpieza y hogar', 'Utensilios de limpieza y calzado', 'Otros utensilios de limpieza')

    if subcat == 'cuidado_del_hogar|productos_de_limpieza|hogar':
        if 'lejía' in name:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Lejía')
        elif 'piscina' in name:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Tratamiento piscina')
        elif 'amoníaco' in name or 'salfumán' in name:
            return ('Limpieza y hogar', 'Lejía y líquidos fuertes', 'Amoníaco y salfumán')
        elif 'friegasuelos' in name or 'suelo' in name:
            return ('Limpieza y hogar', 'Limpiahogar y friegasuelos', 'Friegasuelos')
        elif 'limpiacristales' in name:
            return ('Limpieza y hogar', 'Limpiacristales', 'Limpiacristales')
        elif 'muebles' in name:
            return ('Limpieza y hogar', 'Limpieza muebles y multiusos', 'Limpieza muebles')
        elif 'alfombras' in name or 'tapicerias' in name:
            return ('Limpieza y hogar', 'Limpieza muebles y multiusos', 'Multiusos y otros')
        else:
            return ('Limpieza y hogar', 'Limpiahogar y friegasuelos', 'Limpiahogar')

    if subcat == 'cuidado_del_hogar|conservacion_de_alimentos':
        return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Papel y bolsas de conservación')

    if subcat in 'cuidado_del_hogar|insecticidas':
        if 'spray' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Insecticida spray')
        else:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Insecticida eléctrico y otros')

    if subcat == 'cuidado_del_hogar|ambientadores':
        if 'spray' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador spray')
        elif 'coche' in name or 'auto' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador coche')
        elif 'eléctrico' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador eléctrico')
        elif 'automático' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador automático')
        elif 'absorbeolores' in name or 'antihumedad' in name:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Absorbeolores y antihumedad')
        else:
            return ('Limpieza y hogar', 'Insecticida y ambientador', 'Ambientador decorativo y otros')

    if subcat == 'cuidado_del_hogar|bazar':
        if 'bolsas' in name and 'basura' in name:
            return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Bolsas de basura')
        if 'pila' in name:
            return ('Limpieza y hogar', 'Pilas y bolsas de basura', 'Pilas')
        if 'papel' in name or 'bolsas' in name or 'film' in name:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Papel y bolsas de conservación')
        elif 'hermético' in name or 'molde' in name or 'recipiente' in name:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Herméticos y moldes')
        elif any(x in name for x in ['vasos', 'vaso', 'plato', 'bol', 'cucharas', 'tenedores', 'bandeja', 'mantel', 'palillos', 'pajitas', 'cuchillos']):
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Cubiertos, vajilla y mantel')
        else:
            return ('Limpieza y hogar', 'Menaje y conservación de alimentos', 'Encendedores, velas y carbón')

    return (pd.NA, pd.NA, pd.NA)

In [5353]:
datamarket_update('cuidado_del_hogar|calzado', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1252 to 3857
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5354]:
datamarket_update('cuidado_del_hogar|productos_de_limpieza|bano', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 31 to 4738
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 14

In [5355]:
datamarket_update('cuidado_del_hogar|productos_de_limpieza|cocina', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 735 to 4651
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5356]:
datamarket_update('cuidado_del_hogar|cuidado_de_la_ropa', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 35 entries, 144 to 4998
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         35 non-null     int64  
 1   supermarket                35 non-null     object 
 2   brand_category             35 non-null     object 
 3   name                       35 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  35 non-null     object 
 6   trademark_propietary_flag  35 non-null     object 
 7   price                      35 non-null     float64
 8   reference_price            35 non-null     float64
 9   reference_unit             35 non-null     object 
 10  insert_date                35 non-null     object 
 11  price_corrected            35 non-null     bool   
 12  reference_price_corrected  35 non-null     bool   
 13  category_name              35 non-null     object 
 1

In [5357]:
datamarket_update('cuidado_del_hogar|lavavajillas', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 19 to 4737
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         27 non-null     int64  
 1   supermarket                27 non-null     object 
 2   brand_category             27 non-null     object 
 3   name                       27 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  27 non-null     object 
 6   trademark_propietary_flag  27 non-null     object 
 7   price                      27 non-null     float64
 8   reference_price            27 non-null     float64
 9   reference_unit             27 non-null     object 
 10  insert_date                27 non-null     object 
 11  price_corrected            27 non-null     bool   
 12  reference_price_corrected  27 non-null     bool   
 13  category_name              27 non-null     object 
 14

In [5358]:
datamarket_update('cuidado_del_hogar|insecticidas', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 541 to 4880
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5359]:
datamarket_update('cuidado_del_hogar|conservacion_de_alimentos', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 117 to 3864
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5360]:
datamarket_update('cuidado_del_hogar|productos_de_limpieza|hogar', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 347 to 4838
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [5361]:
datamarket_update('cuidado_del_hogar|papel', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 457 to 4723
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [5362]:
datamarket_update('cuidado_del_hogar|utensilios_de_limpieza', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, 7 to 4774
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         28 non-null     int64  
 1   supermarket                28 non-null     object 
 2   brand_category             28 non-null     object 
 3   name                       28 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      28 non-null     float64
 8   reference_price            28 non-null     float64
 9   reference_unit             28 non-null     object 
 10  insert_date                28 non-null     object 
 11  price_corrected            28 non-null     bool   
 12  reference_price_corrected  28 non-null     bool   
 13  category_name              28 non-null     object 
 14 

In [5363]:
datamarket_update('cuidado_del_hogar|ambientadores', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 183 to 4693
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  17 non-null     object 
 6   trademark_propietary_flag  17 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 1

In [5364]:
datamarket_update('cuidado_del_hogar|bazar', clasificar_hogar)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 1269 to 4511
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 1

# Procesamiento de la categoría "marisco_y_pescado"

In [5365]:
current_category = df_category[df_category["category_name"] == 'Marisco y pescado']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
349,Marisco y pescado,Marisco,Marisco,350
350,Marisco y pescado,Marisco,Marisco de concha,351
351,Marisco y pescado,Marisco,Surimi y otros,352
352,Marisco y pescado,Pescado congelado,Pescado congelado,353
353,Marisco y pescado,Pescado congelado,Pescado rebozado congelado,354
354,Marisco y pescado,Pescado congelado,"Sepia, pulpo y calamar congelado",355
355,Marisco y pescado,Pescado fresco,Salmón,356
356,Marisco y pescado,Pescado fresco,Dorada,357
357,Marisco y pescado,Pescado fresco,Lubina,358
358,Marisco y pescado,Pescado fresco,Merluza,359


In [5366]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'marisco_y_pescado']

Unnamed: 0,category,subcategory,subsubcategory
161,marisco_y_pescado,pescado_en_bandeja,
228,marisco_y_pescado,marisco,
568,marisco_y_pescado,pescado_fresco,
1037,marisco_y_pescado,salazones_y_ahumados,
1649,marisco_y_pescado,sushi,


In [5367]:
def clasificar_category_pescado(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['marisco_y_pescado|pescado_fresco', 'marisco_y_pescado|pescado_en_bandeja']:
        if 'congelado' in name and 'rebozado' in name:
            return ('Marisco y pescado', 'Pescado congelado', 'Pescado rebozado congelado')
        elif 'congelado' in name and any(x in name for x in ['pulpo', 'sepia', 'calamar', 'potón', 'pota']):
            return ('Marisco y pescado', 'Pescado congelado', 'Sepia, pulpo y calamar congelado')
        elif 'congelado' in name:
            return ('Marisco y pescado', 'Pescado congelado', 'Pescado congelado')
        elif 'salmón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Salmón')
        elif 'dorada' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Dorada')
        elif 'lubina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lubina')
        elif 'merluza' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Merluza')
        elif 'bacalao' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Bacalao')
        elif 'corvina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Corvina')
        elif 'trucha' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Trucha')
        elif 'lenguado' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lenguado')
        elif 'boquerón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Boquerón')
        elif 'rodaballo' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Rodaballo')
        elif 'sardina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Sardina')
        elif 'caballa' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Caballa')
        elif any(x in name for x in ['pulpo', 'sepia', 'calamar', 'potón', 'pota']):
            return ('Marisco y pescado', 'Pescado fresco', 'Sepia, pulpo y calamar')
        else:
            return ('Marisco y pescado', 'Pescado fresco', 'Otros')

    if subcat == 'marisco_y_pescado|marisco':
        if any(x in name for x in ['mejillón', 'almeja', 'berberechos', 'navajas', 'chirla', 'cañaílla']):
            return ('Marisco y pescado', 'Marisco', 'Marisco de concha')
        else:
            return ('Marisco y pescado', 'Marisco', 'Marisco')

    if subcat == 'marisco_y_pescado|salazones_y_ahumados':
        if any(x in name for x in ['ahumada', 'ahumado']):
            return ('Marisco y pescado', 'Salazones y ahumados', 'Ahumados')
        else:
            return ('Marisco y pescado', 'Salazones y ahumados', 'Salazones')

    if subcat == 'marisco_y_pescado|sushi':
        return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')

    return (pd.NA, pd.NA, pd.NA)

In [5368]:
datamarket_update('marisco_y_pescado|sushi', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1649 to 2191
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                4 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5369]:
datamarket_update('marisco_y_pescado|salazones_y_ahumados', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1037 to 4364
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5370]:
datamarket_update('marisco_y_pescado|marisco', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 228 to 4621
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                5 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5371]:
datamarket_update('marisco_y_pescado|pescado_fresco', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 568 to 4949
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                19 non-null     object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 1

In [5372]:
datamarket_update('marisco_y_pescado|pescado_en_bandeja', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 161 to 4325
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                12 non-null     object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [5373]:
df_datamarket.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5000 non-null   int64  
 1   supermarket                5000 non-null   object 
 2   brand_category             5000 non-null   object 
 3   name                       4999 non-null   object 
 4   description                1832 non-null   object 
 5   trademark                  4921 non-null   object 
 6   trademark_propietary_flag  4921 non-null   object 
 7   price                      5000 non-null   float64
 8   reference_price            5000 non-null   float64
 9   reference_unit             5000 non-null   object 
 10  insert_date                5000 non-null   object 
 11  price_corrected            5000 non-null   bool   
 12  reference_price_corrected  5000 non-null   bool   
 13  category_name              3368 non-null   objec

# Procesamiento de la categorias "pizzas_y_platos_preparados" y platos_preparados

In [5374]:
current_category = df_category[df_category["category_name"] == 'Pizzas y platos preparados']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
404,Pizzas y platos preparados,Listo para Comer,Platos calientes,405
405,Pizzas y platos preparados,Listo para Comer,Platos fríos,406
406,Pizzas y platos preparados,Pizzas,Pizzas refrigeradas,407
407,Pizzas y platos preparados,Pizzas,Pizzas congeladas,408
408,Pizzas y platos preparados,Pizzas,Base de pizza,409
409,Pizzas y platos preparados,Pizzas,"Roscas, quiche y baguettes",410
410,Pizzas y platos preparados,Platos preparados calientes,Pasta,411
411,Pizzas y platos preparados,Platos preparados calientes,Arroz,412
412,Pizzas y platos preparados,Platos preparados calientes,Carne,413
413,Pizzas y platos preparados,Platos preparados calientes,Tortilla,414


In [5375]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'pizzas_y_platos_preparados']

Unnamed: 0,category,subcategory,subsubcategory
110,pizzas_y_platos_preparados,platos_preparados_frios,
219,pizzas_y_platos_preparados,platos_preparados_calientes,
602,pizzas_y_platos_preparados,pizzas,


In [5376]:
def clasificar_category_preparados(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'pizzas_y_platos_preparados|pizzas':
        if 'ultracongelada' in name or 'ultracongeladas' in name:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas congeladas')
        elif any(x in name for x in ['piadinas', 'base', 'bases', 'cocas', 'masa']):
            return ('Pizzas y platos preparados', 'Pizzas', 'Base de pizza')
        else:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas refrigeradas')


    if subcat == 'pizzas_y_platos_preparados|platos_preparados_frios':
        if 'ensaladilla' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Ensaladilla')
        elif 'sándwich' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Sándwich')
        elif 'hummus' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Hummus y otros')
        elif  'gazpacho' in name or 'salmorejo' in name:
           return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Gazpacho y salmorejo')
        else:
            return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')


    if subcat == 'pizzas_y_platos_preparados|platos_preparados_calientes':
        if any(x in name for x in ['pasta oriental', 'noodles orientales', 'yakisoba', 'noodles de arroz', 	'fideos orientales', 	'fideos de arroz']):
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Fideos orientales')
        elif 'tortilla' in name:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Tortilla')
        elif 'arroz' in name or 'paella' in name:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Arroz')
        elif any(x in name for x in ['fideuá', 'fideo', 'penne', 'tortiglioni', 'macarrón', 'fusilli', 'tallarines', 'spaghetti', 'nidos', 'noodles', 'tagliatelle', 'tortellini', 'ravioli', 'pasta']):
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Pasta')
        elif any(x in name for x in ['carne', 'pollo', 'pavo', 'cerdo', 'vacuno']):
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Carne')
        else:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Otros')


    return (pd.NA, pd.NA, pd.NA)

In [5377]:
datamarket_update('pizzas_y_platos_preparados|platos_preparados_calientes', clasificar_category_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 219 to 4610
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                20 non-null     object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [5378]:
datamarket_update('pizzas_y_platos_preparados|platos_preparados_frios', clasificar_category_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 110 to 4647
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                6 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5379]:
datamarket_update('pizzas_y_platos_preparados|pizzas', clasificar_category_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 602 to 4726
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         18 non-null     int64  
 1   supermarket                18 non-null     object 
 2   brand_category             18 non-null     object 
 3   name                       18 non-null     object 
 4   description                6 non-null      object 
 5   trademark                  18 non-null     object 
 6   trademark_propietary_flag  18 non-null     object 
 7   price                      18 non-null     float64
 8   reference_price            18 non-null     float64
 9   reference_unit             18 non-null     object 
 10  insert_date                18 non-null     object 
 11  price_corrected            18 non-null     bool   
 12  reference_price_corrected  18 non-null     bool   
 13  category_name              18 non-null     object 
 1

# Procesamiento de la categoría "platos_preparados"

In [5380]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'platos_preparados']

Unnamed: 0,category,subcategory,subsubcategory
396,platos_preparados,legumbres,
514,platos_preparados,verduras,
964,platos_preparados,ensaladas,
1151,platos_preparados,carne,
1203,platos_preparados,gazpachos_y_salmorejos,
1274,platos_preparados,pizzas_refrigeradas,
1801,platos_preparados,pastas_y_arroces,
2439,platos_preparados,bocadillos_y_sandwich,
2633,platos_preparados,empanadas_empanadillas_y_hojaldres,
3780,platos_preparados,tortillas,


In [5381]:
current_category = df_category[df_category["category_name"] == 'Pizzas y platos preparados']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
404,Pizzas y platos preparados,Listo para Comer,Platos calientes,405
405,Pizzas y platos preparados,Listo para Comer,Platos fríos,406
406,Pizzas y platos preparados,Pizzas,Pizzas refrigeradas,407
407,Pizzas y platos preparados,Pizzas,Pizzas congeladas,408
408,Pizzas y platos preparados,Pizzas,Base de pizza,409
409,Pizzas y platos preparados,Pizzas,"Roscas, quiche y baguettes",410
410,Pizzas y platos preparados,Platos preparados calientes,Pasta,411
411,Pizzas y platos preparados,Platos preparados calientes,Arroz,412
412,Pizzas y platos preparados,Platos preparados calientes,Carne,413
413,Pizzas y platos preparados,Platos preparados calientes,Tortilla,414


In [5382]:
df_mercadona[df_mercadona['name'].str.contains('empanadilla', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1960,Empanadillas de atún Hacendado ultracongeladas,Caja 0.5 kg,2.0,4.0,kg,Pescado rebozado,Congelados,Rebozados,False,183
3976,Empanadillas de atún Hacendado ultracongeladas,Caja 0.5 kg,2.0,4.0,kg,Pescado rebozado congelado,Marisco y pescado,Pescado congelado,False,354
4199,6 Mini empanadillas de pisto,Bolsa 0.2 kg,1.75,8.839,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4204,Empanadilla de tomate 22%,Pieza 0.12 kg,1.4,11.667,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4205,Empanadilla de espinacas 26%,Pieza 0.12 kg,1.4,11.667,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4206,Empanadilla de guisantes 13%,Pieza 0.1 kg,1.4,14.0,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4207,Empanadilla de pollo 10% y bechamel,Pieza 0.098 kg,1.3,13.266,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4208,Empanadilla de atún 16%,Pieza 0.125 kg,1.3,10.4,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383


In [5383]:
def clasificar_preparados(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'platos_preparados|gazpachos_y_salmorejos':
        return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Gazpacho y salmorejo')

    if subcat == 'platos_preparados|pizzas_refrigeradas':
        return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas refrigeradas')

    if subcat == 'platos_preparados|bocadillos_y_sandwich':
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Sándwich')

    if subcat == 'platos_preparados|empanadas_empanadillas_y_hojaldres':
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería salada')

    if subcat == 'platos_preparados|pastas_y_arroces':
        if any(x in name for x in ['pasta oriental', 'noodles orientales', 'yakisoba', 'noodles de arroz', 	'fideos orientales', 	'fideos de arroz']):
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Fideos orientales')
        elif 'sushi' in name or 'poke' in name:
            return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')
        elif 'arroz' in name or 'paella' in name:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Arroz')
        else:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Pasta')

    if subcat == 'platos_preparados|tortillas':
        return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Tortilla')

    if subcat == 'platos_preparados|carne':
        return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Carne')

    if subcat == 'platos_preparados|ensaladas':
        return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Ensaladilla')


    if subcat in ['platos_preparados|ensaladas', 'platos_preparados|gazpachos_y_salmorejos',
                  'platos_preparados|bocadillos_y_sandwich', 'platos_preparados|empanadas_empanadillas_y_hojaldres']:
        return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')

    if subcat in ['platos_preparados|carne', 'platos_preparados|tortillas',
                  'platos_preparados|pastas_y_arroces', 'platos_preparados|legumbres',
                  'platos_preparados|verduras']:
        return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos calientes')

    if subcat in ['platos_preparados|legumbres', 'platos_preparados|verduras']:
        return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Otros')



    return (pd.NA, pd.NA, pd.NA)

In [5384]:
datamarket_update('platos_preparados|gazpachos_y_salmorejos', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1203 to 3299
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5385]:
datamarket_update('platos_preparados|pizzas_refrigeradas', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1274 to 4937
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5386]:
datamarket_update('platos_preparados|bocadillos_y_sandwich', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2439 to 4051
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5387]:
datamarket_update('platos_preparados|empanadas_empanadillas_y_hojaldres', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2633 to 4823
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5388]:
datamarket_update('platos_preparados|pastas_y_arroces', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1801 to 3544
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5389]:
datamarket_update('platos_preparados|tortillas', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 3780 to 4815
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5390]:
datamarket_update('platos_preparados|carne', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1151 to 3706
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5391]:
datamarket_update('platos_preparados|ensaladas', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 964 to 964
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5392]:
datamarket_update('platos_preparados|legumbres', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 396 to 4933
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5393]:
datamarket_update('platos_preparados|verduras', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 514 to 1967
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

# Procesamiento de la categoría "maquillaje"

In [5394]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'maquillaje']

Unnamed: 0,category,subcategory,subsubcategory
66,maquillaje,labios,
73,maquillaje,colorete_y_polvos,
188,maquillaje,ojos,
333,maquillaje,bases_de_maquillaje_y_corrector,
1051,maquillaje,pinceles_y_brochas,


In [5395]:
current_category = df_category[df_category["category_name"] == 'Maquillaje']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
334,Maquillaje,Bases de maquillaje y corrector,Maquillaje fluido,335
335,Maquillaje,Bases de maquillaje y corrector,Maquillaje compacto,336
336,Maquillaje,Bases de maquillaje y corrector,Correctores y prebase,337
337,Maquillaje,Colorete y polvos,Colorete,338
338,Maquillaje,Colorete y polvos,Polvos,339
339,Maquillaje,Labios,Cuidado,340
340,Maquillaje,Labios,Perfilador labios,341
341,Maquillaje,Labios,Pintalabios fijo,342
342,Maquillaje,Labios,Pintalabios cremoso y brillos,343
343,Maquillaje,Labios,Pintalabios mate,344


In [5396]:
df_mercadona[df_mercadona['subcategory_2_nivel_name'] == 'Maquillaje compacto']

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
3710,Maquillaje base Compacta Deliplus 01 beige,None 0.011 kg,5.5,5.5,ud,Maquillaje compacto,Maquillaje,Bases de maquillaje y corrector,False,336
3711,Maquillaje base Compacta Deliplus 02 beige rosado,None 0.011 kg,5.5,5.5,ud,Maquillaje compacto,Maquillaje,Bases de maquillaje y corrector,False,336
3712,Maquillaje base Compacta Deliplus 03 beige dorado,None 0.011 kg,5.5,5.5,ud,Maquillaje compacto,Maquillaje,Bases de maquillaje y corrector,False,336
3713,Maquillaje base Compacta Deliplus 04 beige oscuro,None 0.011 kg,5.5,5.5,ud,Maquillaje compacto,Maquillaje,Bases de maquillaje y corrector,False,336


In [5397]:
def clasificar_maquillaje(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'maquillaje|pinceles_y_brochas':
        return ('Maquillaje', 'Pinceles y brochas', 'Pinceles y brochas')

    if subcat == 'maquillaje|labios':
        if 'pintalabios' in name and 'mate' in name:
            return ('Maquillaje', 'Labios', 'Pintalabios mate')
        elif 'pintalabios' in name and ('brillo' in name or 'bálsamo' in name or 'creamy' in name):
            return ('Maquillaje', 'Labios', 'Pintalabios cremoso y brillos')
        elif 'fix' in name or 'fijo' in name:
            return ('Maquillaje', 'Labios', 'Pintalabios fijo')
        elif 'perfilador' in name:
            return ('Maquillaje', 'Labios', 'Perfilador labios')
        else:
            return ('Maquillaje', 'Labios', 'Cuidado')

    if subcat == 'maquillaje|colorete_y_polvos':
        if 'polvo' in name:
            return ('Maquillaje', 'Colorete y polvos', 'Polvos')
        else:
            return ('Maquillaje', 'Colorete y polvos', 'Colorete')

    if subcat == 'maquillaje|ojos':
        if 'perfilador' in name:
            return ('Maquillaje', 'Ojos', 'Perfilador de ojos')
        elif 'pestañas' in name:
            return ('Maquillaje', 'Ojos', 'Máscara de pestañas')
        elif 'sombra' in name:
            return ('Maquillaje', 'Ojos', 'Sombra de ojos')
        else:
            return ('Maquillaje', 'Ojos', 'Cejas')

    if subcat == 'maquillaje|bases_de_maquillaje_y_corrector':
        if 'corrector' in name or 'prebase' in name or 'iluminador' in name:
            return ('Maquillaje', 'Bases de maquillaje y corrector', 'Correctores y prebase')
        elif 'compacta' in name:
            return ('Maquillaje', 'Bases de maquillaje y corrector', 'Maquillaje compacto')
        else:
            return ('Maquillaje', 'Bases de maquillaje y corrector', 'Maquillaje fluido')



    return (pd.NA, pd.NA, pd.NA)

In [5398]:
datamarket_update('maquillaje|bases_de_maquillaje_y_corrector', clasificar_maquillaje)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 333 to 4817
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 1

In [5399]:
datamarket_update('maquillaje|ojos', clasificar_maquillaje)

<class 'pandas.core.frame.DataFrame'>
Index: 17 entries, 188 to 4887
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         17 non-null     int64  
 1   supermarket                17 non-null     object 
 2   brand_category             17 non-null     object 
 3   name                       17 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  17 non-null     object 
 6   trademark_propietary_flag  17 non-null     object 
 7   price                      17 non-null     float64
 8   reference_price            17 non-null     float64
 9   reference_unit             17 non-null     object 
 10  insert_date                17 non-null     object 
 11  price_corrected            17 non-null     bool   
 12  reference_price_corrected  17 non-null     bool   
 13  category_name              17 non-null     object 
 1

In [5400]:
datamarket_update('maquillaje|colorete_y_polvos', clasificar_maquillaje)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 73 to 3754
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14 

In [5401]:
datamarket_update('maquillaje|labios', clasificar_maquillaje)

<class 'pandas.core.frame.DataFrame'>
Index: 34 entries, 66 to 4966
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         34 non-null     int64  
 1   supermarket                34 non-null     object 
 2   brand_category             34 non-null     object 
 3   name                       34 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  34 non-null     object 
 6   trademark_propietary_flag  34 non-null     object 
 7   price                      34 non-null     float64
 8   reference_price            34 non-null     float64
 9   reference_unit             34 non-null     object 
 10  insert_date                34 non-null     object 
 11  price_corrected            34 non-null     bool   
 12  reference_price_corrected  34 non-null     bool   
 13  category_name              34 non-null     object 
 14

In [5402]:
datamarket_update('maquillaje|pinceles_y_brochas', clasificar_maquillaje)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1051 to 2838
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

# Procesamiento de la categoría "la_despensa"

### Subcategoría "Alimentacíon"

In [5403]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'la_despensa']

Unnamed: 0,category,subcategory,subsubcategory
13,la_despensa,alimentacion,arroz_y_cous_cous
18,la_despensa,panaderia_bolleria_y_pasteleria,masa_fresca_y_bases
62,la_despensa,alimentacion,vitaminas_y_complementos
119,la_despensa,aperitivos,snacks
146,la_despensa,lacteos,bebida_vegetal
...,...,...,...
3493,la_despensa,alimentacion,vino_de_cocinar
3765,la_despensa,dulce_y_desayuno,dulces_y_golosinas
3844,la_despensa,conservas_sopas_y_precocinados,conservas_de_carne
4335,la_despensa,conservas_sopas_y_precocinados,conservas_de_frutas


In [5404]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'alimentacion']

Unnamed: 0,category,subcategory,subsubcategory
13,la_despensa,alimentacion,arroz_y_cous_cous
62,la_despensa,alimentacion,vitaminas_y_complementos
448,la_despensa,alimentacion,sales_y_bicarbonatos
736,la_despensa,alimentacion,aceites_y_vinagres
859,la_despensa,alimentacion,salsas_y_tomate_frito
1366,la_despensa,alimentacion,pastas
1527,la_despensa,alimentacion,semillas
1691,la_despensa,alimentacion,legumbres
1708,la_despensa,alimentacion,harinas_y_levaduras
2317,la_despensa,alimentacion,nutricion_deportiva


In [5405]:
current_category = df_category[df_category["category_name"] == "Aceite, especias y salsas"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
0,"Aceite, especias y salsas","Aceite, vinagre y sal",Aceite de oliva,1
1,"Aceite, especias y salsas","Aceite, vinagre y sal","Aceite de girasol, semillas y maíz",2
2,"Aceite, especias y salsas","Aceite, vinagre y sal",Vinagre y otros aderezos,3
3,"Aceite, especias y salsas","Aceite, vinagre y sal",Sal y bicarbonato,4
4,"Aceite, especias y salsas",Especias,Hierbas,5
5,"Aceite, especias y salsas",Especias,Colorante y pimentón,6
6,"Aceite, especias y salsas",Especias,Pimienta,7
7,"Aceite, especias y salsas",Especias,Otras especias,8
8,"Aceite, especias y salsas",Especias,Sazonadores,9
9,"Aceite, especias y salsas","Mayonesa, ketchup y mostaza",Mayonesa,10


In [5406]:
def clasificar_aceites(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|alimentacion|aceites_y_vinagres':
        if 'oliva' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de oliva')
        elif 'girasol' in name or 'semillas' in name or 'maíz' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de girasol, semillas y maíz')
        elif 'vinagre' in name or 'limón' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Vinagre y otros aderezos')

    if subcat == 'la_despensa|alimentacion|sales_y_bicarbonatos':
        return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Sal y bicarbonato')

    if subcat == 'la_despensa|alimentacion|salsas_y_tomate_frito':
        if 'tomate' in name and 'frito' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Tomate frito')
        elif 'mayonesa' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mayonesa')
        elif 'ketchup' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Ketchup')
        elif 'mostaza' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mostaza')
        elif 'alioli' in name or 'ali-oli' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Allioli')
        elif 'soja' in name or 'teriyaki' in name or 'agridulce' in name or 'chili' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas orientales')
        elif 'barbacoa' in name or 'piri piri' in name or 'burger' in name or 'curry' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para carnes')
        elif 'fresca' in name or 'pesto' in name or 'boloñesa' in name or 'carbonara' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para pasta')
        else:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Otras salsas')

    if subcat == 'la_despensa|alimentacion|especias_y_sazonadores':
        if 'sal' in name or 'bicarbonato' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Sal y bicarbonato')
        elif 'pimienta' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Pimienta')
        elif 'pimentón' in name or 'colorante' in name or 'azafrán' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Colorante y pimentón')
        elif 'sazonador' in name or 'mezcla' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Sazonadores')
        elif any(x in name for x in ['orégano', 'perejil', 'romero', 'laurel', 'tomillo', 'cilantro', 'eneldo', 'hierbas']):
            return ('Aceite, especias y salsas', 'Especias', 'Hierbas')
        else:
            return ('Aceite, especias y salsas', 'Especias', 'Otras especias')

    if subcat == 'la_despensa|alimentacion|vino_de_cocinar':
       return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Vinagre y otros aderezos')

    return (pd.NA, pd.NA, pd.NA)

In [5407]:
datamarket_update('la_despensa|alimentacion|especias_y_sazonadores', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2701 to 2847
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5408]:
datamarket_update('la_despensa|alimentacion|aceites_y_vinagres', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 736 to 4824
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5409]:
datamarket_update('la_despensa|alimentacion|vino_de_cocinar', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3493 to 3493
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5410]:
datamarket_update('la_despensa|alimentacion|salsas_y_tomate_frito', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 859 to 2412
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5411]:
datamarket_update('la_despensa|alimentacion|sales_y_bicarbonatos', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 448 to 3340
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5412]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'alimentacion']

Unnamed: 0,category,subcategory,subsubcategory
13,la_despensa,alimentacion,arroz_y_cous_cous
62,la_despensa,alimentacion,vitaminas_y_complementos
448,la_despensa,alimentacion,sales_y_bicarbonatos
736,la_despensa,alimentacion,aceites_y_vinagres
859,la_despensa,alimentacion,salsas_y_tomate_frito
1366,la_despensa,alimentacion,pastas
1527,la_despensa,alimentacion,semillas
1691,la_despensa,alimentacion,legumbres
1708,la_despensa,alimentacion,harinas_y_levaduras
2317,la_despensa,alimentacion,nutricion_deportiva


In [5413]:
current_category = df_category[df_category["category_name"] == "Arroz, legumbres y pasta"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
41,"Arroz, legumbres y pasta",Arroz,Arroz,42
42,"Arroz, legumbres y pasta",Legumbres,Garbanzos,43
43,"Arroz, legumbres y pasta",Legumbres,Alubias,44
44,"Arroz, legumbres y pasta",Legumbres,Lentejas y otros,45
45,"Arroz, legumbres y pasta",Pasta y fideos,Fideos,46
46,"Arroz, legumbres y pasta",Pasta y fideos,"Macarrones, pajaritas y hélices",47
47,"Arroz, legumbres y pasta",Pasta y fideos,Spaghetti y tallarines,48
48,"Arroz, legumbres y pasta",Pasta y fideos,Pasta rellena,49
49,"Arroz, legumbres y pasta",Pasta y fideos,Fideos orientales,50
50,"Arroz, legumbres y pasta",Pasta y fideos,Lasaña y canelones,51


In [5414]:
def clasificar_legumbres(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|alimentacion|arroz_y_cous_cous':
        return ('Arroz, legumbres y pasta', 'Arroz', 'Arroz')

    if subcat == 'la_despensa|alimentacion|legumbres':
        if 'garbanzo' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Garbanzos')
        elif 'alubia' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Alubias')
        else:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Lentejas y otros')

    if subcat == 'la_despensa|alimentacion|pastas':
        if any(x in name for x in ['fideuá', 'fideo', 'estrellas', 'maravilla', 'piñones']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos')
        elif any(x in name for x in ['pajaritas', 'penne', 'tortiglioni', 'hélices', 'macarrón', 'fusilli', 'trottole', 'tiburón']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')
        elif any(x in name for x in ['tallarines', 'spaghetti', 'nidos', 'noodles', 'tagliatelle']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Spaghetti y tallarines')
        elif any(x in name for x in ['tortellini', 'ravioli', 'gnocchi', 'girasoles', 'medialunas']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Pasta rellena')
        elif 'orientales' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos orientales')
        elif 'canelones' in name or 'lazaña' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Lasaña y canelones')
        else:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')

    return (pd.NA, pd.NA, pd.NA)

In [5415]:
datamarket_update('la_despensa|alimentacion|pastas', clasificar_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1366 to 3643
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5416]:
datamarket_update('la_despensa|alimentacion|legumbres', clasificar_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1691 to 1691
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5417]:
datamarket_update('la_despensa|alimentacion|arroz_y_cous_cous', clasificar_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 13 to 2017
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [5418]:
df_mercadona[df_mercadona['name'].str.contains('Semillas', case=False, na=False)].head()

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
402,Mezcla semillas Hacendado,Paquete 0.2 kg,1.46,7.3,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
407,Semillas de chía Hacendado,Paquete 0.15 kg,1.36,9.067,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
413,Semillas sésamo tostado Hacendado,Paquete 0.15 kg,1.5,10.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
416,Semillas lino dorado Hacendado,Paquete 0.15 kg,1.19,7.934,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
428,"Combinado frutos secos, frutas desecadas y semillas de calabaza Hacendado",Paquete 0.2 kg,2.2,11.0,kg,Cocktails,Aperitivos,Frutos secos y fruta desecada,False,38


In [5419]:
def clasificar_semillas(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|alimentacion|semillas':
        if 'cocktail' in name or 'combinado' in name:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Cocktails')
        else:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')

    if subcat == 'la_despensa|alimentacion|harinas_y_levaduras':
        if 'harina' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Harina')
        else:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Levadura y preparado repostería')

    return (pd.NA, pd.NA, pd.NA)

In [5420]:
datamarket_update('la_despensa|alimentacion|semillas', clasificar_semillas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1527 to 1527
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5421]:
datamarket_update('la_despensa|alimentacion|harinas_y_levaduras', clasificar_semillas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1708 to 4717
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5422]:
df_mercadona[df_mercadona['name'].str.contains('Comprimidos', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
2915,Comprimidos Vitaminas y Minerales Deliplus,Caja 0.0279 kg,3.0,10.153,100 g,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2917,Comprimidos efervescentes vitamina C y zinc Deliplus sabor limón,Tubo 0.08 kg,1.95,2.438,100 g,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2934,Comprimidos efervescentes magnesio Deliplus 300 mg sabor naranja,Tubo 0.078 kg,1.95,2.5,100 g,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2935,Comprimidos cabello y uñas Deliplus,Caja 0.024 kg,3.95,0.132,ud,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258


In [5423]:
df_mercadona[df_mercadona['subcategory_name'] == 'Fitoterapia'].head()

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
2912,Aceite árbol del té 100% puro Deliplus con pipeta dosificadora,Bote 0.01 l,3.0,30.0,100 ml,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2913,Colágeno soluble sabor limón Colagen,Bote 0.25 kg,5.3,2.12,100 g,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2914,Barritas con proteínas Enervit Sport sabor coco y chocolate,Caja 0.12 kg,2.95,24.584,kg,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2915,Comprimidos Vitaminas y Minerales Deliplus,Caja 0.0279 kg,3.0,10.153,100 g,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258
2916,Cápsulas Dormir Deliplus,Caja 0.024 kg,3.95,0.132,ud,Fitoterapia,Fitoterapia y parafarmacia,Fitoterapia,False,258


In [5424]:
def clasificar_vitaminas(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['la_despensa|alimentacion|vitaminas_y_complementos', 'la_despensa|alimentacion|nutricion_deportiva']:
        return ('Fitoterapia y parafarmacia', 'Fitoterapia', 'Fitoterapia')

    return (pd.NA, pd.NA, pd.NA)

In [5425]:
datamarket_update('la_despensa|alimentacion|vitaminas_y_complementos', clasificar_vitaminas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 62 to 419
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14  

In [5426]:
datamarket_update('la_despensa|alimentacion|nutricion_deportiva', clasificar_vitaminas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2317 to 2648
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

### Subcategoría "Aperitivos"

In [5427]:
current_category = df_category[df_category["category_name"] == "Aperitivos"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
32,Aperitivos,Aceitunas y encurtidos,Aceitunas verdes,33
33,Aperitivos,Aceitunas y encurtidos,Aceitunas negras,34
34,Aperitivos,Aceitunas y encurtidos,Cóctel y banderillas,35
35,Aperitivos,Aceitunas y encurtidos,Pepinillos y otros encurtidos,36
36,Aperitivos,Frutos secos y fruta desecada,Frutos secos,37
37,Aperitivos,Frutos secos y fruta desecada,Cocktails,38
38,Aperitivos,Frutos secos y fruta desecada,Fruta desecada,39
39,Aperitivos,Patatas fritas y snacks,Patatas fritas,40
40,Aperitivos,Patatas fritas y snacks,Snacks,41


In [5428]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'aperitivos']

Unnamed: 0,category,subcategory,subsubcategory
119,la_despensa,aperitivos,snacks
168,la_despensa,aperitivos,patatas_fritas
216,la_despensa,aperitivos,torreznos_y_cortezas
221,despensa,aperitivos,
254,la_despensa,aperitivos,tortitas
553,la_despensa,aperitivos,galletas_saladas
969,la_despensa,aperitivos,aceitunas_y_encurtidos
1006,la_despensa,aperitivos,palomitas
1897,la_despensa,aperitivos,frutas_desecadas
3252,la_despensa,aperitivos,frutos_secos


In [5429]:
def clasificar_aperitivos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|aperitivos|aceitunas_y_encurtidos':
        if 'aceitunas verdes' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas verdes')
        elif 'aceitunas negras' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas negras')
        elif 'abanderillas' in name or 'mix' in name or 'cóctel' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Cóctel y banderillas')
        else:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Pepinillos y otros encurtidos')

    if subcat == 'la_despensa|aperitivos|frutos_secos':
        if 'cocktail' in name or 'combinado' in name:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Cocktails')
        else:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')

    if subcat == 'la_despensa|aperitivos|patatas_fritas':
        return ('Aperitivos', 'Patatas fritas y snacks', 'Patatas fritas')

    if subcat in ['la_despensa|aperitivos|snacks', 'la_despensa|aperitivos|torreznos_y_cortezas']:
        return ('Aperitivos', 'Patatas fritas y snacks', 'Snacks')

    if subcat == 'la_despensa|aperitivos|frutas_desecadas':
        return ('Aperitivos', 'Frutos secos y fruta desecada', 'Fruta desecada')

    if subcat == 'la_despensa|aperitivos|tortitas':
        return ('Cereales y galletas', 'Tortitas', 'Tortitas')

    if subcat == 'la_despensa|aperitivos|galletas_saladas':
        return ('Cereales y galletas', 'Galletas', 'Galletas integrales y digestive')

    if subcat == 'la_despensa|aperitivos|palomitas':
        return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')


    return (pd.NA, pd.NA, pd.NA)

In [5430]:
current_category = df_category[df_category["category_name"] == 'Cereales y galletas']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
138,Cereales y galletas,Cereales,Cereales,139
139,Cereales y galletas,Cereales,Cereales integrales y muesli,140
140,Cereales y galletas,Cereales,Barritas de cereales,141
141,Cereales y galletas,Galletas,Galletas desayuno,142
142,Cereales y galletas,Galletas,Galletas integrales y digestive,143
143,Cereales y galletas,Galletas,Con chocolate y rellenas,144
144,Cereales y galletas,Galletas,Galletas surtidas,145
145,Cereales y galletas,Tortitas,Tortitas,146


In [5431]:
df_mercadona[df_mercadona['name'].str.contains('Torreznos', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
473,Torreznos sabor barbacoa Hacendado,Paquete 0.1 kg,1.55,15.5,kg,Snacks,Aperitivos,Patatas fritas y snacks,False,41
476,Torreznos Hacendado,Paquete 0.12 kg,1.65,13.75,kg,Snacks,Aperitivos,Patatas fritas y snacks,False,41


In [5432]:
datamarket_update('la_despensa|aperitivos|patatas_fritas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 168 to 4556
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5433]:
datamarket_update('la_despensa|aperitivos|aceitunas_y_encurtidos', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 969 to 969
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5434]:
datamarket_update('la_despensa|aperitivos|aceitunas_y_encurtidos', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 969 to 969
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5435]:
datamarket_update('la_despensa|aperitivos|frutos_secos', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 3252 to 3944
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5436]:
datamarket_update('la_despensa|aperitivos|snacks', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 119 to 4873
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5437]:
datamarket_update('la_despensa|aperitivos|torreznos_y_cortezas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 216 to 4199
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5438]:
datamarket_update('la_despensa|aperitivos|frutas_desecadas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1897 to 2973
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5439]:
datamarket_update('la_despensa|aperitivos|tortitas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 254 to 4206
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5440]:
datamarket_update('la_despensa|aperitivos|galletas_saladas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 553 to 1516
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5441]:
datamarket_update('la_despensa|aperitivos|palomitas', clasificar_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1006 to 4725
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

### Subcategoría "Conservas_Sopas y Precocinados"

In [5442]:
current_category = df_category[df_category["category_name"] == 'Conservas, caldos y cremas']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
190,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Atún,191
191,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Bonito,192
192,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Caballa y melva,193
193,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Sardinas,194
194,"Conservas, caldos y cremas",Atún y otras conservas de pescado,Otras conservas de pescado,195
195,"Conservas, caldos y cremas",Berberechos y mejillones,Berberechos y almejas,196
196,"Conservas, caldos y cremas",Berberechos y mejillones,Mejillones,197
197,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas verdura,198
198,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas fruta,199
199,"Conservas, caldos y cremas",Gazpacho y cremas,Gazpacho y salmorejo,200


In [5443]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'conservas_sopas_y_precocinados']

Unnamed: 0,category,subcategory,subsubcategory
195,la_despensa,conservas_sopas_y_precocinados,conservas_de_tomate
235,la_despensa,conservas_sopas_y_precocinados,tortillas_empanadillas_y_sandwiches
580,la_despensa,conservas_sopas_y_precocinados,caldos_sopas_y_pure
1365,la_despensa,conservas_sopas_y_precocinados,conservas_de_pescado_y_marisco
1411,la_despensa,conservas_sopas_y_precocinados,platos_tradicionales
1608,la_despensa,conservas_sopas_y_precocinados,conservas_de_vegetales
1625,la_despensa,conservas_sopas_y_precocinados,platos_preparados
1722,la_despensa,conservas_sopas_y_precocinados,gazpacho_y_salmorejo
1900,la_despensa,conservas_sopas_y_precocinados,pizzas
3844,la_despensa,conservas_sopas_y_precocinados,conservas_de_carne


In [5444]:
def clasificar_conservas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_pescado_y_marisco':
        if 'mejillones' in name:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Mejillones')
        elif 'berberechos' in name or 'almejas' in name:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Berberechos y almejas')
        elif 'atún' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Atún')
        elif 'bonito' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Bonito')
        elif 'caballa' in name or 'melva' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Caballa y melva')
        elif 'sardina' in name or 'sardinilla' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Sardinas')
        else:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Otras conservas de pescado')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_vegetales':
        return ('Conservas, caldos y cremas', 'Conservas de verdura y frutas', 'Conservas verdura')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|gazpacho_y_salmorejo':
        return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Gazpacho y salmorejo')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_tomate':
            return ('Conservas, caldos y cremas', 'Tomate', 'Tomate')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_frutas':
        return ('Conservas, caldos y cremas', 'Conservas de verdura y frutas', 'Conservas fruta')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|caldos_sopas_y_pure':
        if 'en pastillas' in name:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo en pastillas')
        elif 'caldo' in name:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo líquido')
        elif 'crema' in name or 'pure' in name:
            return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Cremas y puré')
        else:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Sopa')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_carne':
        if 'salchichas' in name:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Salchichas')
        elif 'magro' in name and 'adobado' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')
        elif 'magro de cerdo' in name or 'crema de Crema de jamón' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')
        else:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')


    return (pd.NA, pd.NA, pd.NA)

In [5445]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|conservas_de_carne', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 3844 to 4609
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5446]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|conservas_de_pescado_y_marisco', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1365 to 4448
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5447]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|conservas_de_vegetales', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1608 to 1608
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5448]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|gazpacho_y_salmorejo', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1722 to 4175
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5449]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|conservas_de_tomate', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 195 to 3928
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5450]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|conservas_de_frutas', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 4335 to 4741
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5451]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|caldos_sopas_y_pure', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 580 to 1471
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5452]:
df_datamarket[df_datamarket['brand_category'] == 'la_despensa|conservas_sopas_y_precocinados|conservas_de_carne']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
3844,25857563,carrefour.es,la_despensa|conservas_sopas_y_precocinados|conservas_de_carne,Crema de jamón york Iberitos 250 g.,,otras marcas,False,2.55,10.2,kg,2023-03-15,False,False,Carne,Empanados y elaborados,Empanados y elaborados
3974,25857564,carrefour.es,la_despensa|conservas_sopas_y_precocinados|conservas_de_carne,Pechuga de pavo al punto de sal Classic Carrefour pack de 2 unidades de 52 g.,,carrefour,True,2.19,26.07,kg,2023-03-15,False,False,Carne,Empanados y elaborados,Empanados y elaborados
4082,25857562,carrefour.es,la_despensa|conservas_sopas_y_precocinados|conservas_de_carne,Magro de cerdo ibérico cocido Coren 200 g.,,otras marcas,False,2.65,13.25,kg,2023-03-15,False,False,Carne,Empanados y elaborados,Empanados y elaborados
4609,25857561,carrefour.es,la_despensa|conservas_sopas_y_precocinados|conservas_de_carne,Magro de cerdo cocido Iberitos lata de 200 g.,,otras marcas,False,2.65,13.25,kg,2023-03-15,False,False,Carne,Empanados y elaborados,Empanados y elaborados


In [5453]:
df_mercadona[df_mercadona['name'].str.contains('Magro', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
377,Berenjenas de Almagro aliñada Antonio,Bote 0.8 kg,3.89,11.115,kg,Pepinillos y otros encurtidos,Aperitivos,Aceitunas y encurtidos,False,36
1269,Fiambre de magro de cerdo adobado,Paquete 0.37 kg,2.68,7.244,kg,Cerdo,Carne,Cerdo,False,130
1295,Longaniza magro fresca,Bandeja 0.535 kg,3.37,6.3,kg,Embutido,Carne,Embutido,False,133
1361,Fiambre de magro de cerdo adobado,Paquete 0.37 kg,2.68,7.244,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1511,Fiambre magro sándwich Hacendado lonchas,Paquete 0.3 kg,1.85,6.167,kg,Jamón cocido,Charcutería y quesos,Aves y jamón cocido,False,148
1513,Magro de cerdo cocido Coren,Lata 0.24 kg,2.69,11.209,kg,Jamón cocido,Charcutería y quesos,Aves y jamón cocido,False,148


In [5454]:
df_mercadona[df_mercadona['subcategory_2_nivel_name'] == 'Empanados y elaborados']

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1350,"Brochetas de pollo, pimiento verde y tocino de cerdo con varilla",Bandeja 0.37 kg,3.14,8.5,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1351,Flautas de pollo y queso,Bandeja 0.28 kg,2.2,8.059,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1352,Flautas de bacón y queso,Bandeja 0.28 kg,2.2,7.972,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1353,Filetes lomo de cerdo adobado,Bandeja 0.47 kg,3.29,7.0,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1354,Filetes lomo de cerdo adobado familiar,Bandeja 0.9 kg,6.08,6.75,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1355,San Jacobos de cerdo sin gluten,Bandeja 0.4 kg,2.52,6.3,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1356,Filetes pechuga de pollo finas hierbas marinadas,Bandeja 0.65 kg,4.81,7.4,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1357,Filetes pechuga de pollo marinadas empanadas sin gluten,Bandeja 0.36 kg,2.81,7.8,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1358,Pinchos de cerdo sin varilla,Bandeja 0.51 kg,4.05,7.95,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1359,Medallones preparado de solomillo cerdo provenzal,Bandeja 0.4 kg,3.96,9.9,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138


In [5455]:
df_category['category_name'].unique()

array(['Aceite, especias y salsas', 'Agua y refrescos', 'Aperitivos',
       'Arroz, legumbres y pasta', 'Azúcar, caramelos y chocolate',
       'Bebé', 'Bodega', 'Cacao, café e infusiones', 'Carne',
       'Cereales y galletas', 'Charcutería y quesos', 'Congelados',
       'Conservas, caldos y cremas', 'Cuidado del cabello',
       'Cuidado facial y corporal', 'Fitoterapia y parafarmacia',
       'Fruta y verdura', 'Huevos, leche y mantequilla',
       'Limpieza y hogar', 'Maquillaje', 'Marisco y pescado', 'Mascotas',
       'Panadería y pastelería', 'Pizzas y platos preparados',
       'Postres y yogures', 'Zumos'], dtype=object)

In [5456]:
current_category = df_category[df_category["category_name"] == 'Pizzas y platos preparados']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
404,Pizzas y platos preparados,Listo para Comer,Platos calientes,405
405,Pizzas y platos preparados,Listo para Comer,Platos fríos,406
406,Pizzas y platos preparados,Pizzas,Pizzas refrigeradas,407
407,Pizzas y platos preparados,Pizzas,Pizzas congeladas,408
408,Pizzas y platos preparados,Pizzas,Base de pizza,409
409,Pizzas y platos preparados,Pizzas,"Roscas, quiche y baguettes",410
410,Pizzas y platos preparados,Platos preparados calientes,Pasta,411
411,Pizzas y platos preparados,Platos preparados calientes,Arroz,412
412,Pizzas y platos preparados,Platos preparados calientes,Carne,413
413,Pizzas y platos preparados,Platos preparados calientes,Tortilla,414


In [5457]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'conservas_sopas_y_precocinados']

Unnamed: 0,category,subcategory,subsubcategory
195,la_despensa,conservas_sopas_y_precocinados,conservas_de_tomate
235,la_despensa,conservas_sopas_y_precocinados,tortillas_empanadillas_y_sandwiches
580,la_despensa,conservas_sopas_y_precocinados,caldos_sopas_y_pure
1365,la_despensa,conservas_sopas_y_precocinados,conservas_de_pescado_y_marisco
1411,la_despensa,conservas_sopas_y_precocinados,platos_tradicionales
1608,la_despensa,conservas_sopas_y_precocinados,conservas_de_vegetales
1625,la_despensa,conservas_sopas_y_precocinados,platos_preparados
1722,la_despensa,conservas_sopas_y_precocinados,gazpacho_y_salmorejo
1900,la_despensa,conservas_sopas_y_precocinados,pizzas
3844,la_despensa,conservas_sopas_y_precocinados,conservas_de_carne


Nos queda encontrar las categorías adecuadas para cuatro subsubcategorías en las que vemos productos como pizza y platos preparados.

In [5458]:
def clasificar_preparados(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|pizzas':
        if 'ultracongelada' in name or 'congelada' in name:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas congeladas')
        elif any(x in name for x in ['piadinas', 'base', 'bases', 'cocas', 'masa']):
            return ('Pizzas y platos preparados', 'Pizzas', 'Base de pizza')
        else:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas refrigeradas')

    if subcat in ['la_despensa|conservas_sopas_y_precocinados|platos_preparados', 'la_despensa|conservas_sopas_y_precocinados|platos_tradicionales']:
        if 'ensaladilla' in name or 'ensalada' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Ensaladilla')
        elif 'sándwich' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Sándwich')
        elif 'hummus' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Hummus y otros')
        else:
            return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')

    if subcat == 'la_despensa|conservas_sopas_y_precocinados|tortillas_empanadillas_y_sandwiches':
        if 'tortilla' in name:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Tortilla')
        elif 'sándwich' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Sándwich')
        else:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Otros')


    return (pd.NA, pd.NA, pd.NA)

In [5459]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|tortillas_empanadillas_y_sandwiches', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 235 to 2304
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5460]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|platos_tradicionales', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1411 to 3721
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5461]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|platos_preparados', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1625 to 3880
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5462]:
datamarket_update('la_despensa|conservas_sopas_y_precocinados|pizzas', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1900 to 4147
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

### Subcategoría "dulce_y_desayuno"

In [5463]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'dulce_y_desayuno']

Unnamed: 0,category,subcategory,subsubcategory
189,la_despensa,dulce_y_desayuno,infusiones_y_tes
652,la_despensa,dulce_y_desayuno,miel
981,la_despensa,dulce_y_desayuno,cremas_de_cacao_avellana_y_cacahuete
1052,la_despensa,dulce_y_desayuno,chocolates_turrones_y_bombones
1112,la_despensa,dulce_y_desayuno,azucar_y_edulcorantes
1456,la_despensa,dulce_y_desayuno,galletas
1655,la_despensa,dulce_y_desayuno,cafes
1676,la_despensa,dulce_y_desayuno,siropes_aromas_y_decoracion
1677,la_despensa,dulce_y_desayuno,reposteria_para_preparar
2844,la_despensa,dulce_y_desayuno,cacaos_y_cereales_solubles


In [5464]:
current_category = df_category[df_category["category_name"] == "Azúcar, caramelos y chocolate"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
51,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Azúcar,52
52,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Edulcorante y otros,53
53,"Azúcar, caramelos y chocolate",Chicles y caramelos,Chicles,54
54,"Azúcar, caramelos y chocolate",Chicles y caramelos,Caramelos,55
55,"Azúcar, caramelos y chocolate",Chocolate,Chocolate negro,56
56,"Azúcar, caramelos y chocolate",Chocolate,Chocolate con leche,57
57,"Azúcar, caramelos y chocolate",Chocolate,Chocolate blanco,58
58,"Azúcar, caramelos y chocolate",Chocolate,Chocolatinas,59
59,"Azúcar, caramelos y chocolate",Chocolate,Bombones,60
60,"Azúcar, caramelos y chocolate",Chocolate,Cremas de untar,61


In [5465]:
def clasificar_cacao(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|dulce_y_desayuno|cafes':
        if 'cápsulas' in name and 'nespresso' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Nespresso')
        elif 'cápsulas' in name and 'dolce gusto' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Dolce gusto')
        elif 'cápsulas' in name and 'tassimo' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Tassimo')
        elif 'monodosis' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Monodosis')
        elif 'molido' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café molido')
        elif 'en grano' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café en grano')
        elif 'soluble' in name:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Café soluble')
        else:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Otros')


    if subcat == 'la_despensa|dulce_y_desayuno|cacaos_y_cereales_solubles':
        if 'chocolate' in name:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Chocolate a la taza')
        else:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Cacao soluble')

    if subcat == 'la_despensa|dulce_y_desayuno|infusiones_y_tes':
        if 'té' in name:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Té')
        else:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Infusiones')

    return (pd.NA, pd.NA, pd.NA)

In [5466]:
datamarket_update('la_despensa|dulce_y_desayuno|infusiones_y_tes', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 189 to 3142
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5467]:
datamarket_update('la_despensa|dulce_y_desayuno|cacaos_y_cereales_solubles', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2844 to 3321
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5468]:
datamarket_update('la_despensa|dulce_y_desayuno|cafes', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1655 to 1724
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5469]:
def clasificar_azucar(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|dulce_y_desayuno|miel':
        return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Miel')

    if subcat == 'la_despensa|dulce_y_desayuno|cremas_de_cacao_avellana_y_cacahuete':
        return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Cremas de untar')

    if subcat == 'la_despensa|dulce_y_desayuno|azucar_y_edulcorantes':
        if 'azúcar' in name:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Azúcar')
        else:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Edulcorante y otros')

    if subcat == 'la_despensa|dulce_y_desayuno|mermeladas_membrillos_y_compotas':
        if 'mermelada' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Mermelada')
        else:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Confitura y otros')

    if subcat == 'la_despensa|dulce_y_desayuno|chocolates_turrones_y_bombones':
        if 'bombones' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Bombones')
        elif any(x in name for x in ['barritas', 'huevos', 'pasqua', 'disquitos', 'cacahuetes', 'bolas', 'huevo', 'figuras', 'figura']):
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolatinas')
        elif 'chocolate negro' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate negro')
        elif 'chocolate blanco' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate blanco')
        else:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate con leche')

    if subcat == 'la_despensa|dulce_y_desayuno|dulces_y_golosinas':
        if 'chicles' in name:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Chicles')
        elif 'caramelos' in name:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Caramelos')
        else:
            return ('Azúcar, caramelos y chocolate', 'Golosinas', 'Golosinas')

    return (pd.NA, pd.NA, pd.NA)

In [5470]:
datamarket_update('la_despensa|dulce_y_desayuno|dulces_y_golosinas', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3765 to 4155
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5471]:
datamarket_update('la_despensa|dulce_y_desayuno|chocolates_turrones_y_bombones', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1052 to 2368
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5472]:
datamarket_update('la_despensa|dulce_y_desayuno|mermeladas_membrillos_y_compotas', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2990 to 3442
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5473]:
datamarket_update('la_despensa|dulce_y_desayuno|azucar_y_edulcorantes', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1112 to 4106
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5474]:
datamarket_update('la_despensa|dulce_y_desayuno|cremas_de_cacao_avellana_y_cacahuete', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 981 to 2470
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5475]:
datamarket_update('la_despensa|dulce_y_desayuno|miel', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 652 to 3377
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5476]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'dulce_y_desayuno']

Unnamed: 0,category,subcategory,subsubcategory
189,la_despensa,dulce_y_desayuno,infusiones_y_tes
652,la_despensa,dulce_y_desayuno,miel
981,la_despensa,dulce_y_desayuno,cremas_de_cacao_avellana_y_cacahuete
1052,la_despensa,dulce_y_desayuno,chocolates_turrones_y_bombones
1112,la_despensa,dulce_y_desayuno,azucar_y_edulcorantes
1456,la_despensa,dulce_y_desayuno,galletas
1655,la_despensa,dulce_y_desayuno,cafes
1676,la_despensa,dulce_y_desayuno,siropes_aromas_y_decoracion
1677,la_despensa,dulce_y_desayuno,reposteria_para_preparar
2844,la_despensa,dulce_y_desayuno,cacaos_y_cereales_solubles


In [5477]:
def clasificar_galletas(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'la_despensa|dulce_y_desayuno|cereales_y_barritas':
        if 'barrita' in name:
            return ('Cereales y galletas', 'Cereales', 'Barritas de cereales')
        elif any(x in name for x in ['crunchy', 'copos', 'muesli', 'granola', 'integral']):
            return ('Cereales y galletas', 'Cereales', 'Cereales integrales y muesli')
        else:
            return ('Cereales y galletas', 'Cereales', 'Cereales')

    if subcat == 'la_despensa|dulce_y_desayuno|galletas':
        if any(x in name for x in ['chocolate', 'oreo', 'rellenos', 'rellenas', 'crema', 'mermelada']):
            return ('Cereales y galletas', 'Galletas', 'Con chocolate y rellenas')
        elif 'integral' in name or 'digestive' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas integrales y digestive')
        elif 'surtido' in name or 'mix' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas surtidas')
        else:
            return ('Cereales y galletas', 'Galletas', 'Galletas desayuno')


    return (pd.NA, pd.NA, pd.NA)

In [5478]:
datamarket_update('la_despensa|dulce_y_desayuno|galletas', clasificar_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1456 to 1456
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5479]:
datamarket_update('la_despensa|dulce_y_desayuno|cereales_y_barritas', clasificar_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3109 to 3109
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5480]:
df_mercadona[df_mercadona['name'].str.contains('decoración', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id


In [5481]:
def clasificar_reposteria(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|dulce_y_desayuno|siropes_aromas_y_decoracion':
        if 'siropo' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Confitura y otros')
        elif 'vela' in name:
            return ('Panadería y pastelería', 'Velas y decoración', 'Velas')
        else:
            return ('Panadería y pastelería', 'Velas y decoración', 'Decoración')

    if subcat == 'la_despensa|dulce_y_desayuno|reposteria_para_preparar':
        if 'harina' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Harina')
        elif 'masa' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Masas')
        else:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Levadura y preparado repostería')


    return (pd.NA, pd.NA, pd.NA)

In [5482]:
datamarket_update('la_despensa|dulce_y_desayuno|siropes_aromas_y_decoracion', clasificar_reposteria)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1676 to 4883
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5483]:
datamarket_update('la_despensa|dulce_y_desayuno|reposteria_para_preparar', clasificar_reposteria)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1677 to 4451
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

### Subcategoria "huevos"

In [5484]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'huevos']

Unnamed: 0,category,subcategory,subsubcategory
688,la_despensa,huevos,huevos_frescos
2561,huevos_leche_y_mantequilla,huevos,
4606,la_despensa,huevos,claras_de_huevo


In [5485]:
def clasificar_huevos(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat in ['la_despensa|huevos|huevos_frescos', 'la_despensa|huevos|claras_de_huevo']:
        return ('Huevos, leche y mantequilla', 'Huevos', 'Huevos')

    return (pd.NA, pd.NA, pd.NA)

In [5486]:
datamarket_update('la_despensa|huevos|huevos_frescos', clasificar_huevos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 688 to 4800
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5487]:
datamarket_update('la_despensa|huevos|claras_de_huevo', clasificar_huevos)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 4606 to 4895
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

### Subcaregoría "panaderia_bolleria_y_pasteleria"

In [5488]:
current_category = df_category[df_category["category_name"] == 'Panadería y pastelería']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
381,Panadería y pastelería,Bollería de horno,Bollería dulce,382
382,Panadería y pastelería,Bollería de horno,Bollería salada,383
383,Panadería y pastelería,Bollería envasada,Bollería envasada,384
384,Panadería y pastelería,Bollería envasada,Pastelitos surtidos,385
385,Panadería y pastelería,Harina y preparado repostería,Harina,386
386,Panadería y pastelería,Harina y preparado repostería,Levadura y preparado repostería,387
387,Panadería y pastelería,Harina y preparado repostería,Masas,388
388,Panadería y pastelería,Pan de horno,Barra de pan,389
389,Panadería y pastelería,Pan de horno,Pan de bocadillo,390
390,Panadería y pastelería,Pan de horno,Pan rebanado,391


In [5489]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'panaderia_bolleria_y_pasteleria']

Unnamed: 0,category,subcategory,subsubcategory
18,la_despensa,panaderia_bolleria_y_pasteleria,masa_fresca_y_bases
258,la_despensa,panaderia_bolleria_y_pasteleria,especialidades_de_pan
412,la_despensa,panaderia_bolleria_y_pasteleria,pan_perrito_y_burguer
558,la_despensa,panaderia_bolleria_y_pasteleria,pan_de_molde
1141,la_despensa,panaderia_bolleria_y_pasteleria,bolleria
1322,la_despensa,panaderia_bolleria_y_pasteleria,pan_tostado_biscottes_y_barritas
1396,la_despensa,panaderia_bolleria_y_pasteleria,pasteleria
2184,la_despensa,panaderia_bolleria_y_pasteleria,pan_rallado_y_rebozados
2632,la_despensa,panaderia_bolleria_y_pasteleria,colines_picos_y_crakers


In [5490]:
df_datamarket[df_datamarket['brand_category'] == 'la_despensa|panaderia_bolleria_y_pasteleria|pan_rallado_y_rebozados']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
2184,25857116,carrefour.es,la_despensa|panaderia_bolleria_y_pasteleria|pan_rallado_y_rebozados,Preparado para rebozar crujiente crunchy crumbs Esgir sin gluten 200 g.,,otras marcas,False,2.05,10.25,kg,2023-03-15,False,False,,,
2451,25857112,carrefour.es,la_despensa|panaderia_bolleria_y_pasteleria|pan_rallado_y_rebozados,Pan rallado Carrefour 750 g.,,carrefour,True,0.95,1.27,kg,2023-03-15,False,False,,,


In [5491]:
def clasificar_panes(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|pan_de_molde':
        return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de molde')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|masa_fresca_y_bases':
        return ('Panadería y pastelería', 'Harina y preparado repostería', 'Masas')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|pan_perrito_y_burguer':
        return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|especialidades_de_pan':
        if any(x in name for x in ['tortillas', 'hot dog', 'hamburguesa', 'pita', 'piadinas', 'bocados']):
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')
        else:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Otros panes')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|colines_picos_y_crakers':
        if any(x in name for x in ['piquitos', 'picos', 'grissini']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picos')
        elif any(x in name for x in ['rosquilletas', 'palitos', 'panes especiales']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Rosquilletas')
        else:
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picatostes')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|pan_tostado_biscottes_y_barritas':
        if 'tostado' in name:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan tostado')
        elif 'picatostes':
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picatostes')
        else:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Crakers y tartaletas')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|pan_rallado_y_rebozados':
        return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan rallado')

    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|bolleria':
        if any(x in name for x in ['empanadillas', 'saladas', 'empanada']):
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería salada')
        elif any(x in name for x in ['paquete', 'pack', 'bolsa']):
            return ('Panadería y pastelería', 'Bollería envasada', 'Bollería envasada')
        elif 'surtido' in name:
            return ('Panadería y pastelería', 'Bollería envasada', 'Pastelitos surtidos')
        else:
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería dulce')


    if subcat == 'la_despensa|panaderia_bolleria_y_pasteleria|pasteleria':
        if 'infantil' in name:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas infantiles')
        else:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas')


    return (pd.NA, pd.NA, pd.NA)

In [5492]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|pasteleria', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1396 to 2921
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5493]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|bolleria', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1141 to 4655
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5494]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|pan_rallado_y_rebozados', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2184 to 2451
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5495]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|pan_tostado_biscottes_y_barritas', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1322 to 4997
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5496]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|colines_picos_y_crakers', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2632 to 3784
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5497]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|especialidades_de_pan', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 258 to 4458
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5498]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|pan_perrito_y_burguer', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 412 to 3819
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5499]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|masa_fresca_y_bases', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 18 to 3985
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14 

In [5500]:
datamarket_update('la_despensa|panaderia_bolleria_y_pasteleria|pan_de_molde', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 558 to 3286
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

### Subcaregoría "lacteos"

In [5501]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'lacteos']

Unnamed: 0,category,subcategory,subsubcategory
146,la_despensa,lacteos,bebida_vegetal
325,la_despensa,lacteos,leche
643,la_despensa,lacteos,mantequillas_y_margarinas
1580,la_despensa,lacteos,nata
1865,la_despensa,lacteos,batidos_y_horchatas
2305,la_despensa,lacteos,listo_para_beber
3464,la_despensa,lacteos,leche_condensada_evaporada_y_en_polvo


In [5502]:
current_category = df_category[df_category["category_name"] == 'Huevos, leche y mantequilla']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
278,"Huevos, leche y mantequilla",Huevos,Huevos,279
279,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche semidesnatada,280
280,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche desnatada,281
281,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche entera,282
282,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Bebidas vegetales,283
283,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Batidos,284
284,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche Infantil,285
285,"Huevos, leche y mantequilla",Leche y bebidas vegetales,Leche condensada y otros,286
286,"Huevos, leche y mantequilla",Mantequilla y margarina,Mantequilla,287
287,"Huevos, leche y mantequilla",Mantequilla y margarina,Margarina,288


In [5503]:
df_datamarket[df_datamarket['brand_category'] == 'la_despensa|lacteos|listo_para_beber']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
2305,25856022,carrefour.es,la_despensa|lacteos|listo_para_beber,Café latte macchiatto Carrefour sin gluten 250 ml.,,carrefour,True,0.8,3.2,l,2023-03-15,False,False,,,
3262,25856027,carrefour.es,la_despensa|lacteos|listo_para_beber,Café cookies Nescafé Latte sin gluten 190 ml.,,otras marcas,False,1.73,9.11,l,2023-03-15,False,False,,,
4912,25856024,carrefour.es,la_despensa|lacteos|listo_para_beber,Café latte macchiato caramelo Carrefour sin gluten 250 ml.,,carrefour,True,0.91,3.64,l,2023-03-15,False,False,,,


In [5504]:
df_mercadona[df_mercadona['name'].str.contains('Café', case=False, na=False)].tail(20)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1126,Café soluble descafeinado Nescafé classic,Tarro 0.2 kg,7.85,39.25,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1127,Café soluble descafeinado Nescafé classic,Tarro 0.1 kg,4.95,49.5,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1128,Café soluble Espresso Creme Hacendado,Bote 0.08 kg,2.9,36.25,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1129,Café soluble cappuccino Hacendado,Bote 0.25 kg,2.75,11.0,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1130,Café soluble cappuccino caramelo Hacendado,Bote 0.25 kg,3.0,12.0,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1131,Café soluble Selection Hacendado,Bote 0.1 kg,3.9,39.0,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1132,Café soluble cappuccino sabor a avellana Hacendado,Bote 0.25 kg,2.95,11.8,kg,Café soluble,"Cacao, café e infusiones",Café soluble y otras bebidas,False,121
1133,Café con leche cappuccino Hacendado,Vaso 0.25 l,0.85,3.4,L,Bebidas frías,"Cacao, café e infusiones",Café soluble y otras bebidas,False,122
1134,Café con leche light sin lactosa Hacendado,Vaso 0.25 l,0.85,3.4,L,Bebidas frías,"Cacao, café e infusiones",Café soluble y otras bebidas,False,122
1135,Café con leche espresso Hacendado,Vaso 0.25 l,0.85,3.4,L,Bebidas frías,"Cacao, café e infusiones",Café soluble y otras bebidas,False,122


In [5505]:
def clasificar_leche(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'la_despensa|lacteos|nata':
        return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Nata')

    if subcat == 'la_despensa|lacteos|leche':
        if 'semidesnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche semidesnatada')
        elif 'desnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche desnatada')
        elif 'infantil' in name or 'preparado lácteo' in name or 'bebida láctea' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche Infantil')
        else:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche entera')

    if subcat == 'la_despensa|lacteos|batidos_y_horchatas':
        return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Batidos')


    if subcat == 'la_despensa|lacteos|bebida_vegetal':
         return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Bebidas vegetales')


    if subcat == 'la_despensa|lacteos|leche_condensada_evaporada_y_en_polvo':
        return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche condensada y otros')

    if subcat == 'la_despensa|lacteos|listo_para_beber':
        return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Bebidas frías')


    if subcat == 'la_despensa|lacteos|mantequillas_y_margarinas':
        if 'mantequilla' in name:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Mantequilla')
        else:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Margarina')

    return (pd.NA, pd.NA, pd.NA)

In [5506]:
datamarket_update('la_despensa|lacteos|mantequillas_y_margarinas', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 643 to 2841
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5507]:
datamarket_update('la_despensa|lacteos|listo_para_beber', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2305 to 4912
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5508]:
datamarket_update('la_despensa|lacteos|leche_condensada_evaporada_y_en_polvo', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 3464 to 4281
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5509]:
datamarket_update('la_despensa|lacteos|batidos_y_horchatas', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1865 to 2918
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5510]:
datamarket_update('la_despensa|lacteos|bebida_vegetal', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 146 to 4743
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5511]:
datamarket_update('la_despensa|lacteos|leche', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 325 to 2275
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5512]:
datamarket_update('la_despensa|lacteos|nata', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1580 to 2700
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

### Subcaregoría "yogures_y_postres"

In [5513]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'yogures_y_postres']

Unnamed: 0,category,subcategory,subsubcategory
359,la_despensa,yogures_y_postres,otros_postres
367,la_despensa,yogures_y_postres,yogur_natural_y_sabores
492,la_despensa,yogures_y_postres,yogures_infantiles
508,la_despensa,yogures_y_postres,natillas_cremas_y_copas
510,la_despensa,yogures_y_postres,vegetal
524,la_despensa,yogures_y_postres,proteina
767,la_despensa,yogures_y_postres,flanes
1224,la_despensa,yogures_y_postres,yogures_liquidos
1616,la_despensa,yogures_y_postres,colesterol_oseo_y_tension
1933,la_despensa,yogures_y_postres,bifidus


In [5514]:
current_category = df_category[df_category["category_name"] == 'Postres y yogures']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
421,Postres y yogures,Bífidus,Bífidus de sabores,422
422,Postres y yogures,Bífidus,Bífidus naturales,423
423,Postres y yogures,Flan y natillas,Flan,424
424,Postres y yogures,Flan y natillas,Natillas,425
425,Postres y yogures,Gelatina y otros postres,Gelatina,426
426,Postres y yogures,Gelatina y otros postres,Otros postres,427
427,Postres y yogures,Postres de soja,Postres de soja,428
428,Postres y yogures,Yogures desnatados,Yogures desnatados,429
429,Postres y yogures,Yogures griegos,Yogures griegos,430
430,Postres y yogures,Yogures líquidos,Yogures líquidos,431


In [5515]:
def clasificar_category_yogures(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'la_despensa|yogures_y_postres|vegetal':
        return ('Postres y yogures', 'Postres de soja', 'Postres de soja')


    if subcat == 'la_despensa|yogures_y_postres|yogur_natural_y_sabores':
        if 'natural' in name:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures naturales')
        elif 'griego' in name:
            return ('Postres y yogures', 'Yogures griegos', 'Yogures griegos')
        else:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures de sabores')


    if subcat == 'la_despensa|yogures_y_postres|yogures_infantiles':
        return ('Postres y yogures', 'Yogures y postres infantiles', 'Yogures y postres infantiles')


    if subcat == 'la_despensa|yogures_y_postres|yogures_liquidos':
        return ('Postres y yogures', 'Yogures líquidos', 'Yogures líquidos')

    if subcat == 'la_despensa|yogures_y_postres|bifidus':
        if 'natural' in name:
            return ('Postres y yogures', 'Bífidus', 'Bífidus naturales')
        else:
            return ('Postres y yogures', 'Bífidus', 'Bífidus de sabores')


    if subcat == 'la_despensa|yogures_y_postres|colesterol_oseo_y_tension':
        return ('Postres y yogures', 'Yogures líquidos', 'Colesterol y otros')

    if subcat == 'la_despensa|yogures_y_postres|proteina':
        return ('Postres y yogures', 'Yogures desnatados', 'Yogures desnatados')

    if subcat == 'la_despensa|yogures_y_postres|flanes':
        return ('Postres y yogures', 'Flan y natillas', 'Flan')


    if subcat == 'la_despensa|yogures_y_postres|natillas_cremas_y_copas':
        return ('Postres y yogures', 'Flan y natillas', 'Natillas')

    if subcat == 'la_despensa|yogures_y_postres|gelatina':
        return ('Postres y yogures', 'Gelatina y otros postres', 'Gelatina')

    if subcat == 'la_despensa|yogures_y_postres|otros_postres':
        return ('Postres y yogures', 'Gelatina y otros postres', 'Otros postres')


    return (pd.NA, pd.NA, pd.NA)

In [5516]:
datamarket_update('la_despensa|yogures_y_postres|vegetal', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 510 to 4553
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5517]:
datamarket_update('la_despensa|yogures_y_postres|yogur_natural_y_sabores', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 367 to 4486
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5518]:
datamarket_update('la_despensa|yogures_y_postres|otros_postres', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 359 to 3198
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5519]:
datamarket_update('la_despensa|yogures_y_postres|gelatina', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 2345 to 4793
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5520]:
datamarket_update('la_despensa|yogures_y_postres|natillas_cremas_y_copas', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 508 to 4490
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5521]:
datamarket_update('la_despensa|yogures_y_postres|flanes', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 767 to 4264
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5522]:
datamarket_update('la_despensa|yogures_y_postres|proteina', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 524 to 4790
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5523]:
datamarket_update('la_despensa|yogures_y_postres|colesterol_oseo_y_tension', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1616 to 4228
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5524]:
datamarket_update('la_despensa|yogures_y_postres|bifidus', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1933 to 4317
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5525]:
datamarket_update('la_despensa|yogures_y_postres|yogures_liquidos', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1224 to 3147
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5526]:
datamarket_update('la_despensa|yogures_y_postres|yogures_infantiles', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 492 to 3468
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

# Procesamiento de la categoría "despensa"


In [5527]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'despensa']

Unnamed: 0,category,subcategory,subsubcategory
3,despensa,conservas,conservas_vegetales
4,despensa,desayunos_y_dulces,caramelos_chicles_y_golosinas
9,despensa,salsas,mayonesa_y_otras_salsas
28,despensa,desayunos_y_dulces,infusiones
36,despensa,aceites_vinagres_y_alinos,aceites
49,despensa,sal_y_especias,
50,despensa,lacteos_y_huevos,leche
52,despensa,pastas_harinas_y_masas,pastas
60,despensa,sopas,caldos
65,despensa,lacteos_y_huevos,postres


### Subcategoria "desayunos_y_dulces"

In [5528]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'desayunos_y_dulces']

Unnamed: 0,category,subcategory,subsubcategory
4,despensa,desayunos_y_dulces,caramelos_chicles_y_golosinas
28,despensa,desayunos_y_dulces,infusiones
79,despensa,desayunos_y_dulces,chocolates_y_bombones
81,despensa,desayunos_y_dulces,cacao_y_cremas_de_cacao
84,despensa,desayunos_y_dulces,preparacion_de_postres
97,despensa,desayunos_y_dulces,cafe
112,despensa,desayunos_y_dulces,galletas
184,despensa,desayunos_y_dulces,mermeladas_miel_y_frutas_en_almibar
192,despensa,desayunos_y_dulces,bolleria
234,despensa,desayunos_y_dulces,cereales


In [5529]:
df_datamarket[df_datamarket['brand_category'] == 'despensa|desayunos_y_dulces|chocolates_y_bombones'].head()

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
79,25861853,dia.es,despensa|desayunos_y_dulces|chocolates_y_bombones,VALOR chocolate negro 70% con galleta belga tableta 200 gr,,valor,False,2.79,13.95,kg,2023-03-15,False,False,,,
143,25861855,dia.es,despensa|desayunos_y_dulces|chocolates_y_bombones,TRAPA chocolate negro intenso 70% con almendras tableta 175 gr,,trapa,False,2.29,13.09,kg,2023-03-15,False,False,,,
555,25861832,dia.es,despensa|desayunos_y_dulces|chocolates_y_bombones,DIA TEMPTATION chocolate negro 74 % con pepitas de cacao tableta 100 gr,,dia,True,1.35,13.5,kg,2023-03-15,False,False,,,
618,25861804,dia.es,despensa|desayunos_y_dulces|chocolates_y_bombones,M&M's cacahuete 45 gr,,,,1.2,26.67,kg,2023-03-15,False,False,,,
894,25861866,dia.es,despensa|desayunos_y_dulces|chocolates_y_bombones,TRAPA bombones rellenos de cereza al licor caja 120 gr,,trapa,False,1.25,10.42,kg,2023-03-15,False,False,,,


In [5530]:
current_category = df_category[df_category["category_name"] == "Azúcar, caramelos y chocolate"]
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
51,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Azúcar,52
52,"Azúcar, caramelos y chocolate",Azúcar y edulcorante,Edulcorante y otros,53
53,"Azúcar, caramelos y chocolate",Chicles y caramelos,Chicles,54
54,"Azúcar, caramelos y chocolate",Chicles y caramelos,Caramelos,55
55,"Azúcar, caramelos y chocolate",Chocolate,Chocolate negro,56
56,"Azúcar, caramelos y chocolate",Chocolate,Chocolate con leche,57
57,"Azúcar, caramelos y chocolate",Chocolate,Chocolate blanco,58
58,"Azúcar, caramelos y chocolate",Chocolate,Chocolatinas,59
59,"Azúcar, caramelos y chocolate",Chocolate,Bombones,60
60,"Azúcar, caramelos y chocolate",Chocolate,Cremas de untar,61


In [5531]:
def clasificar_cacao(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|desayunos_y_dulces|cafe':
        if 'cápsulas' in name and 'nespresso' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Nespresso')
        elif 'cápsulas' in name and 'dolce gusto' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Dolce gusto')
        elif 'cápsulas' in name and 'tassimo' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Cápsulas compatibles Tassimo')
        elif 'monodosis' in name:
            return ('Cacao, café e infusiones', 'Café cápsula y monodosis', 'Monodosis')
        elif 'molido' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café molido')
        elif 'en grano' in name:
            return ('Cacao, café e infusiones', 'Café molido y en grano', 'Café en grano')
        elif 'soluble' in name:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Café soluble')
        else:
            return ('Cacao, café e infusiones', 'Café soluble y otras bebidas', 'Otros')


    if subcat == 'despensa|desayunos_y_dulces|cacao_y_cremas_de_cacao':
        if 'chocolate' in name:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Chocolate a la taza')
        elif 'crema' in name or 'pasta' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Cremas de untar')
        else:
            return ('Cacao, café e infusiones', 'Cacao soluble y chocolate a la taza', 'Cacao soluble')

    if subcat == 'despensa|desayunos_y_dulces|infusiones':
        if 'té' in name:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Té')
        else:
            return ('Cacao, café e infusiones', 'Té e infusiones', 'Infusiones')

    return (pd.NA, pd.NA, pd.NA)

In [5532]:
datamarket_update('despensa|desayunos_y_dulces|cafe', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 33 entries, 97 to 4862
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         33 non-null     int64  
 1   supermarket                33 non-null     object 
 2   brand_category             33 non-null     object 
 3   name                       33 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      33 non-null     float64
 8   reference_price            33 non-null     float64
 9   reference_unit             33 non-null     object 
 10  insert_date                33 non-null     object 
 11  price_corrected            33 non-null     bool   
 12  reference_price_corrected  33 non-null     bool   
 13  category_name              33 non-null     object 
 14

In [5533]:
datamarket_update('despensa|desayunos_y_dulces|cacao_y_cremas_de_cacao', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 81 to 4836
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         19 non-null     int64  
 1   supermarket                19 non-null     object 
 2   brand_category             19 non-null     object 
 3   name                       19 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  19 non-null     object 
 6   trademark_propietary_flag  19 non-null     object 
 7   price                      19 non-null     float64
 8   reference_price            19 non-null     float64
 9   reference_unit             19 non-null     object 
 10  insert_date                19 non-null     object 
 11  price_corrected            19 non-null     bool   
 12  reference_price_corrected  19 non-null     bool   
 13  category_name              19 non-null     object 
 14

In [5534]:
datamarket_update('despensa|desayunos_y_dulces|infusiones', clasificar_cacao)

<class 'pandas.core.frame.DataFrame'>
Index: 17 entries, 28 to 4768
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         17 non-null     int64  
 1   supermarket                17 non-null     object 
 2   brand_category             17 non-null     object 
 3   name                       17 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  17 non-null     object 
 6   trademark_propietary_flag  17 non-null     object 
 7   price                      17 non-null     float64
 8   reference_price            17 non-null     float64
 9   reference_unit             17 non-null     object 
 10  insert_date                17 non-null     object 
 11  price_corrected            17 non-null     bool   
 12  reference_price_corrected  17 non-null     bool   
 13  category_name              17 non-null     object 
 14

In [5535]:
def clasificar_azucar(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|desayunos_y_dulces|mermeladas_miel_y_frutas_en_almibar':
        if 'miel' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Miel')
        elif 'mermelada' in name:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Mermelada')
        else:
            return ('Azúcar, caramelos y chocolate', 'Mermelada y miel', 'Confitura y otros')


    if subcat == 'despensa|desayunos_y_dulces|azucar_y_edulcorantes':
        if 'azúcar' in name:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Azúcar')
        else:
            return ('Azúcar, caramelos y chocolate', 'Azúcar y edulcorante', 'Edulcorante y otros')


    if subcat == 'despensa|desayunos_y_dulces|chocolates_y_bombones':
        if 'bombones' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Bombones')
        elif any(x in name for x in ['barritas', 'huevos', 'pasqua', 'disquitos', 'cacahuetes', 'bolas', 'huevo', 'figuras', 'figura']):
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolatinas')
        elif 'chocolate negro' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate negro')
        elif 'chocolate blanco' in name:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate blanco')
        else:
            return ('Azúcar, caramelos y chocolate', 'Chocolate', 'Chocolate con leche')

    if subcat == 'despensa|desayunos_y_dulces|caramelos_chicles_y_golosinas':
        if 'chicles' in name:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Chicles')
        elif 'caramelos' in name:
            return ('Azúcar, caramelos y chocolate', 'Chicles y caramelos', 'Caramelos')
        else:
            return ('Azúcar, caramelos y chocolate', 'Golosinas', 'Golosinas')

    return (pd.NA, pd.NA, pd.NA)

In [5536]:
datamarket_update('despensa|desayunos_y_dulces|mermeladas_miel_y_frutas_en_almibar', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 25 entries, 184 to 4421
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         25 non-null     int64  
 1   supermarket                25 non-null     object 
 2   brand_category             25 non-null     object 
 3   name                       25 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  25 non-null     object 
 6   trademark_propietary_flag  25 non-null     object 
 7   price                      25 non-null     float64
 8   reference_price            25 non-null     float64
 9   reference_unit             25 non-null     object 
 10  insert_date                25 non-null     object 
 11  price_corrected            25 non-null     bool   
 12  reference_price_corrected  25 non-null     bool   
 13  category_name              25 non-null     object 
 1

In [5537]:
datamarket_update('despensa|desayunos_y_dulces|azucar_y_edulcorantes', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 547 to 4911
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5538]:
datamarket_update('despensa|desayunos_y_dulces|chocolates_y_bombones', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, 79 to 4663
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         32 non-null     int64  
 1   supermarket                32 non-null     object 
 2   brand_category             32 non-null     object 
 3   name                       32 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  28 non-null     object 
 6   trademark_propietary_flag  28 non-null     object 
 7   price                      32 non-null     float64
 8   reference_price            32 non-null     float64
 9   reference_unit             32 non-null     object 
 10  insert_date                32 non-null     object 
 11  price_corrected            32 non-null     bool   
 12  reference_price_corrected  32 non-null     bool   
 13  category_name              32 non-null     object 
 14

In [5539]:
datamarket_update('despensa|desayunos_y_dulces|caramelos_chicles_y_golosinas', clasificar_azucar)

<class 'pandas.core.frame.DataFrame'>
Index: 44 entries, 4 to 4953
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         44 non-null     int64  
 1   supermarket                44 non-null     object 
 2   brand_category             44 non-null     object 
 3   name                       44 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  44 non-null     object 
 6   trademark_propietary_flag  44 non-null     object 
 7   price                      44 non-null     float64
 8   reference_price            44 non-null     float64
 9   reference_unit             44 non-null     object 
 10  insert_date                44 non-null     object 
 11  price_corrected            44 non-null     bool   
 12  reference_price_corrected  44 non-null     bool   
 13  category_name              44 non-null     object 
 14 

In [5540]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'desayunos_y_dulces']

Unnamed: 0,category,subcategory,subsubcategory
4,despensa,desayunos_y_dulces,caramelos_chicles_y_golosinas
28,despensa,desayunos_y_dulces,infusiones
79,despensa,desayunos_y_dulces,chocolates_y_bombones
81,despensa,desayunos_y_dulces,cacao_y_cremas_de_cacao
84,despensa,desayunos_y_dulces,preparacion_de_postres
97,despensa,desayunos_y_dulces,cafe
112,despensa,desayunos_y_dulces,galletas
184,despensa,desayunos_y_dulces,mermeladas_miel_y_frutas_en_almibar
192,despensa,desayunos_y_dulces,bolleria
234,despensa,desayunos_y_dulces,cereales


In [5541]:
def clasificar_galletas(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'despensa|desayunos_y_dulces|cereales':
        if 'barrita' in name:
            return ('Cereales y galletas', 'Cereales', 'Barritas de cereales')
        elif any(x in name for x in ['crunchy', 'copos', 'muesli', 'granola', 'integral']):
            return ('Cereales y galletas', 'Cereales', 'Cereales integrales y muesli')
        else:
            return ('Cereales y galletas', 'Cereales', 'Cereales')

    if subcat == 'despensa|desayunos_y_dulces|galletas':
        if any(x in name for x in ['chocolate', 'oreo', 'rellenos', 'rellenas', 'crema', 'mermelada']):
            return ('Cereales y galletas', 'Galletas', 'Con chocolate y rellenas')
        elif 'integral' in name or 'digestive' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas integrales y digestive')
        elif 'surtido' in name or 'mix' in name:
            return ('Cereales y galletas', 'Galletas', 'Galletas surtidas')
        else:
            return ('Cereales y galletas', 'Galletas', 'Galletas desayuno')


    return (pd.NA, pd.NA, pd.NA)

In [5542]:
datamarket_update('despensa|desayunos_y_dulces|cereales', clasificar_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 234 to 4770
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         36 non-null     int64  
 1   supermarket                36 non-null     object 
 2   brand_category             36 non-null     object 
 3   name                       36 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  36 non-null     object 
 6   trademark_propietary_flag  36 non-null     object 
 7   price                      36 non-null     float64
 8   reference_price            36 non-null     float64
 9   reference_unit             36 non-null     object 
 10  insert_date                36 non-null     object 
 11  price_corrected            36 non-null     bool   
 12  reference_price_corrected  36 non-null     bool   
 13  category_name              36 non-null     object 
 1

In [5543]:
datamarket_update('despensa|desayunos_y_dulces|galletas', clasificar_galletas)

<class 'pandas.core.frame.DataFrame'>
Index: 43 entries, 112 to 4808
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         43 non-null     int64  
 1   supermarket                43 non-null     object 
 2   brand_category             43 non-null     object 
 3   name                       43 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  43 non-null     object 
 6   trademark_propietary_flag  43 non-null     object 
 7   price                      43 non-null     float64
 8   reference_price            43 non-null     float64
 9   reference_unit             43 non-null     object 
 10  insert_date                43 non-null     object 
 11  price_corrected            43 non-null     bool   
 12  reference_price_corrected  43 non-null     bool   
 13  category_name              43 non-null     object 
 1

In [5544]:
df_mercadona[df_mercadona['name'].str.contains('decoración', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id


In [5545]:
def clasificar_reposteria(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|desayunos_y_dulces|preparacion_de_postres':
        if 'harina' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Harina')
        elif 'masa' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Masas')
        else:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Levadura y preparado repostería')


    if subcat == 'despensa|desayunos_y_dulces|bolleria':
        if any(x in name for x in ['empanadillas', 'saladas', 'empanada']):
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería salada')
        elif any(x in name for x in ['paquete', 'pack', 'bolsa']):
            return ('Panadería y pastelería', 'Bollería envasada', 'Bollería envasada')
        elif 'surtido' in name:
            return ('Panadería y pastelería', 'Bollería envasada', 'Pastelitos surtidos')
        else:
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería dulce')


    return (pd.NA, pd.NA, pd.NA)

In [5546]:
datamarket_update('despensa|desayunos_y_dulces|preparacion_de_postres', clasificar_reposteria)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 84 to 4505
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  12 non-null     object 
 6   trademark_propietary_flag  12 non-null     object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 14

In [5547]:
datamarket_update('despensa|desayunos_y_dulces|bolleria', clasificar_reposteria)

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 192 to 4987
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         36 non-null     int64  
 1   supermarket                36 non-null     object 
 2   brand_category             36 non-null     object 
 3   name                       36 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  36 non-null     object 
 6   trademark_propietary_flag  36 non-null     object 
 7   price                      36 non-null     float64
 8   reference_price            36 non-null     float64
 9   reference_unit             36 non-null     object 
 10  insert_date                36 non-null     object 
 11  price_corrected            36 non-null     bool   
 12  reference_price_corrected  36 non-null     bool   
 13  category_name              36 non-null     object 
 1

### Subcategoría "conservas"

In [5548]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'conservas']

Unnamed: 0,category,subcategory,subsubcategory
3,despensa,conservas,conservas_vegetales
133,despensa,conservas,conservas_de_pescado
1129,despensa,conservas,conservas_de_carne


In [5549]:
def clasificar_conservas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|conservas|conservas_de_pescado':
        if 'mejillones' in name:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Mejillones')
        elif 'berberechos' in name or 'almejas' in name:
            return ('Conservas, caldos y cremas', 'Berberechos y mejillones', 'Berberechos y almejas')
        elif 'atún' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Atún')
        elif 'bonito' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Bonito')
        elif 'caballa' in name or 'melva' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Caballa y melva')
        elif 'sardina' in name or 'sardinilla' in name:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Sardinas')
        else:
            return ('Conservas, caldos y cremas', 'Atún y otras conservas de pescado', 'Otras conservas de pescado')

    if subcat == 'despensa|conservas|conservas_vegetales':
        return ('Conservas, caldos y cremas', 'Conservas de verdura y frutas', 'Conservas verdura')


    if subcat == 'despensa|conservas|conservas_de_carne':
        if 'salchichas' in name:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Salchichas')
        elif 'magro' in name and 'adobado' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')
        elif 'magro de cerdo' in name or 'crema de Crema de jamón' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')
        else:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')


    return (pd.NA, pd.NA, pd.NA)

In [5550]:
datamarket_update('despensa|conservas|conservas_de_carne', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1129 to 4947
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5551]:
datamarket_update('despensa|conservas|conservas_vegetales', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 3 to 4967
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         36 non-null     int64  
 1   supermarket                36 non-null     object 
 2   brand_category             36 non-null     object 
 3   name                       36 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  36 non-null     object 
 6   trademark_propietary_flag  36 non-null     object 
 7   price                      36 non-null     float64
 8   reference_price            36 non-null     float64
 9   reference_unit             36 non-null     object 
 10  insert_date                36 non-null     object 
 11  price_corrected            36 non-null     bool   
 12  reference_price_corrected  36 non-null     bool   
 13  category_name              36 non-null     object 
 14 

In [5552]:
datamarket_update('despensa|conservas|conservas_de_pescado', clasificar_conservas)

<class 'pandas.core.frame.DataFrame'>
Index: 47 entries, 133 to 4932
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         47 non-null     int64  
 1   supermarket                47 non-null     object 
 2   brand_category             47 non-null     object 
 3   name                       47 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  47 non-null     object 
 6   trademark_propietary_flag  47 non-null     object 
 7   price                      47 non-null     float64
 8   reference_price            47 non-null     float64
 9   reference_unit             47 non-null     object 
 10  insert_date                47 non-null     object 
 11  price_corrected            47 non-null     bool   
 12  reference_price_corrected  47 non-null     bool   
 13  category_name              47 non-null     object 
 1

### Subcategorias "salsas", "aceites_vinagres_y_alinos" y "sal_y_especias"

In [5553]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'salsas']

Unnamed: 0,category,subcategory,subsubcategory
9,despensa,salsas,mayonesa_y_otras_salsas
245,despensa,salsas,tomate


In [5554]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'aceites_vinagres_y_alinos']

Unnamed: 0,category,subcategory,subsubcategory
36,despensa,aceites_vinagres_y_alinos,aceites
153,despensa,aceites_vinagres_y_alinos,vinagres_y_alinos


In [5555]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'sal_y_especias']

Unnamed: 0,category,subcategory,subsubcategory
49,despensa,sal_y_especias,


In [5556]:
def clasificar_aceites(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|aceites_vinagres_y_alinos|aceites':
        if 'oliva' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de oliva')
        else:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Aceite de girasol, semillas y maíz')

    if subcat == 'despensa|aceites_vinagres_y_alinos|vinagres_y_alinos':
        return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Vinagre y otros aderezos')


    if subcat == 'despensa|salsas|tomate':
        return ('Aceite, especias y salsas', 'Otras salsas', 'Tomate frito')


    if subcat == 'despensa|salsas|mayonesa_y_otras_salsas':
        if 'mayonesa' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mayonesa')
        elif 'ketchup' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Ketchup')
        elif 'mostaza' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Mostaza')
        elif 'alioli' in name or 'ali-oli' in name:
            return ('Aceite, especias y salsas', 'Mayonesa, ketchup y mostaza', 'Allioli')
        elif 'soja' in name or 'teriyaki' in name or 'agridulce' in name or 'chili' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas orientales')
        elif 'barbacoa' in name or 'piri piri' in name or 'burger' in name or 'curry' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para carnes')
        elif 'fresca' in name or 'pesto' in name or 'boloñesa' in name or 'carbonara' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para pasta')
        else:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Otras salsas')

    if subcat == 'despensa|sal_y_especias':
        if 'sal' in name or 'bicarbonato' in name:
            return ('Aceite, especias y salsas', 'Aceite, vinagre y sal', 'Sal y bicarbonato')
        elif 'pimienta' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Pimienta')
        elif 'pimentón' in name or 'colorante' in name or 'azafrán' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Colorante y pimentón')
        elif 'sazonador' in name or 'mezcla' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Sazonadores')
        elif any(x in name for x in ['orégano', 'perejil', 'romero', 'laurel', 'tomillo', 'cilantro', 'eneldo', 'hierbas']):
            return ('Aceite, especias y salsas', 'Especias', 'Hierbas')
        else:
            return ('Aceite, especias y salsas', 'Especias', 'Otras especias')


    return (pd.NA, pd.NA, pd.NA)

In [5557]:
datamarket_update('despensa|sal_y_especias', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 49 to 4751
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         26 non-null     int64  
 1   supermarket                26 non-null     object 
 2   brand_category             26 non-null     object 
 3   name                       26 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  26 non-null     object 
 6   trademark_propietary_flag  26 non-null     object 
 7   price                      26 non-null     float64
 8   reference_price            26 non-null     float64
 9   reference_unit             26 non-null     object 
 10  insert_date                26 non-null     object 
 11  price_corrected            26 non-null     bool   
 12  reference_price_corrected  26 non-null     bool   
 13  category_name              26 non-null     object 
 14

In [5558]:
datamarket_update('despensa|salsas|mayonesa_y_otras_salsas', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 27 entries, 9 to 4942
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         27 non-null     int64  
 1   supermarket                27 non-null     object 
 2   brand_category             27 non-null     object 
 3   name                       27 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  27 non-null     object 
 6   trademark_propietary_flag  27 non-null     object 
 7   price                      27 non-null     float64
 8   reference_price            27 non-null     float64
 9   reference_unit             27 non-null     object 
 10  insert_date                27 non-null     object 
 11  price_corrected            27 non-null     bool   
 12  reference_price_corrected  27 non-null     bool   
 13  category_name              27 non-null     object 
 14 

In [5559]:
datamarket_update('despensa|aceites_vinagres_y_alinos|vinagres_y_alinos', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 153 to 4542
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

In [5560]:
datamarket_update('despensa|salsas|tomate', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 245 to 4624
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 1

In [5561]:
datamarket_update('despensa|aceites_vinagres_y_alinos|aceites', clasificar_aceites)

<class 'pandas.core.frame.DataFrame'>
Index: 20 entries, 36 to 4716
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         20 non-null     int64  
 1   supermarket                20 non-null     object 
 2   brand_category             20 non-null     object 
 3   name                       20 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  20 non-null     object 
 6   trademark_propietary_flag  20 non-null     object 
 7   price                      20 non-null     float64
 8   reference_price            20 non-null     float64
 9   reference_unit             20 non-null     object 
 10  insert_date                20 non-null     object 
 11  price_corrected            20 non-null     bool   
 12  reference_price_corrected  20 non-null     bool   
 13  category_name              20 non-null     object 
 14

### Subcategoría "lacteos_y_huevos"

In [5562]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'lacteos_y_huevos']

Unnamed: 0,category,subcategory,subsubcategory
50,despensa,lacteos_y_huevos,leche
65,despensa,lacteos_y_huevos,postres
197,despensa,lacteos_y_huevos,huevos
384,despensa,lacteos_y_huevos,bebidas_vegetales
550,despensa,lacteos_y_huevos,yogures
953,despensa,lacteos_y_huevos,mantequilla_y_margarina
1586,despensa,lacteos_y_huevos,nata


In [5563]:
def clasificar_huevos(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|lacteos_y_huevos|huevos':
        return ('Huevos, leche y mantequilla', 'Huevos', 'Huevos')

    return (pd.NA, pd.NA, pd.NA)

In [5564]:
datamarket_update('despensa|lacteos_y_huevos|huevos', clasificar_huevos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 197 to 2810
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5565]:
def clasificar_leche(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'despensa|lacteos_y_huevos|nata':
        return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Nata')

    if subcat == 'despensa|lacteos_y_huevos|leche':
        if 'semidesnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche semidesnatada')
        elif 'desnatada' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche desnatada')
        elif 'infantil' in name or 'preparado lácteo' in name or 'bebida láctea' in name:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche Infantil')
        elif 'entera':
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche entera')
        else:
            return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Leche condensada y otros')


    if subcat == 'despensa|lacteos_y_huevos|bebidas_vegetales':
         return ('Huevos, leche y mantequilla', 'Leche y bebidas vegetales', 'Bebidas vegetales')


    if subcat == 'despensa|lacteos_y_huevos|mantequilla_y_margarina':
        if 'mantequilla' in name:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Mantequilla')
        else:
            return ('Huevos, leche y mantequilla', 'Mantequilla y margarina', 'Margarina')

    return (pd.NA, pd.NA, pd.NA)

In [5566]:
datamarket_update('despensa|lacteos_y_huevos|mantequilla_y_margarina', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 953 to 4592
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5567]:
datamarket_update('despensa|lacteos_y_huevos|bebidas_vegetales', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 384 to 2863
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [5568]:
datamarket_update('despensa|lacteos_y_huevos|leche', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 50 to 4842
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         38 non-null     int64  
 1   supermarket                38 non-null     object 
 2   brand_category             38 non-null     object 
 3   name                       38 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  38 non-null     object 
 6   trademark_propietary_flag  38 non-null     object 
 7   price                      38 non-null     float64
 8   reference_price            38 non-null     float64
 9   reference_unit             38 non-null     object 
 10  insert_date                38 non-null     object 
 11  price_corrected            38 non-null     bool   
 12  reference_price_corrected  38 non-null     bool   
 13  category_name              38 non-null     object 
 14

In [5569]:
datamarket_update('despensa|lacteos_y_huevos|nata', clasificar_leche)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1586 to 3831
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5570]:
def clasificar_category_yogures(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'despensa|lacteos_y_huevos|yogures':
        if 'natural' in name:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures naturales')
        elif 'griego' in name:
            return ('Postres y yogures', 'Yogures griegos', 'Yogures griegos')
        elif 'infantil' in name:
            return ('Postres y yogures', 'Yogures y postres infantiles', 'Yogures y postres infantiles')
        elif 'líquido' in name:
            return ('Postres y yogures', 'Yogures líquidos', 'Yogures líquidos')
        elif 'desnatado' or 'proteina' in name:
            return ('Postres y yogures', 'Yogures desnatados', 'Yogures desnatados')
        elif 'colesterol' in name:
            return ('Postres y yogures', 'Yogures líquidos', 'Colesterol y otros')
        if 'natural' in name and 'bífido' in name:
            return ('Postres y yogures', 'Bífidus', 'Bífidus naturales')
        elif 'bífido' in name:
            return ('Postres y yogures', 'Bífidus', 'Bífidus de sabores')
        else:
            return ('Postres y yogures', 'Yogures naturales y sabores', 'Yogures de sabores')


    if subcat == 'despensa|lacteos_y_huevos|postres':
        if 'soja' in name:
            return ('Postres y yogures', 'Postres de soja', 'Postres de soja')
        elif 'flan' in name:
            return ('Postres y yogures', 'Yogures griegos', 'Yogures griegos')
        elif 'infantil' in name:
            return ('Postres y yogures', 'Yogures y postres infantiles', 'Yogures y postres infantiles')
        elif 'natilla' in name:
            return ('Postres y yogures', 'Flan y natillas', 'Natillas')
        if 'gelatina' in name:
            return ('Postres y yogures', 'Gelatina y otros postres', 'Gelatina')
        else:
            return ('Postres y yogures', 'Gelatina y otros postres', 'Otros postres')


    return (pd.NA, pd.NA, pd.NA)

In [5571]:
datamarket_update('despensa|lacteos_y_huevos|postres', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 65 to 4905
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         23 non-null     int64  
 1   supermarket                23 non-null     object 
 2   brand_category             23 non-null     object 
 3   name                       23 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  22 non-null     object 
 6   trademark_propietary_flag  22 non-null     object 
 7   price                      23 non-null     float64
 8   reference_price            23 non-null     float64
 9   reference_unit             23 non-null     object 
 10  insert_date                23 non-null     object 
 11  price_corrected            23 non-null     bool   
 12  reference_price_corrected  23 non-null     bool   
 13  category_name              23 non-null     object 
 14

In [5572]:
datamarket_update('despensa|lacteos_y_huevos|yogures', clasificar_category_yogures)

<class 'pandas.core.frame.DataFrame'>
Index: 29 entries, 550 to 4993
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         29 non-null     int64  
 1   supermarket                29 non-null     object 
 2   brand_category             29 non-null     object 
 3   name                       29 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  29 non-null     object 
 6   trademark_propietary_flag  29 non-null     object 
 7   price                      29 non-null     float64
 8   reference_price            29 non-null     float64
 9   reference_unit             29 non-null     object 
 10  insert_date                29 non-null     object 
 11  price_corrected            29 non-null     bool   
 12  reference_price_corrected  29 non-null     bool   
 13  category_name              29 non-null     object 
 1

### Subcategoría "arroz_y_legumbres"

In [5573]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'arroz_y_legumbres']

Unnamed: 0,category,subcategory,subsubcategory
295,despensa,arroz_y_legumbres,legumbres
1681,despensa,arroz_y_legumbres,arroz


In [5574]:
def clasificar_legumbres(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|arroz_y_legumbres|arroz':
        return ('Arroz, legumbres y pasta', 'Arroz', 'Arroz')

    if subcat == 'despensa|arroz_y_legumbres|legumbres':
        if 'garbanzo' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Garbanzos')
        elif 'alubia' in name:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Alubias')
        else:
            return ('Arroz, legumbres y pasta', 'Legumbres', 'Lentejas y otros')


    return (pd.NA, pd.NA, pd.NA)

In [5575]:
datamarket_update('despensa|arroz_y_legumbres|arroz', clasificar_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1681 to 4766
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5576]:
datamarket_update('despensa|arroz_y_legumbres|legumbres', clasificar_legumbres)

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 295 to 4778
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         16 non-null     int64  
 1   supermarket                16 non-null     object 
 2   brand_category             16 non-null     object 
 3   name                       16 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  16 non-null     object 
 6   trademark_propietary_flag  16 non-null     object 
 7   price                      16 non-null     float64
 8   reference_price            16 non-null     float64
 9   reference_unit             16 non-null     object 
 10  insert_date                16 non-null     object 
 11  price_corrected            16 non-null     bool   
 12  reference_price_corrected  16 non-null     bool   
 13  category_name              16 non-null     object 
 1

### Subcategoría "pastas_harinas_y_masas"

In [5577]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pastas_harinas_y_masas']

Unnamed: 0,category,subcategory,subsubcategory
52,despensa,pastas_harinas_y_masas,pastas
109,despensa,pastas_harinas_y_masas,harinas_y_levaduras
280,despensa,pastas_harinas_y_masas,masas_y_hojaldres


In [5578]:
def clasificar_pastas(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|pastas_harinas_y_masas|pastas':
        if any(x in name for x in ['fideuá', 'fideo', 'estrellas', 'maravilla', 'piñones']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos')
        elif any(x in name for x in ['pajaritas', 'penne', 'tortiglioni', 'hélices', 'macarrón', 'fusilli', 'trottole', 'tiburón']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')
        elif any(x in name for x in ['tallarines', 'spaghetti', 'nidos', 'noodles', 'tagliatelle']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Spaghetti y tallarines')
        elif any(x in name for x in ['tortellini', 'ravioli', 'gnocchi', 'girasoles', 'medialunas']):
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Pasta rellena')
        elif 'orientales' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos orientales')
        elif 'canelones' in name or 'lazaña' in name:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Lasaña y canelones')
        else:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Macarrones, pajaritas y hélices')

    return (pd.NA, pd.NA, pd.NA)

In [5579]:
datamarket_update('despensa|pastas_harinas_y_masas|pastas', clasificar_pastas)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 52 to 4685
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  31 non-null     object 
 6   trademark_propietary_flag  31 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 14

In [5580]:
def clasificar_harina(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|pastas_harinas_y_masas|harinas_y_levaduras':
        if 'harina' in name:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Harina')
        else:
            return ('Panadería y pastelería', 'Harina y preparado repostería', 'Levadura y preparado repostería')

    if subcat == 'despensa|pastas_harinas_y_masas|masas_y_hojaldres':
         return ('Panadería y pastelería', 'Harina y preparado repostería', 'Masas')

    return (pd.NA, pd.NA, pd.NA)

In [5581]:
datamarket_update('despensa|pastas_harinas_y_masas|harinas_y_levaduras', clasificar_harina)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 109 to 3628
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5582]:
datamarket_update('despensa|pastas_harinas_y_masas|masas_y_hojaldres', clasificar_harina)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 280 to 3451
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

### Subcategoría "pan"

In [5583]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pan']

Unnamed: 0,category,subcategory,subsubcategory
167,despensa,pan,biscotes
343,despensa,pan,pan_de_horno
355,despensa,pan,picos_de_pan
460,despensa,pan,pan_de_molde
733,despensa,pan,pan_de_hamburguesas_y_perritos
963,despensa,pan,pan_rallado
1808,despensa,pan,snack_de_pan


In [5584]:
def clasificar_panes(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|pan|pan_de_horno':
        if 'rebanado' in name or 'rebanada' in name or 'rebanadas' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Pan rebanado')
        elif 'barra' in name or 'baguette' in name or 'barras' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Barra de pan')
        else:
            return ('Panadería y pastelería', 'Pan de horno', 'Pan de bocadillo')

    if subcat == 'despensa|pan|pan_de_molde':
        return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de molde')

    if subcat == 'despensa|pan|pan_de_hamburguesas_y_perritos':
        return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')

    if subcat == 'despensa|pan|pan_rallado':
        return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan rallado')

    if subcat in ['despensa|pan|picos_de_pan', 'despensa|pan|snack_de_pan', 'despensa|pan|biscotes']:
        if any(x in name for x in ['piquitos', 'picos', 'grissini']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picos')
        elif any(x in name for x in ['rosquilletas', 'palitos', 'pan especial']):
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Rosquilletas')
        elif 'picatostes' in name:
            return ('Panadería y pastelería', 'Picos, rosquilletas y picatostes', 'Picatostes')
        else:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Crakers y tartaletas')


    return (pd.NA, pd.NA, pd.NA)

In [5585]:
datamarket_update('despensa|pan|biscotes', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 167 to 4782
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5586]:
datamarket_update('despensa|pan|snack_de_pan', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1808 to 4762
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5587]:
datamarket_update('despensa|pan|picos_de_pan', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 355 to 4469
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 14

In [5588]:
datamarket_update('despensa|pan|pan_rallado', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 963 to 1975
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5589]:
datamarket_update('despensa|pan|pan_de_hamburguesas_y_perritos', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 733 to 3631
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5590]:
datamarket_update('despensa|pan|pan_de_molde', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 460 to 4197
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5591]:
datamarket_update('despensa|pan|pan_de_horno', clasificar_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 343 to 4630
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

### Subcategoría "sopas"

In [5592]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'sopas']

Unnamed: 0,category,subcategory,subsubcategory
60,despensa,sopas,caldos
844,despensa,sopas,sopas
1582,despensa,sopas,cremas_y_pures


In [5593]:
def clasificar_sopas(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'despensa|sopas|caldos':
        if 'en pastillas' in name:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo en pastillas')
        else:
            return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Caldo líquido')

    if subcat == 'despensa|sopas|cremas_y_pures':
        return ('Conservas, caldos y cremas', 'Gazpacho y cremas', 'Cremas y puré')

    if subcat == 'despensa|sopas|sopas':
        return ('Conservas, caldos y cremas', 'Sopa y caldo', 'Sopa')


    return (pd.NA, pd.NA, pd.NA)

In [5594]:
datamarket_update('despensa|sopas|caldos', clasificar_sopas)

<class 'pandas.core.frame.DataFrame'>
Index: 15 entries, 60 to 4878
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         15 non-null     int64  
 1   supermarket                15 non-null     object 
 2   brand_category             15 non-null     object 
 3   name                       15 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  15 non-null     object 
 6   trademark_propietary_flag  15 non-null     object 
 7   price                      15 non-null     float64
 8   reference_price            15 non-null     float64
 9   reference_unit             15 non-null     object 
 10  insert_date                15 non-null     object 
 11  price_corrected            15 non-null     bool   
 12  reference_price_corrected  15 non-null     bool   
 13  category_name              15 non-null     object 
 14

In [5595]:
datamarket_update('despensa|sopas|cremas_y_pures', clasificar_sopas)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 1582 to 4531
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 1

In [5596]:
datamarket_update('despensa|sopas|sopas', clasificar_sopas)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 844 to 4755
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 14

### Subcategoría "aperitivos"

In [5597]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'aperitivos']

Unnamed: 0,category,subcategory,subsubcategory
119,la_despensa,aperitivos,snacks
168,la_despensa,aperitivos,patatas_fritas
216,la_despensa,aperitivos,torreznos_y_cortezas
221,despensa,aperitivos,
254,la_despensa,aperitivos,tortitas
553,la_despensa,aperitivos,galletas_saladas
969,la_despensa,aperitivos,aceitunas_y_encurtidos
1006,la_despensa,aperitivos,palomitas
1897,la_despensa,aperitivos,frutas_desecadas
3252,la_despensa,aperitivos,frutos_secos


In [5598]:
df_mercadona[df_mercadona['subcategory_2_nivel_name'] == 'Frutos secos']

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
383,Nuez natural Hacendado pelada,Paquete 0.2 kg,2.7,13.5,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
384,Almendra natural Hacendado,Paquete 0.2 kg,2.3,11.5,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
385,Pistacho tostado Hacendado con sal,Paquete 0.25 kg,3.55,14.2,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
386,Pipas girasol tostadas Hacendado gigante con sal,Paquete 0.2 kg,1.2,6.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
387,Pistacho tostado Hacendado 0% sal añadida,Paquete 0.25 kg,3.55,14.2,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
388,Cacahuete frito con sal Hacendado pelado,Paquete 0.25 kg,1.0,4.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
389,Pipas girasol tostadas Hacendado gigante aguasal,Paquete 0.2 kg,1.2,6.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
390,Anacardo natural Hacendado,Paquete 0.2 kg,2.4,12.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
391,Palomitas de maíz con sal Hacendado para microondas,Paquete 0.27 kg,1.0,3.704,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37
392,Anacardo frito salado Hacendado,Paquete 0.2 kg,2.4,12.0,kg,Frutos secos,Aperitivos,Frutos secos y fruta desecada,False,37


In [5599]:
def clasificar_category_aperitivos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|aperitivos':
        if 'aceitunas verdes' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas verdes')
        elif 'aceitunas negras' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Aceitunas negras')
        elif 'abanderillas' in name or 'mix' in name or 'cóctel' in name:
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Cóctel y banderillas')
        elif any(x in name for x in ['pepinillos', 'piparra', 'jalapeños', 'berenjenas', 'alcaparrones', 'ajos', 'guindillas', 'cebollitas', 'picadillo', 'cebolla', 'alcaparras']):
            return ('Aperitivos', 'Aceitunas y encurtidos', 'Pepinillos y otros encurtidos')
        elif 'patata' in name or 'patatas' in name or 'patatinas' in name:
            return ('Aperitivos', 'Patatas fritas y snacks', 'Patatas fritas')
        elif 'dátiles' in name or 'pasas' in name or 'desecados' in name or 'deshidratado' in name or 'albaricoque' in name or 'arándanos' in name:
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Fruta desecada')
        elif 'cocktail' in name or 'combinado' in name and ('fruta' in name or 'tostado' in name or 'frutos' in name or 'semillas' in name):
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Cocktails')
        elif any(x in name for x in ['avellana', 'almendra ', 'nuez', 'cacahuete', 'pistacho', 'semillas ', 'pipas', 'anacardo', 'nueces', 'palomitas', 'frutos secos']):
            return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')
        else:
            return ('Aperitivos', 'Patatas fritas y snacks', 'Snacks')

    return (pd.NA, pd.NA, pd.NA)

In [5600]:
datamarket_update('despensa|aperitivos', clasificar_category_aperitivos)

<class 'pandas.core.frame.DataFrame'>
Index: 31 entries, 221 to 4648
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         31 non-null     int64  
 1   supermarket                31 non-null     object 
 2   brand_category             31 non-null     object 
 3   name                       31 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  30 non-null     object 
 6   trademark_propietary_flag  30 non-null     object 
 7   price                      31 non-null     float64
 8   reference_price            31 non-null     float64
 9   reference_unit             31 non-null     object 
 10  insert_date                31 non-null     object 
 11  price_corrected            31 non-null     bool   
 12  reference_price_corrected  31 non-null     bool   
 13  category_name              31 non-null     object 
 1

### Subcategoría "dieteticos"

In [5601]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'dieteticos']

Unnamed: 0,category,subcategory,subsubcategory
395,despensa,dieteticos,complementos_nutricionales
945,despensa,dieteticos,perdida_de_peso
3641,despensa,dieteticos,semillas


In [5602]:
def clasificar_dieteticos(row):
    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|dieteticos|semillas':
        return ('Aperitivos', 'Frutos secos y fruta desecada', 'Frutos secos')

    if subcat in ['despensa|dieteticos|complementos_nutricionales', 'despensa|dieteticos|perdida_de_peso']:
        return ('Fitoterapia y parafarmacia', 'Fitoterapia', 'Fitoterapia')

    return (pd.NA, pd.NA, pd.NA)

In [5603]:
datamarket_update('despensa|dieteticos|semillas', clasificar_dieteticos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3641 to 3641
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5604]:
datamarket_update('despensa|dieteticos|complementos_nutricionales', clasificar_dieteticos)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 395 to 4994
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  10 non-null     object 
 6   trademark_propietary_flag  10 non-null     object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5605]:
datamarket_update('despensa|dieteticos|perdida_de_peso', clasificar_dieteticos)

<class 'pandas.core.frame.DataFrame'>
Index: 13 entries, 945 to 4561
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         13 non-null     int64  
 1   supermarket                13 non-null     object 
 2   brand_category             13 non-null     object 
 3   name                       13 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  13 non-null     object 
 6   trademark_propietary_flag  13 non-null     object 
 7   price                      13 non-null     float64
 8   reference_price            13 non-null     float64
 9   reference_unit             13 non-null     object 
 10  insert_date                13 non-null     object 
 11  price_corrected            13 non-null     bool   
 12  reference_price_corrected  13 non-null     bool   
 13  category_name              13 non-null     object 
 1

### Subcategoría "cocina_internacional"

In [5606]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'cocina_internacional']

Unnamed: 0,category,subcategory,subsubcategory
1066,despensa,cocina_internacional,mejicana
1289,despensa,cocina_internacional,otras
3309,despensa,cocina_internacional,oriental


In [5607]:
df_mercadona[df_mercadona['name'].str.contains('curry', case=False, na=False)].tail(20)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
62,Curry Hacendado,Bote 0.057 kg,1.3,22.808,kg,Otras especias,"Aceite, especias y salsas",Especias,False,8
116,Salsa curry Tikka Masala Hacendado picante,Paquete 0.18 kg,2.0,11.112,kg,Salsas para carnes,"Aceite, especias y salsas",Otras salsas,False,15
4534,Pollo al curry Hacendado con arroz basmati,Bandeja 0.28 kg,2.85,10.179,kg,Arroz,Pizzas y platos preparados,Platos preparados calientes,False,412


In [5608]:
df_datamarket[df_datamarket['brand_category'] == 'despensa|cocina_internacional|otras']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
1289,25862828,dia.es,despensa|cocina_internacional|otras,MAGGI fideos fusian India curry sobre 118 gr,,maggi,False,2.09,17.71,kg,2023-03-15,False,False,,,


In [5609]:
def clasificar_intern(row):
    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat in ['despensa|cocina_internacional|oriental', 'despensa|cocina_internacional|otras']:
        if 'soja' in name or 'teriyaki' in name or 'agridulce' in name or 'chili' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas orientales')
        elif 'curry' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Salsas para carnes')
        else:
            return ('Arroz, legumbres y pasta', 'Pasta y fideos', 'Fideos orientales')

    if subcat == 'despensa|cocina_internacional|mejicana':
        if 'salsa' in name or 'guacamole' in name:
            return ('Aceite, especias y salsas', 'Otras salsas', 'Otras salsas')
        elif 'sazonador' in name:
            return ('Aceite, especias y salsas', 'Especias', 'Sazonadores')
        else:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')

    return (pd.NA, pd.NA, pd.NA)

In [5610]:
datamarket_update('despensa|cocina_internacional|oriental', clasificar_intern)

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 3309 to 4728
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         9 non-null      int64  
 1   supermarket                9 non-null      object 
 2   brand_category             9 non-null      object 
 3   name                       9 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      9 non-null      float64
 8   reference_price            9 non-null      float64
 9   reference_unit             9 non-null      object 
 10  insert_date                9 non-null      object 
 11  price_corrected            9 non-null      bool   
 12  reference_price_corrected  9 non-null      bool   
 13  category_name              9 non-null      object 
 1

In [5611]:
datamarket_update('despensa|cocina_internacional|otras', clasificar_intern)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 1289 to 1289
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5612]:
datamarket_update('despensa|cocina_internacional|mejicana', clasificar_intern)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 1066 to 3660
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 1

### Subcategoría "pates_y_foies"

In [5613]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pates_y_foies']

Unnamed: 0,category,subcategory,subsubcategory
1359,despensa,pates_y_foies,


In [5614]:
df_mercadona[df_mercadona['name'].str.contains('foie', case=False, na=False)].tail(20)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1191,Foie gras de pato fresco,Paquete 0.135 kg,9.45,70.0,kg,Pavo y otras aves,Carne,Aves y pollo,False,127
1646,Foie gras entero de pato Hacendado,Paquete 0.1 kg,8.3,83.0,kg,Paté,Charcutería y quesos,Paté y sobrasada,False,157
1647,Bloc foie gras de pato Hacendado,Paquete 0.1 kg,5.25,52.5,kg,Paté,Charcutería y quesos,Paté y sobrasada,False,157


In [5615]:
def clasificar_foie(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'despensa|pates_y_foies':
        if 'sobrasada' in name:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Sobrasada')
        else:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Paté')


    return (pd.NA, pd.NA, pd.NA)

In [5616]:
datamarket_update('despensa|pates_y_foies', clasificar_foie)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1359 to 4066
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

# Procesamiento de la categoría "productos_frescos"

In [5617]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'productos_frescos']

Unnamed: 0,category,subcategory,subsubcategory
1,productos_frescos,platos_preparados,fritos
43,productos_frescos,carniceria,cordero
59,productos_frescos,platos_preparados,platos_de_verdura
103,productos_frescos,pescaderia,bacalao_y_salazones
105,productos_frescos,panaderia_tradicional,pasteleria_y_reposteria
...,...,...,...
3575,productos_frescos,pescaderia,preparados_y_elaborados_de_pescado_y_marisco
4018,productos_frescos,frutas,fruta_de_temporada
4202,productos_frescos,platos_preparados,cocina_internacional
4212,productos_frescos,quesos,azules_y_roquefort


### Subcategoría "carniceria"

In [5618]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'carniceria']

Unnamed: 0,category,subcategory,subsubcategory
43,productos_frescos,carniceria,cordero
115,productos_frescos,carniceria,aves_y_pollo
364,productos_frescos,carniceria,empanados
474,productos_frescos,carniceria,embutidos_frescos
821,productos_frescos,carniceria,hamburguesas
985,productos_frescos,carniceria,otras_carnes
1260,productos_frescos,carniceria,vacuno
1593,productos_frescos,carniceria,carne_picada
2392,productos_frescos,carniceria,cerdo
2825,productos_frescos,carniceria,conejo


In [5619]:
current_category = df_category[df_category["category_name"] == 'Carne']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
125,Carne,Arreglos,Arreglos,126
126,Carne,Aves y pollo,Pavo y otras aves,127
127,Carne,Aves y pollo,Pollo,128
128,Carne,Carne congelada,Carne congelada,129
129,Carne,Cerdo,Cerdo,130
130,Carne,Conejo y cordero,Conejo,131
131,Carne,Conejo y cordero,Cordero,132
132,Carne,Embutido,Embutido,133
133,Carne,Hamburguesas y picadas,Hamburguesas,134
134,Carne,Hamburguesas y picadas,Picadas y otros,135


In [5620]:
def clasificar_category_carne(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|carniceria|aves_y_pollo':
        if 'pollo' in name:
            return ('Carne', 'Aves y pollo', 'Pollo')
        else:
            return ('Carne', 'Aves y pollo', 'Pavo y otras aves')

    if subcat == 'productos_frescos|carniceria|vacuno':
        return ('Carne', 'Vacuno', 'Vacuno')

    if subcat == 'productos_frescos|carniceria|cerdo':
        return ('Carne', 'Cerdo', 'Cerdo')

    if subcat == 'productos_frescos|carniceria|preparados_y_arreglos_de_carne':
        return ('Carne', 'Arreglos', 'Arreglos')

    if subcat == 'productos_frescos|carniceria|embutidos_frescos':
        return ('Carne', 'Embutido', 'Embutido')

    if subcat == 'productos_frescos|carniceria|conejo':
        return ('Carne', 'Conejo y cordero', 'Conejo')

    if subcat == 'productos_frescos|carniceria|cordero':
        return ('Carne', 'Conejo y cordero', 'Cordero')

    if subcat == 'productos_frescos|carniceria|carne_picada':
        return ('Carne', 'Hamburguesas y picadas', 'Picadas y otros')

    if subcat == 'productos_frescos|carniceria|hamburguesas':
        return ('Carne', 'Hamburguesas y picadas', 'Hamburguesas')

    if subcat in ['productos_frescos|carniceria|otras_carnes', 'productos_frescos|carniceria|vacuno',
                  'productos_frescos|carniceria|serdo', 'productos_frescos|carniceria|conejo',
                  'productos_frescos|carniceria|cordero', 'productos_frescos|carniceria|preparados_y_arreglos_de_carne']:
       if 'congelado' in name or 'congelada' in name:
           return ('Carne', 'Carne congelada', 'Carne congelada')
       else:
           return ('Carne', 'Hamburguesas y picadas', 'Picadas y otros')

    if subcat == 'productos_frescos|carniceria|empanados':
        if 'congelado' in name:
            return ('Carne', 'Empanados y elaborados', 'Empanados y rebozados congelados')
        else:
            return ('Carne', 'Empanados y elaborados', 'Empanados y elaborados')


    return (pd.NA, pd.NA, pd.NA)

In [5621]:
datamarket_update('productos_frescos|carniceria|empanados', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 364 to 364
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 14 

In [5622]:
datamarket_update('productos_frescos|carniceria|carne_picada', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1593 to 2803
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5623]:
datamarket_update('productos_frescos|carniceria|hamburguesas', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 821 to 4029
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5624]:
datamarket_update('productos_frescos|carniceria|aves_y_pollo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 115 to 2635
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5625]:
datamarket_update('productos_frescos|carniceria|vacuno', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1260 to 4500
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

In [5626]:
datamarket_update('productos_frescos|carniceria|cerdo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2392 to 2392
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5627]:
datamarket_update('productos_frescos|carniceria|preparados_y_arreglos_de_carne', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3344 to 3344
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5628]:
datamarket_update('productos_frescos|carniceria|embutidos_frescos', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 474 to 2011
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5629]:
datamarket_update('productos_frescos|carniceria|conejo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2825 to 3951
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5630]:
datamarket_update('productos_frescos|carniceria|cordero', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 43 to 1872
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14 

In [5631]:
datamarket_update('productos_frescos|carniceria|otras_carnes', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 985 to 4013
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

### Subcategoría "pescaderia"

In [5632]:
current_category = df_category[df_category["category_name"] == 'Marisco y pescado']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
349,Marisco y pescado,Marisco,Marisco,350
350,Marisco y pescado,Marisco,Marisco de concha,351
351,Marisco y pescado,Marisco,Surimi y otros,352
352,Marisco y pescado,Pescado congelado,Pescado congelado,353
353,Marisco y pescado,Pescado congelado,Pescado rebozado congelado,354
354,Marisco y pescado,Pescado congelado,"Sepia, pulpo y calamar congelado",355
355,Marisco y pescado,Pescado fresco,Salmón,356
356,Marisco y pescado,Pescado fresco,Dorada,357
357,Marisco y pescado,Pescado fresco,Lubina,358
358,Marisco y pescado,Pescado fresco,Merluza,359


In [5633]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pescaderia']

Unnamed: 0,category,subcategory,subsubcategory
103,productos_frescos,pescaderia,bacalao_y_salazones
850,productos_frescos,pescaderia,marisco_almejas_y_mejillones
897,productos_frescos,pescaderia,ahumados_y_huevas
1246,productos_frescos,pescaderia,pulpo_calamar_y_sepia
1370,productos_frescos,pescaderia,gulas_surimi_elaborados
2594,productos_frescos,pescaderia,pescado_fresco
3575,productos_frescos,pescaderia,preparados_y_elaborados_de_pescado_y_marisco


In [5634]:
df_mercadona[df_mercadona['name'].str.contains('surimi', case=False, na=False)].head(20)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1892,Palitos de surimi Hacendado ultracongelados,Paquete 0.6 kg,2.5,4.167,kg,Marisco de concha y otros,Congelados,Marisco,False,178
1956,Aritos de surimi a la romana Hacendado ultracongelados,Paquete 0.6 kg,2.95,4.917,kg,Pescado rebozado,Congelados,Rebozados,False,183
1963,Muslitos de surimi Hacendado ultracongelados,Paquete 0.45 kg,2.6,5.778,kg,Pescado rebozado,Congelados,Rebozados,False,183
3942,Palitos de surimi Hacendado,Paquete 0.46 kg,1.99,4.327,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3943,Palitos de surimi Hacendado ultracongelados,Paquete 0.6 kg,2.5,4.167,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3944,Muslitos de surimi Hacendado ultracongelados,Paquete 0.45 kg,2.6,5.778,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3946,Delicias del mar de surimi Hacendado,Bandeja 0.25 kg,1.99,7.96,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3972,Aritos de surimi a la romana Hacendado ultracongelados,Paquete 0.6 kg,2.95,4.917,kg,Pescado rebozado congelado,Marisco y pescado,Pescado congelado,False,354


In [5635]:
df_datamarket[df_datamarket['brand_category'] == 'productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
3575,25854581,carrefour.es,productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco,Preparado de paella de marisco congelado 600 g,,otras marcas,False,4.7,7.83,kg,2023-03-15,False,False,,,
4377,25854578,carrefour.es,productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco,Preparado de marisco para paella Carrefour 400 g.,,carrefour,True,2.35,5.87,kg,2023-03-15,False,False,,,
4879,25854584,carrefour.es,productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco,Mejillón tigre relleno congelado Carrefour 400 g,,carrefour,True,3.95,9.87,kg,2023-03-15,False,False,,,


In [5636]:
def clasificar_category_pescado(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|pescaderia|pescado_fresco':
        if 'salmón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Salmón')
        elif 'dorada' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Dorada')
        elif 'lubina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lubina')
        elif 'merluza' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Merluza')
        elif 'bacalao' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Bacalao')
        elif 'corvina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Corvina')
        elif 'trucha' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Trucha')
        elif 'lenguado' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lenguado')
        elif 'boquerón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Boquerón')
        elif 'rodaballo' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Rodaballo')
        elif 'sardina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Sardina')
        elif 'caballa' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Caballa')
        elif any(x in name for x in ['pulpo', 'sepia', 'calamar', 'potón', 'pota']):
            return ('Marisco y pescado', 'Pescado fresco', 'Sepia, pulpo y calamar')
        else:
            return ('Marisco y pescado', 'Pescado fresco', 'Otros')

    if subcat == 'productos_frescos|pescaderia|pulpo_calamar_y_sepia':
        return ('Marisco y pescado', 'Pescado fresco', 'Sepia, pulpo y calamar')

    if subcat == 'productos_frescos|pescaderia|marisco_almejas_y_mejillones':
        if any(x in name for x in ['mejillón', 'almeja', 'berberechos', 'navajas', 'chirla', 'cañaílla']):
            return ('Marisco y pescado', 'Marisco', 'Marisco de concha')
        else:
            return ('Marisco y pescado', 'Marisco', 'Marisco')

    if subcat == 'productos_frescos|pescaderia|ahumados_y_huevas':
        return ('Marisco y pescado', 'Salazones y ahumados', 'Ahumados')


    if subcat == 'productos_frescos|pescaderia|bacalao_y_salazones':
        return ('Marisco y pescado', 'Salazones y ahumados', 'Salazones')

    if subcat == 'productos_frescos|pescaderia|gulas_surimi_elaborados':
        return ('Marisco y pescado', 'Marisco', 'Surimi y otros')

    if subcat == 'productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco':
        return ('Marisco y pescado', 'Pescado congelado', 'Pescado rebozado congelado')

    return (pd.NA, pd.NA, pd.NA)

In [5637]:
datamarket_update('productos_frescos|pescaderia|preparados_y_elaborados_de_pescado_y_marisco', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 3575 to 4879
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5638]:
datamarket_update('productos_frescos|pescaderia|gulas_surimi_elaborados', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 1370 to 4167
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 1

In [5639]:
datamarket_update('productos_frescos|pescaderia|bacalao_y_salazones', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 103 to 2737
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5640]:
datamarket_update('productos_frescos|pescaderia|ahumados_y_huevas', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 897 to 4122
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5641]:
datamarket_update('productos_frescos|pescaderia|marisco_almejas_y_mejillones', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 850 to 4698
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5642]:
datamarket_update('productos_frescos|pescaderia|pulpo_calamar_y_sepia', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1246 to 4795
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5643]:
datamarket_update('productos_frescos|pescaderia|pescado_fresco', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2594 to 4671
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

### Subcategoría "panaderia_tradicional"

In [5644]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'panaderia_tradicional']

Unnamed: 0,category,subcategory,subsubcategory
105,productos_frescos,panaderia_tradicional,pasteleria_y_reposteria
665,productos_frescos,panaderia_tradicional,bolleria_tradicional
1308,productos_frescos,panaderia_tradicional,pan_tradicional
1755,productos_frescos,panaderia_tradicional,empanadas_y_hojaldres


In [5645]:
df_mercadona[df_mercadona['subcategory_2_nivel_name'] == 'Pan de bocadillo']

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
4299,Panecillo de queso 18%,None 0.085 kg,0.47,5.53,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4300,4 Panes chapata de cristal,None 0.284 kg,1.32,4.648,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4301,Pan mini semillas,None 0.09 kg,0.33,3.667,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4302,5 Barras de pan 51% integral,None 0.625 kg,1.36,2.176,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4303,Panecillo harina integral 50% de trigo sin sal añadida,None 0.08 kg,0.33,4.125,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4304,6 Panes de leche 3%,Pack-6 0.48 kg,1.51,3.146,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4305,6 Panes pulguitas sin aditivos,Pack-6 0.24 kg,1.04,4.334,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4306,Panecillo de trigo integral 30%,None 0.07 kg,0.37,5.286,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4307,6 Panes de centeno 51%,Pack-6 0.47 kg,1.47,3.162,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390
4308,11 Panecillos,None 0.495 kg,1.14,2.304,kg,Pan de bocadillo,Panadería y pastelería,Pan de horno,False,390


In [5646]:
df_mercadona[df_mercadona['name'].str.contains('empanada', case=False, na=False)].head(20)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1208,Filetes pechuga de pollo marinadas empanadas sin gluten,Bandeja 0.36 kg,2.81,7.8,kg,Pollo,Carne,Aves y pollo,False,128
1227,Lagrimitas de pollo al limón empanadas sin gluten,Bandeja 0.34 kg,2.75,8.1,kg,Pollo,Carne,Aves y pollo,False,128
1357,Filetes pechuga de pollo marinadas empanadas sin gluten,Bandeja 0.36 kg,2.81,7.8,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1366,Lagrimitas de pollo al limón empanadas sin gluten,Bandeja 0.34 kg,2.75,8.1,kg,Empanados y elaborados,Carne,Empanados y elaborados,False,138
1957,Merluza empanada pan fino Hacendado ultracongelada,Paquete 0.51 kg,3.95,7.746,kg,Pescado rebozado,Congelados,Rebozados,False,183
1959,Varitas de merluza empanadas Hacendado ultracongeladas,Paquete 0.4 kg,3.95,9.875,kg,Pescado rebozado,Congelados,Rebozados,False,183
1961,Rabas empanadas Hacendado ultracongeladas,Paquete 0.5 kg,4.25,8.5,kg,Pescado rebozado,Congelados,Rebozados,False,183
1965,Figuritas de merluza empanadas Hacendado ultracongeladas,Paquete 0.5 kg,3.55,7.1,kg,Pescado rebozado,Congelados,Rebozados,False,183
3973,Merluza empanada pan fino Hacendado ultracongelada,Paquete 0.51 kg,3.95,7.746,kg,Pescado rebozado congelado,Marisco y pescado,Pescado congelado,False,354
3975,Varitas de merluza empanadas Hacendado ultracongeladas,Paquete 0.4 kg,3.95,9.875,kg,Pescado rebozado congelado,Marisco y pescado,Pescado congelado,False,354


In [5647]:
def clasificar_category_panes(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|panaderia_tradicional|pan_tradicional':
        if 'rebanado' in name or 'rebanada' in name or 'rebanadas' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Pan rebanado')
        elif 'barra' in name or 'baguette' in name or 'barras' in name:
            return ('Panadería y pastelería', 'Pan de horno', 'Barra de pan')
        elif 'de molde' in name:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de molde')
        elif any(x in name for x in ['tortillas', 'hot dog', 'hamburguesa', 'pita', 'piadinas', 'bocados']):
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Pan de hamburguesa y wrap')
        elif 'tostado' in name:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan tostado')
        elif 'rallado' in name:
            return ('Panadería y pastelería', 'Pan tostado y rallado', 'Pan rallado')
        else:
            return ('Panadería y pastelería', 'Pan de molde y otras especialidades', 'Otros panes')

    if subcat in ['productos_frescos|panaderia_tradicional|bolleria_tradicional', 'productos_frescos|panaderia_tradicional|empanadas_y_hojaldres'] :
        if any(x in name for x in ['empanadillas', 'saladas', 'empanada']):
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería salada')
        elif any(x in name for x in ['paquete', 'pack', 'bolsa']):
            return ('Panadería y pastelería', 'Bollería envasada', 'Bollería envasada')
        elif 'surtido' in name:
            return ('Panadería y pastelería', 'Bollería envasada', 'Pastelitos surtidos')
        else:
            return ('Panadería y pastelería', 'Bollería de horno', 'Bollería dulce')

    if subcat == 'productos_frescos|panaderia_tradicional|pasteleria_y_reposteria':
        if 'infantil' in name:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas infantiles')
        else:
            return ('Panadería y pastelería', 'Tartas y pasteles', 'Tartas')


    return (pd.NA, pd.NA, pd.NA)

In [5648]:
datamarket_update('productos_frescos|panaderia_tradicional|pan_tradicional', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1308 to 3633
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5649]:
datamarket_update('productos_frescos|panaderia_tradicional|bolleria_tradicional', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 665 to 4247
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5650]:
datamarket_update('productos_frescos|panaderia_tradicional|empanadas_y_hojaldres', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1755 to 3949
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5651]:
datamarket_update('productos_frescos|panaderia_tradicional|pasteleria_y_reposteria', clasificar_category_panes)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 105 to 4749
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

### Subcategoría "frutas"

In [5652]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'frutas']

Unnamed: 0,category,subcategory,subsubcategory
135,productos_frescos,frutas,manzanas_y_peras
202,frescos,frutas,citricos
337,frescos,frutas,platanos
611,frescos,frutas,frutas_tropicales
783,productos_frescos,frutas,platanos_y_bananas
1325,productos_frescos,frutas,pinas_kiwis_aguacates_y_tropicales
1716,frescos,frutas,peras
1732,productos_frescos,frutas,frutos_del_bosque
2505,productos_frescos,frutas,naranjas_y_otros_citricos
3319,frescos,frutas,manzanas


In [5653]:
def clasificar_fruta(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|frutas|naranjas_y_otros_citricos':
        return ('Fruta y verdura', 'Fruta', 'Cítricos')

    if subcat == 'productos_frescos|frutas|pinas_kiwis_aguacates_y_tropicales':
        return ('Fruta y verdura', 'Fruta', 'Fruta tropical')

    if subcat == 'productos_frescos|frutas|manzanas_y_peras':
        return ('Fruta y verdura', 'Fruta', 'Manzana y pera')

    if subcat in ['productos_frescos|frutas|platanos_y_bananas', 'productos_frescos|frutas|uvas']:
        return ('Fruta y verdura', 'Fruta', 'Plátano y uva')

    if subcat == 'productos_frescos|frutas|frutos_del_bosque':
        return ('Fruta y verdura', 'Fruta', 'Otras frutas')

    if subcat == 'productos_frescos|frutas|fruta_de_temporada':
        if 'melón' in name or 'sandía' in name:
            return ('Fruta y verdura', 'Fruta', 'Melón y sandía')
        else:
            return ('Fruta y verdura', 'Fruta', 'Otras frutas')


    return (pd.NA, pd.NA, pd.NA)

In [5654]:
datamarket_update('productos_frescos|frutas|naranjas_y_otros_citricos', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2505 to 4273
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5655]:
datamarket_update('productos_frescos|frutas|fruta_de_temporada', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4018 to 4018
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5656]:
datamarket_update('productos_frescos|frutas|frutos_del_bosque', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1732 to 2852
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5657]:
datamarket_update('productos_frescos|frutas|uvas', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3390 to 4184
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5658]:
datamarket_update('productos_frescos|frutas|platanos_y_bananas', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 783 to 3028
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5659]:
datamarket_update('productos_frescos|frutas|manzanas_y_peras', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 135 to 4409
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5660]:
datamarket_update('productos_frescos|frutas|pinas_kiwis_aguacates_y_tropicales', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 1325 to 4509
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 1

### Subcategoría "verduras_y_hortalizas"

In [5661]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'verduras_y_hortalizas']

Unnamed: 0,category,subcategory,subsubcategory
29,congelados,verduras_y_hortalizas,
34,frescos,verduras_y_hortalizas,tomates_pimientos_y_pepinos
51,frescos,verduras_y_hortalizas,verduras_y_ensaladas_preparadas
147,productos_frescos,verduras_y_hortalizas,hortalizas
269,productos_frescos,verduras_y_hortalizas,hierbas_aromaticas_y_especias
751,frescos,verduras_y_hortalizas,otras_verduras
832,frescos,verduras_y_hortalizas,ajos_cebollas_y_puerros
935,frescos,verduras_y_hortalizas,patatas_y_zanahorias
937,productos_frescos,verduras_y_hortalizas,ensaladas_y_verduras_preparadas
952,productos_frescos,verduras_y_hortalizas,setas_y_hongos


In [5662]:
def clasificar_verdura(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|verduras_y_hortalizas|setas_y_hongos':
        return ('Fruta y verdura', 'Verdura', 'Setas y champiñones')

    if subcat == 'productos_frescos|verduras_y_hortalizas|ensaladas_y_verduras_preparadas':
        return ('Fruta y verdura', 'Lechuga y ensalada preparada', 'Ensalada preparada')

    if subcat == 'productos_frescos|verduras_y_hortalizas|patatas_cebollas_y_ajos':
        if 'patata' in name:
            return ('Fruta y verdura', 'Verdura', 'Patata')
        else:
            return ('Fruta y verdura', 'Verdura', 'Cebolla y ajo')

    if subcat == 'productos_frescos|verduras_y_hortalizas|tomates_y_pepinos':
        if 'tomate' in name or 'tomates' in name:
            return ('Fruta y verdura', 'Verdura', 'Tomate')
        else:
            return ('Fruta y verdura', 'Verdura', 'Pepino y zanahoria')

    if subcat == 'productos_frescos|verduras_y_hortalizas|hierbas_aromaticas_y_especias':
        return ('Fruta y verdura', 'Verdura', 'Hierbas aromáticas')

    if subcat == 'productos_frescos|verduras_y_hortalizas|hortalizas':
        if 'patata' in name:
            return ('Fruta y verdura', 'Verdura', 'Patata')
        elif 'zanahoria' in name or 'pepino' in name:
            return ('Fruta y verdura', 'Verdura', 'Pepino y zanahoria')
        elif 'tomate' in name:
            return ('Fruta y verdura', 'Verdura', 'Tomate')
        elif 'calabacín' in name or 'pimiento' in name:
            return ('Fruta y verdura', 'Verdura', 'Calabacín y pimiento')
        elif any(x in name for x in ['brócoli', 'coliflores', 'col', 'coliflor', 'repollo']):
            return ('Fruta y verdura', 'Verdura', 'Repollo y col')
        elif any(x in name for x in ['ajo', 'cebolla', 'puerro']):
            return ('Fruta y verdura', 'Verdura', 'Cebolla y ajo')
        elif 'setas' in name or 'champiñones' in name:
            return ('Fruta y verdura', 'Verdura', 'Setas y champiñones')
        elif any(x in name for x in ['perejil', 'jengibre', 'cilantro', 'albahaca', 'hierbabuena', 'cebollino']):
            return ('Fruta y verdura', 'Verdura', 'Hierbas aromáticas')
        elif 'al vapor' in name:
            return ('Fruta y verdura', 'Verdura', 'Verduras al vapor')
        else:
            return ('Fruta y verdura', 'Verdura', 'Otras verduras y hortalizas')


    return (pd.NA, pd.NA, pd.NA)

In [5663]:
datamarket_update('productos_frescos|verduras_y_hortalizas|setas_y_hongos', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 952 to 4044
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5664]:
datamarket_update('productos_frescos|verduras_y_hortalizas|ensaladas_y_verduras_preparadas', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 937 to 4545
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5665]:
datamarket_update('productos_frescos|verduras_y_hortalizas|patatas_cebollas_y_ajos', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1279 to 4740
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5666]:
datamarket_update('productos_frescos|verduras_y_hortalizas|tomates_y_pepinos', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2495 to 3766
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5667]:
datamarket_update('productos_frescos|verduras_y_hortalizas|hierbas_aromaticas_y_especias', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 269 to 4923
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5668]:
datamarket_update('productos_frescos|verduras_y_hortalizas|hortalizas', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 147 to 2403
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

### Subcategoría "charcuteria_y_quesos_al_corte"

In [5669]:
current_category = df_category[df_category["category_name"] == 'Charcutería y quesos']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
146,Charcutería y quesos,Aves y jamón cocido,Pavo y otros,147
147,Charcutería y quesos,Aves y jamón cocido,Jamón cocido,148
148,Charcutería y quesos,Bacón y salchichas,Bacón,149
149,Charcutería y quesos,Bacón y salchichas,Salchichas,150
150,Charcutería y quesos,Chopped y mortadela,Chopped,151
151,Charcutería y quesos,Chopped y mortadela,Mortadela,152
152,Charcutería y quesos,Embutido curado,Salchichón,153
153,Charcutería y quesos,Embutido curado,Chorizo,154
154,Charcutería y quesos,Embutido curado,Lomo y otros,155
155,Charcutería y quesos,Jamón serrano,Jamón serrano,156


In [5670]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'charcuteria_y_quesos_al_corte']

Unnamed: 0,category,subcategory,subsubcategory
2292,productos_frescos,charcuteria_y_quesos_al_corte,quesos_internacionales
2521,productos_frescos,charcuteria_y_quesos_al_corte,quesos_nacionales


In [5671]:
df_datamarket[df_datamarket['brand_category'] == 'productos_frescos|charcuteria_y_quesos_al_corte|quesos_internacionales']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
2292,25855115,carrefour.es,productos_frescos|charcuteria_y_quesos_al_corte|quesos_internacionales,Queso parmesano reggiano Parmareggio 150 g.,,otras marcas,False,5.35,35.67,kg,2023-03-15,False,False,,,


In [5672]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'quesos']

Unnamed: 0,category,subcategory,subsubcategory
179,productos_frescos,quesos,gouda_y_emmental
211,productos_frescos,quesos,curado
366,productos_frescos,quesos,rallados
485,productos_frescos,quesos,fundidos
513,productos_frescos,quesos,queso_fresco
812,productos_frescos,quesos,otros_internacionales
852,productos_frescos,quesos,anejo
1033,productos_frescos,quesos,brie_y_camembert
1636,productos_frescos,quesos,crema_de_queso
2751,productos_frescos,quesos,tierno


In [5673]:
def clasificar_quesos(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|quesos|queso_fresco':
        return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso fresco')

    if subcat in ['productos_frescos|quesos|azules_y_roquefort', 'productos_frescos|quesos|brie_y_camembert']:
        return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso roquefort, camembert y cabra')

    if subcat in ['productos_frescos|quesos|crema_de_queso', 'productos_frescos|quesos|fundidos']:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso untable')

    if subcat in ['productos_frescos|quesos|anejo', 'productos_frescos|quesos|curado']:
        return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso curado')

    if subcat == 'productos_frescos|quesos|semicurado':
        return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso semicurado')

    if subcat in ['productos_frescos|quesos|tierno', 'productos_frescos|quesos|gouda_y_emmental']:
        return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso tierno')


    if subcat == 'productos_frescos|quesos|rallados':
        return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso rallado')

    if subcat in ['productos_frescos|charcuteria_y_quesos_al_corte|quesos_internacionales','productos_frescos|quesos|con_denominacion_origen',
                  'productos_frescos|charcuteria_y_quesos_al_corte|quesos_nacionales', 'productos_frescos|quesos|otros_internacionales']:
        if 'añejo' in name or 'curado' in name or 'viejo' in name or'grana padano' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso curado')
        elif 'semicurado' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso semicurado')
        else:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso tierno')

    if subcat in ['productos_frescos|quesos|gouda_y_emmental', 'productos_frescos|quesos|tierno',
                  'productos_frescos|quesos|semicurado', 'productos_frescos|quesos|anejo',
                  'productos_frescos|charcuteria_y_quesos_al_corte|quesos_internacionales','productos_frescos|quesos|con_denominacion_origen',
                  'productos_frescos|charcuteria_y_quesos_al_corte|quesos_nacionales', 'productos_frescos|quesos|otros_internacionales']:
        if 'lonchas' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso lonchas')
        elif 'rallado' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso rallado')
        elif 'porciones' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso en porciones')

    return (pd.NA, pd.NA, pd.NA)

In [5674]:
datamarket_update('productos_frescos|quesos|queso_fresco', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 513 to 4137
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5675]:
datamarket_update('productos_frescos|quesos|azules_y_roquefort', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4212 to 4212
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5676]:
datamarket_update('productos_frescos|quesos|brie_y_camembert', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1033 to 4645
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5677]:
datamarket_update('productos_frescos|quesos|crema_de_queso', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1636 to 2239
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5678]:
datamarket_update('productos_frescos|quesos|fundidos', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 485 to 3669
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5679]:
datamarket_update('productos_frescos|quesos|anejo', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 852 to 4619
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5680]:
datamarket_update('productos_frescos|quesos|semicurado', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2872 to 2872
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5681]:
datamarket_update('productos_frescos|quesos|tierno', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 2751 to 4262
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5682]:
datamarket_update('productos_frescos|quesos|rallados', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 366 to 4053
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5683]:
datamarket_update('productos_frescos|charcuteria_y_quesos_al_corte|quesos_internacionales', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2292 to 2292
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5684]:
datamarket_update('productos_frescos|quesos|con_denominacion_origen', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3034 to 3393
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5685]:
datamarket_update('productos_frescos|charcuteria_y_quesos_al_corte|quesos_nacionales', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2521 to 2521
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5686]:
datamarket_update('productos_frescos|quesos|otros_internacionales', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 812 to 3546
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5687]:
datamarket_update('productos_frescos|quesos|gouda_y_emmental', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 179 to 4847
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         6 non-null      int64  
 1   supermarket                6 non-null      object 
 2   brand_category             6 non-null      object 
 3   name                       6 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      6 non-null      float64
 8   reference_price            6 non-null      float64
 9   reference_unit             6 non-null      object 
 10  insert_date                6 non-null      object 
 11  price_corrected            6 non-null      bool   
 12  reference_price_corrected  6 non-null      bool   
 13  category_name              6 non-null      object 
 14

In [5688]:
datamarket_update('productos_frescos|quesos|curado', clasificar_quesos)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 211 to 2262
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

### Subcategoría "charcuteria"

In [5689]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'charcuteria']

Unnamed: 0,category,subcategory,subsubcategory
711,productos_frescos,charcuteria,mortadela_y_chopped
992,productos_frescos,charcuteria,salchichas
1184,productos_frescos,charcuteria,bacon_y_panceta
1191,productos_frescos,charcuteria,foie_pates_y_sobrasadas
1375,productos_frescos,charcuteria,jamon_serrano_e_iberico_pieza
1428,productos_frescos,charcuteria,jamon_cocido_y_lacon
1529,productos_frescos,charcuteria,chorizo_lomo_y_otros
1540,productos_frescos,charcuteria,jamon_serrano_e_iberico_envasado
1585,productos_frescos,charcuteria,salchichon_salami_y_fuet
2195,productos_frescos,charcuteria,fiambre_de_pavo_y_pollo


In [5690]:
df_datamarket[df_datamarket['brand_category'] == 'productos_frescos|charcuteria|tablas_y_surtidos']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
4869,25855103,carrefour.es,productos_frescos|charcuteria|tablas_y_surtidos,Paleta en lonchas bodega Señorio de Ioar 300 g.,,otras marcas,False,4.19,13.97,kg,2023-03-15,False,False,,,


In [5691]:
def clasificar_category_charcuteria(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat in ['productos_frescos|charcuteria|jamon_serrano_e_iberico_pieza', 'productos_frescos|charcuteria|jamon_serrano_e_iberico_envasado',
                  'productos_frescos|charcuteria|tablas_y_surtidos']:
        return ('Charcutería y quesos', 'Jamón serrano', 'Jamón serrano')

    if subcat == 'productos_frescos|charcuteria|jamon_cocido_y_lacon':
        return ('Charcutería y quesos', 'Aves y jamón cocido', 'Jamón cocido')

    if subcat == 'productos_frescos|charcuteria|fiambre_de_pavo_y_pollo':
        return ('Charcutería y quesos', 'Aves y jamón cocido', 'Pavo y otros')

    if subcat == 'productos_frescos|charcuteria|mortadela_y_chopped':
        if 'mortadela' in name or 'galantina' in name:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Mortadela')
        else:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Chopped')

    if subcat == 'productos_frescos|charcuteria|bacon_y_panceta':
        return ('Charcutería y quesos', 'Bacón y salchichas', 'Bacón')

    if subcat == 'productos_frescos|charcuteria|salchichas':
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Salchichas')

    if subcat == 'productos_frescos|charcuteria|salchichon_salami_y_fuet':
        return ('Charcutería y quesos', 'Embutido curado', 'Salchichón')

    if subcat == 'productos_frescos|charcuteria|chorizo_lomo_y_otros':
        if 'chorizo' in name:
            return ('Charcutería y quesos', 'Embutido curado', 'Chorizo')
        else:
            return ('Charcutería y quesos', 'Embutido curado', 'Lomo y otros')

    if subcat == 'productos_frescos|charcuteria|foie_pates_y_sobrasadas':
        if 'sobrasada' in name:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Sobrasada')
        else:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Paté')


    return (pd.NA, pd.NA, pd.NA)

In [5692]:
datamarket_update('productos_frescos|charcuteria|foie_pates_y_sobrasadas', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1191 to 2912
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5693]:
datamarket_update('productos_frescos|charcuteria|chorizo_lomo_y_otros', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1529 to 2608
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5694]:
datamarket_update('productos_frescos|charcuteria|salchichon_salami_y_fuet', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1585 to 4482
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5695]:
datamarket_update('productos_frescos|charcuteria|bacon_y_panceta', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1184 to 1946
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5696]:
datamarket_update('productos_frescos|charcuteria|mortadela_y_chopped', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 711 to 4764
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 14

In [5697]:
datamarket_update('productos_frescos|charcuteria|fiambre_de_pavo_y_pollo', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 2195 to 2195
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5698]:
datamarket_update('productos_frescos|charcuteria|jamon_cocido_y_lacon', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1428 to 4978
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 1

In [5699]:
datamarket_update('productos_frescos|charcuteria|jamon_serrano_e_iberico_pieza', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1375 to 2616
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5700]:
datamarket_update('productos_frescos|charcuteria|tablas_y_surtidos', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4869 to 4869
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5701]:
datamarket_update('productos_frescos|charcuteria|jamon_serrano_e_iberico_envasado', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 1540 to 3688
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         3 non-null      int64  
 1   supermarket                3 non-null      object 
 2   brand_category             3 non-null      object 
 3   name                       3 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  3 non-null      object 
 6   trademark_propietary_flag  3 non-null      object 
 7   price                      3 non-null      float64
 8   reference_price            3 non-null      float64
 9   reference_unit             3 non-null      object 
 10  insert_date                3 non-null      object 
 11  price_corrected            3 non-null      bool   
 12  reference_price_corrected  3 non-null      bool   
 13  category_name              3 non-null      object 
 1

In [5702]:
datamarket_update('productos_frescos|charcuteria|salchichas', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 992 to 4297
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

### Subcategoría "platos_preparados"

In [5703]:
current_category = df_category[df_category["category_name"] == 'Pizzas y platos preparados']
current_category

Unnamed: 0,category_name,subcategory_name,subcategory_2_nivel_name,category_id
404,Pizzas y platos preparados,Listo para Comer,Platos calientes,405
405,Pizzas y platos preparados,Listo para Comer,Platos fríos,406
406,Pizzas y platos preparados,Pizzas,Pizzas refrigeradas,407
407,Pizzas y platos preparados,Pizzas,Pizzas congeladas,408
408,Pizzas y platos preparados,Pizzas,Base de pizza,409
409,Pizzas y platos preparados,Pizzas,"Roscas, quiche y baguettes",410
410,Pizzas y platos preparados,Platos preparados calientes,Pasta,411
411,Pizzas y platos preparados,Platos preparados calientes,Arroz,412
412,Pizzas y platos preparados,Platos preparados calientes,Carne,413
413,Pizzas y platos preparados,Platos preparados calientes,Tortilla,414


In [5704]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'platos_preparados']

Unnamed: 0,category,subcategory,subsubcategory
1,productos_frescos,platos_preparados,fritos
59,productos_frescos,platos_preparados,platos_de_verdura
650,productos_frescos,platos_preparados,platos_de_carne_y_pescado
658,productos_frescos,platos_preparados,ensaladas_gazpachos_y_sandwiches
693,productos_frescos,platos_preparados,arroces
2888,productos_frescos,platos_preparados,pastas_y_pizzas
4202,productos_frescos,platos_preparados,cocina_internacional


In [5705]:
df_mercadona[df_mercadona['name'].str.contains('Espinacas', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
564,Pasta fresca ravioli ricotta y espinacas Hacendado,Bandeja 0.25 kg,1.8,7.2,kg,Pasta rellena,"Arroz, legumbres y pasta",Pasta y fideos,False,49
1317,Burger de pavo y espinacas,Bandeja 0.24 kg,2.2,9.167,kg,Hamburguesas,Carne,Hamburguesas y picadas,False,134
2034,Espinacas a la crema Hacendado ultracongeladas,Paquete 0.45 kg,2.2,4.889,kg,Verdura,Congelados,Verdura,False,189
3119,Espinacas cortadas,Paquete 0.3 kg,1.24,4.134,kg,Otras verduras y hortalizas,Fruta y verdura,Verdura,False,277
4205,Empanadilla de espinacas 26%,Pieza 0.12 kg,1.4,11.667,kg,Bollería salada,Panadería y pastelería,Bollería de horno,False,383
4524,Lasaña de espinacas y requesón Hacendado,Bandeja 0.35 kg,3.0,8.572,kg,Pasta,Pizzas y platos preparados,Platos preparados calientes,False,411
4576,Mini hamburguesas vegetales Hacendado con espinacas y zanahoria,Paquete 0.2 kg,2.4,12.0,kg,Otros,Pizzas y platos preparados,Platos preparados calientes,False,417


In [5706]:
df_datamarket[df_datamarket['brand_category'] == 'productos_frescos|platos_preparados|arroces']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
693,25855474,carrefour.es,productos_frescos|platos_preparados|arroces,Arroz negro Carrefour 350 g,,carrefour,True,2.85,8.14,kg,2023-03-15,False,False,,,
1733,25855478,carrefour.es,productos_frescos|platos_preparados|arroces,Paella 300 g,,otras marcas,False,4.4,14.67,kg,2023-03-15,False,False,,,
2048,25855477,carrefour.es,productos_frescos|platos_preparados|arroces,Paella Valenciana 300 g,,otras marcas,False,2.99,9.97,kg,2023-03-15,False,False,,,
2877,25855473,carrefour.es,productos_frescos|platos_preparados|arroces,Paella mixta 350 g,,otras marcas,False,2.85,8.14,kg,2023-03-15,False,False,,,


In [5707]:
def clasificar_preparados(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'productos_frescos|platos_preparados|ensaladas_gazpachos_y_sandwiches':
        if 'gazpacho' in name or 'salmorejo' in name:
            return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Gazpacho y salmorejo')
    if 'ensalada' in name or 'ensaladilla' in name:
        return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Ensaladilla')
    else:
        return ('Pizzas y platos preparados', 'Platos preparados fríos', 'Sándwich')


    if subcat == 'productos_frescos|platos_preparados|arroces':
        return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Arroz')


    if subcat == 'productos_frescos|platos_preparados|platos_de_carne_y_pescado':
        if 'carne' in name or 'pollo' in name or 'pavo' in name or 'cerdo' in name or 'vacuno' in name:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Carne')
        else:
            return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos calientes')


    if subcat == 'productos_frescos|platos_preparados|pastas_y_pizzas':
        if any(x in name for x in ['pasta oriental', 'noodles orientales', 'yakisoba', 'noodles de arroz', 	'fideos orientales', 	'fideos de arroz']):
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Fideos orientales')
        elif 'pizza' in name and 'refrigerada' in name:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas refrigeradas')
        elif 'pizza' in name and 'congelada' in name:
            return ('Pizzas y platos preparados', 'Pizzas', 'Pizzas congeladas')
        else:
            return ('Pizzas y platos preparados', 'Platos preparados calientes', 'Pasta')


    if subcat in ['productos_frescos|platos_preparados|platos_de_verdura', 'productos_frescos|platos_preparados|arroces',
                  'productos_frescos|platos_preparados|platos_de_carne_y_pescado', 'productos_frescos|platos_preparados|fritos']:
        return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos calientes')


    if subcat in ['productos_frescos|platos_preparados|cocina_internacional', 'productos_frescos|platos_preparados|ensaladas_gazpachos_y_sandwiches']:
        return ('Pizzas y platos preparados', 'Listo para Comer', 'Platos fríos')

    return (pd.NA, pd.NA, pd.NA)

In [5708]:
datamarket_update('productos_frescos|platos_preparados|ensaladas_gazpachos_y_sandwiches', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 658 to 2518
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5709]:
datamarket_update('productos_frescos|platos_preparados|arroces', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 693 to 2877
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5710]:
datamarket_update('productos_frescos|platos_preparados|platos_de_carne_y_pescado', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 650 to 4565
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5711]:
datamarket_update('productos_frescos|platos_preparados|pastas_y_pizzas', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 2888 to 3862
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5712]:
datamarket_update('productos_frescos|platos_preparados|platos_de_verdura', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 59 to 3624
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  5 non-null      object 
 6   trademark_propietary_flag  5 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14 

In [5713]:
datamarket_update('productos_frescos|platos_preparados|fritos', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 1 to 4974
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14  

In [5714]:
datamarket_update('productos_frescos|platos_preparados|cocina_internacional', clasificar_preparados)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4202 to 4202
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

# Procesamiento de la categoría "frescos"

In [5715]:
df_categorias_datamarket[df_categorias_datamarket['category'] == 'frescos']

Unnamed: 0,category,subcategory,subsubcategory
34,frescos,verduras_y_hortalizas,tomates_pimientos_y_pepinos
51,frescos,verduras_y_hortalizas,verduras_y_ensaladas_preparadas
76,frescos,charcuteria_y_quesos,curados
80,frescos,charcuteria_y_quesos,cocidos
202,frescos,frutas,citricos
337,frescos,frutas,platanos
388,frescos,carne,pavo
406,frescos,charcuteria_y_quesos,quesos
432,frescos,pescado_y_marisco,pescado_y_marisco_fresco
455,frescos,carne,pollo


### Subcategoría "frutas"

In [5716]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'frutas']

Unnamed: 0,category,subcategory,subsubcategory
135,productos_frescos,frutas,manzanas_y_peras
202,frescos,frutas,citricos
337,frescos,frutas,platanos
611,frescos,frutas,frutas_tropicales
783,productos_frescos,frutas,platanos_y_bananas
1325,productos_frescos,frutas,pinas_kiwis_aguacates_y_tropicales
1716,frescos,frutas,peras
1732,productos_frescos,frutas,frutos_del_bosque
2505,productos_frescos,frutas,naranjas_y_otros_citricos
3319,frescos,frutas,manzanas


In [5717]:
def clasificar_fruta(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'frescos|frutas|citricos':
        return ('Fruta y verdura', 'Fruta', 'Cítricos')

    if subcat == 'frescos|frutas|frutas_tropicales':
        return ('Fruta y verdura', 'Fruta', 'Fruta tropical')

    if subcat in ['frescos|frutas|manzanas', 'frescos|frutas|peras']:
        return ('Fruta y verdura', 'Fruta', 'Manzana y pera')

    if subcat in ['frescos|frutas|platanos', 'frescos|frutas|uvas']:
        return ('Fruta y verdura', 'Fruta', 'Plátano y uva')

    return (pd.NA, pd.NA, pd.NA)

In [5718]:
datamarket_update('frescos|frutas|citricos', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 202 to 2811
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5719]:
datamarket_update('frescos|frutas|frutas_tropicales', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 611 to 2433
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5720]:
datamarket_update('frescos|frutas|manzanas', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3319 to 3805
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5721]:
datamarket_update('frescos|frutas|peras', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1716 to 4981
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5722]:
datamarket_update('frescos|frutas|platanos', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 337 to 2957
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5723]:
datamarket_update('frescos|frutas|uvas', clasificar_fruta)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 3888 to 3888
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

### Subcategoría "verduras_y_hortalizas"

In [5724]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'verduras_y_hortalizas']

Unnamed: 0,category,subcategory,subsubcategory
29,congelados,verduras_y_hortalizas,
34,frescos,verduras_y_hortalizas,tomates_pimientos_y_pepinos
51,frescos,verduras_y_hortalizas,verduras_y_ensaladas_preparadas
147,productos_frescos,verduras_y_hortalizas,hortalizas
269,productos_frescos,verduras_y_hortalizas,hierbas_aromaticas_y_especias
751,frescos,verduras_y_hortalizas,otras_verduras
832,frescos,verduras_y_hortalizas,ajos_cebollas_y_puerros
935,frescos,verduras_y_hortalizas,patatas_y_zanahorias
937,productos_frescos,verduras_y_hortalizas,ensaladas_y_verduras_preparadas
952,productos_frescos,verduras_y_hortalizas,setas_y_hongos


In [5725]:
def clasificar_verdura(row):

    name = row['name'].lower()
    subcat = row['brand_category']


    if subcat == 'frescos|verduras_y_hortalizas|setas_y_champinones':
        return ('Fruta y verdura', 'Verdura', 'Setas y champiñones')

    if subcat == 'frescos|verduras_y_hortalizas|ajos_cebollas_y_puerros':
        return ('Fruta y verdura', 'Verdura', 'Cebolla y ajo')

    if subcat == 'frescos|verduras_y_hortalizas|lechugas_escarolas_y_endivias':
        return ('Fruta y verdura', 'Lechuga y ensalada preparada', 'Lechuga')

    if subcat == 'frescos|verduras_y_hortalizas|verduras_y_ensaladas_preparadas':
        return ('Fruta y verdura', 'Lechuga y ensalada preparada', 'Ensalada preparada')

    if subcat == 'frescos|verduras_y_hortalizas|patatas_y_zanahorias':
        if 'patata' in name or 'patatas' in name:
            return ('Fruta y verdura', 'Verdura', 'Patata')
        else:
            return ('Fruta y verdura', 'Verdura', 'Pepino y zanahoria')

    if subcat == 'frescos|verduras_y_hortalizas|tomates_pimientos_y_pepinos':
        if 'tomate' in name or 'tomates' in name:
            return ('Fruta y verdura', 'Verdura', 'Tomate')
        elif 'pepino' in name or 'pepinos' in name:
            return ('Fruta y verdura', 'Verdura', 'Pepino y zanahoria')
        else:
            return ('Fruta y verdura', 'Verdura', 'Calabacín y pimiento')

    if subcat == 'frescos|verduras_y_hortalizas|calabacin_calabaza_y_berenjena':
        if 'calabacín' in name or 'calabacines' in name:
            return ('Fruta y verdura', 'Verdura', 'Calabacín y pimiento')
        else:
            return ('Fruta y verdura', 'Verdura', 'Otras verduras y hortalizas')

    if subcat == 'frescos|verduras_y_hortalizas|brocolis_y_coliflores':
        if any(x in name for x in ['brócoli', 'coliflores', 'col', 'coliflor', 'repollo']):
            return ('Fruta y verdura', 'Verdura', 'Repollo y col')
        else:
            return ('Fruta y verdura', 'Verdura', 'Otras verduras y hortalizas')

    if subcat == 'frescos|verduras_y_hortalizas|otras_verduras':
        if any(x in name for x in ['perejil', 'jengibre', 'cilantro', 'albahaca', 'hierbabuena', 'cebollino']):
            return ('Fruta y verdura', 'Verdura', 'Hierbas aromáticas')
        elif 'al vapor' in name:
            return ('Fruta y verdura', 'Verdura', 'Verduras al vapor')
        else:
            return ('Fruta y verdura', 'Verdura', 'Otras verduras y hortalizas')

    return (pd.NA, pd.NA, pd.NA)

In [5726]:
datamarket_update('frescos|verduras_y_hortalizas|otras_verduras', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 751 to 4885
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5727]:
datamarket_update('frescos|verduras_y_hortalizas|setas_y_champinones', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4098 to 4098
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

In [5728]:
datamarket_update('frescos|verduras_y_hortalizas|ajos_cebollas_y_puerros', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 832 to 1914
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5729]:
datamarket_update('frescos|verduras_y_hortalizas|brocolis_y_coliflores', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3654 to 4472
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5730]:
datamarket_update('frescos|verduras_y_hortalizas|calabacin_calabaza_y_berenjena', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3188 to 4470
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5731]:
datamarket_update('frescos|verduras_y_hortalizas|tomates_pimientos_y_pepinos', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 34 to 4825
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 14

In [5732]:
datamarket_update('frescos|verduras_y_hortalizas|patatas_y_zanahorias', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, 935 to 3966
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         5 non-null      int64  
 1   supermarket                5 non-null      object 
 2   brand_category             5 non-null      object 
 3   name                       5 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      5 non-null      float64
 8   reference_price            5 non-null      float64
 9   reference_unit             5 non-null      object 
 10  insert_date                5 non-null      object 
 11  price_corrected            5 non-null      bool   
 12  reference_price_corrected  5 non-null      bool   
 13  category_name              5 non-null      object 
 14

In [5733]:
datamarket_update('frescos|verduras_y_hortalizas|verduras_y_ensaladas_preparadas', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 51 to 4706
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  6 non-null      object 
 6   trademark_propietary_flag  6 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14 

In [5734]:
datamarket_update('frescos|verduras_y_hortalizas|lechugas_escarolas_y_endivias', clasificar_verdura)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 1720 to 3913
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  0 non-null      object 
 6   trademark_propietary_flag  0 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

### Subcategoría "Carne"

In [5735]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'carne']

Unnamed: 0,category,subcategory,subsubcategory
388,frescos,carne,pavo
455,frescos,carne,pollo
471,frescos,carne,cerdo
621,frescos,carne,vacuno
1151,platos_preparados,carne,
4120,frescos,carne,mixto


In [5736]:
def clasificar_category_carne(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'frescos|carne|pollo':
        return ('Carne', 'Aves y pollo', 'Pollo')

    if subcat == 'frescos|carne|pavo':
        return ('Carne', 'Aves y pollo', 'Pavo y otras aves')

    if subcat == 'frescos|carne|vacuno':
        return ('Carne', 'Vacuno', 'Vacuno')

    if subcat == 'frescos|carne|cerdo':
        return ('Carne', 'Cerdo', 'Cerdo')

    if subcat == 'frescos|carne|mixto':
        return ('Carne', 'Hamburguesas y picadas', 'Picadas y otros')

    return (pd.NA, pd.NA, pd.NA)

In [5737]:
datamarket_update('frescos|carne|pollo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 455 to 4954
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         10 non-null     int64  
 1   supermarket                10 non-null     object 
 2   brand_category             10 non-null     object 
 3   name                       10 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      10 non-null     float64
 8   reference_price            10 non-null     float64
 9   reference_unit             10 non-null     object 
 10  insert_date                10 non-null     object 
 11  price_corrected            10 non-null     bool   
 12  reference_price_corrected  10 non-null     bool   
 13  category_name              10 non-null     object 
 1

In [5738]:
datamarket_update('frescos|carne|pavo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 388 to 4324
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

In [5739]:
datamarket_update('frescos|carne|vacuno', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, 621 to 3328
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         7 non-null      int64  
 1   supermarket                7 non-null      object 
 2   brand_category             7 non-null      object 
 3   name                       7 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  7 non-null      object 
 6   trademark_propietary_flag  7 non-null      object 
 7   price                      7 non-null      float64
 8   reference_price            7 non-null      float64
 9   reference_unit             7 non-null      object 
 10  insert_date                7 non-null      object 
 11  price_corrected            7 non-null      bool   
 12  reference_price_corrected  7 non-null      bool   
 13  category_name              7 non-null      object 
 14

In [5740]:
datamarket_update('frescos|carne|cerdo', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 471 to 4906
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         12 non-null     int64  
 1   supermarket                12 non-null     object 
 2   brand_category             12 non-null     object 
 3   name                       12 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  9 non-null      object 
 6   trademark_propietary_flag  9 non-null      object 
 7   price                      12 non-null     float64
 8   reference_price            12 non-null     float64
 9   reference_unit             12 non-null     object 
 10  insert_date                12 non-null     object 
 11  price_corrected            12 non-null     bool   
 12  reference_price_corrected  12 non-null     bool   
 13  category_name              12 non-null     object 
 1

In [5741]:
datamarket_update('frescos|carne|mixto', clasificar_category_carne)

<class 'pandas.core.frame.DataFrame'>
Index: 1 entries, 4120 to 4120
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         1 non-null      int64  
 1   supermarket                1 non-null      object 
 2   brand_category             1 non-null      object 
 3   name                       1 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  1 non-null      object 
 6   trademark_propietary_flag  1 non-null      object 
 7   price                      1 non-null      float64
 8   reference_price            1 non-null      float64
 9   reference_unit             1 non-null      object 
 10  insert_date                1 non-null      object 
 11  price_corrected            1 non-null      bool   
 12  reference_price_corrected  1 non-null      bool   
 13  category_name              1 non-null      object 
 1

### Subcategoria "pescado_y_marisco"

In [5742]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'pescado_y_marisco']

Unnamed: 0,category,subcategory,subsubcategory
41,congelados,pescado_y_marisco,
432,frescos,pescado_y_marisco,pescado_y_marisco_fresco
656,frescos,pescado_y_marisco,gulas_y_surimi
1185,frescos,pescado_y_marisco,ahumados_y_salazones


In [5743]:
df_mercadona[df_mercadona['name'].str.contains('surimi', case=False, na=False)].head(50)

Unnamed: 0,name,description,price,reference_price,reference_unit,subcategory_2_nivel_name,category_name,subcategory_name,price_corrected,category_id
1892,Palitos de surimi Hacendado ultracongelados,Paquete 0.6 kg,2.5,4.167,kg,Marisco de concha y otros,Congelados,Marisco,False,178
1956,Aritos de surimi a la romana Hacendado ultracongelados,Paquete 0.6 kg,2.95,4.917,kg,Pescado rebozado,Congelados,Rebozados,False,183
1963,Muslitos de surimi Hacendado ultracongelados,Paquete 0.45 kg,2.6,5.778,kg,Pescado rebozado,Congelados,Rebozados,False,183
3942,Palitos de surimi Hacendado,Paquete 0.46 kg,1.99,4.327,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3943,Palitos de surimi Hacendado ultracongelados,Paquete 0.6 kg,2.5,4.167,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3944,Muslitos de surimi Hacendado ultracongelados,Paquete 0.45 kg,2.6,5.778,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3946,Delicias del mar de surimi Hacendado,Bandeja 0.25 kg,1.99,7.96,kg,Surimi y otros,Marisco y pescado,Marisco,False,352
3972,Aritos de surimi a la romana Hacendado ultracongelados,Paquete 0.6 kg,2.95,4.917,kg,Pescado rebozado congelado,Marisco y pescado,Pescado congelado,False,354


In [5744]:
def clasificar_category_pescado(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'frescos|pescado_y_marisco|pescado_y_marisco_fresco':
        if 'congelado' in name and 'rebozado' in name:
            return ('Marisco y pescado', 'Pescado congelado', 'Pescado rebozado congelado')
        elif 'congelado' in name and any(x in name for x in ['pulpo', 'sepia', 'calamar', 'potón', 'pota']):
            return ('Marisco y pescado', 'Pescado congelado', 'Sepia, pulpo y calamar congelado')
        elif 'congelado' in name:
            return ('Marisco y pescado', 'Pescado congelado', 'Pescado congelado')
        elif 'salmón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Salmón')
        elif 'dorada' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Dorada')
        elif 'lubina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lubina')
        elif 'merluza' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Merluza')
        elif 'bacalao' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Bacalao')
        elif 'corvina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Corvina')
        elif 'trucha' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Trucha')
        elif 'lenguado' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Lenguado')
        elif 'boquerón' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Boquerón')
        elif 'rodaballo' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Rodaballo')
        elif 'sardina' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Sardina')
        elif 'caballa' in name:
            return ('Marisco y pescado', 'Pescado fresco', 'Caballa')
        elif any(x in name for x in ['pulpo', 'sepia', 'calamar', 'potón', 'pota']):
            return ('Marisco y pescado', 'Pescado fresco', 'Sepia, pulpo y calamar')
        elif any(x in name for x in ['mejillón', 'almeja', 'berberechos', 'navajas', 'chirla', 'cañaílla']):
            return ('Marisco y pescado', 'Marisco', 'Marisco de concha')
        elif 'langostino' in name or 'gamba' in name:
            return ('Marisco y pescado', 'Marisco', 'Marisco')
        else:
            return ('Marisco y pescado', 'Pescado fresco', 'Otros')

    if subcat == 'frescos|pescado_y_marisco|ahumados_y_salazones':
        if any(x in name for x in ['ahumada', 'ahumado']):
            return ('Marisco y pescado', 'Salazones y ahumados', 'Ahumados')
        else:
            return ('Marisco y pescado', 'Salazones y ahumados', 'Salazones')

    if subcat == 'frescos|pescado_y_marisco|gulas_y_surimi':
        return ('Marisco y pescado', 'Marisco', 'Surimi y otros')

    return (pd.NA, pd.NA, pd.NA)

In [5745]:
datamarket_update('frescos|pescado_y_marisco|gulas_y_surimi', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 656 to 2213
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 14

In [5746]:
datamarket_update('frescos|pescado_y_marisco|ahumados_y_salazones', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1185 to 3595
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         8 non-null      int64  
 1   supermarket                8 non-null      object 
 2   brand_category             8 non-null      object 
 3   name                       8 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  8 non-null      object 
 6   trademark_propietary_flag  8 non-null      object 
 7   price                      8 non-null      float64
 8   reference_price            8 non-null      float64
 9   reference_unit             8 non-null      object 
 10  insert_date                8 non-null      object 
 11  price_corrected            8 non-null      bool   
 12  reference_price_corrected  8 non-null      bool   
 13  category_name              8 non-null      object 
 1

In [5747]:
datamarket_update('frescos|pescado_y_marisco|pescado_y_marisco_fresco', clasificar_category_pescado)

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 432 to 4507
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4 non-null      int64  
 1   supermarket                4 non-null      object 
 2   brand_category             4 non-null      object 
 3   name                       4 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  4 non-null      object 
 6   trademark_propietary_flag  4 non-null      object 
 7   price                      4 non-null      float64
 8   reference_price            4 non-null      float64
 9   reference_unit             4 non-null      object 
 10  insert_date                4 non-null      object 
 11  price_corrected            4 non-null      bool   
 12  reference_price_corrected  4 non-null      bool   
 13  category_name              4 non-null      object 
 14

### Subcategoría "charcuteria_y_quesos"

In [5748]:
df_categorias_datamarket[df_categorias_datamarket['subcategory'] == 'charcuteria_y_quesos']

Unnamed: 0,category,subcategory,subsubcategory
76,frescos,charcuteria_y_quesos,curados
80,frescos,charcuteria_y_quesos,cocidos
406,frescos,charcuteria_y_quesos,quesos
3313,frescos,charcuteria_y_quesos,foie_pates_y_sobrasadas


In [5749]:
df_datamarket[df_datamarket['brand_category'] == 'frescos|charcuteria_y_quesos|cocidos']

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name
80,25860567,dia.es,frescos|charcuteria_y_quesos|cocidos,PAVOFRÍO fiambre de pechuga de pavo en lonchas sobre 70 gr,,pavofrío,False,1.0,14.29,kg,2023-03-15,False,False,,,
93,25860584,dia.es,frescos|charcuteria_y_quesos|cocidos,ELPOZO pechuga de pavo en finas lonchas envase 115 gr,,elpozo,False,1.5,13.04,kg,2023-03-15,False,False,,,
145,25860523,dia.es,frescos|charcuteria_y_quesos|cocidos,DIA NUESTRA ALACENA jamón cocido extra finas lonchas envase 2 x 225 gr,,dia,True,3.39,7.53,kg,2023-03-15,False,False,,,
273,25860541,dia.es,frescos|charcuteria_y_quesos|cocidos,DIA NUESTRA ALACENA jamón de pavo en finas lonchas envase 200 gr,,dia,True,1.79,8.95,kg,2023-03-15,False,False,,,
522,25860525,dia.es,frescos|charcuteria_y_quesos|cocidos,DIA NUESTRA ALACENA pechuga de pavo braseado finas lonchas envase 200 gr,,dia,True,2.45,12.25,kg,2023-03-15,False,False,,,
722,25860564,dia.es,frescos|charcuteria_y_quesos|cocidos,DIA NUESTRA ALACENA pechuga de pavo sandwich envase 250 gr,,dia,True,2.59,10.36,kg,2023-03-15,False,False,,,
1120,25860537,dia.es,frescos|charcuteria_y_quesos|cocidos,CAMPOFRIO salchichas frankfurt de pavo pack 3 x 140 gr,,campofrio,False,1.99,4.74,kg,2023-03-15,False,False,,,
1334,25860515,dia.es,frescos|charcuteria_y_quesos|cocidos,ELPOZO salchichas king de pavo envase 330 gr,,elpozo,False,2.29,6.94,kg,2023-03-15,False,False,,,
1371,25860574,dia.es,frescos|charcuteria_y_quesos|cocidos,PAVOFRÍO fiambre de pechuga de pavo braseada envase 70 gr,,pavofrío,False,1.0,14.29,kg,2023-03-15,False,False,,,
1612,25860562,dia.es,frescos|charcuteria_y_quesos|cocidos,ELPOZO bacon en lonchas envase 110 gr,,elpozo,False,1.5,13.64,kg,2023-03-15,False,False,,,


In [5750]:
def clasificar_category_charcuteria(row):

    name = row['name'].lower()
    subcat = row['brand_category']

    if subcat == 'frescos|charcuteria_y_quesos|quesos':
        if 'fresco' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso fresco')
        elif 'roquefort' in name or 'camembert' in name or 'cabra' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso roquefort, camembert y cabra')
        elif 'untable' in name or 'fundido' in name:
            return ('Charcutería y quesos', 'Queso untable y fresco', 'Queso untable')
        elif 'añejo' in name or 'curado' in name or 'viejo' in name or'grana padano' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso curado')
        elif 'semicurado' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso semicurado')
        elif 'tierno' in name:
            return ('Charcutería y quesos', 'Queso curado, semicurado y tierno', 'Queso tierno')
        elif 'lonchas' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso lonchas')
        elif 'rallado' in name:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso rallado')
        else:
            return ('Charcutería y quesos', 'Queso lonchas, rallado y en porciones', 'Queso en porciones')

    if subcat == 'frescos|charcuteria_y_quesos|curados':
        if 'jamón' in name or 'paleta' in name:
            return ('Charcutería y quesos', 'Jamón serrano', 'Jamón serrano')
        elif 'salchichón' in name or 'longaniza' in name or 'pepperoni' in name or'salami' in name:
            return ('Charcutería y quesos', 'Embutido curado', 'Salchichón')
        elif 'chorizo' in name:
            return ('Charcutería y quesos', 'Embutido curado', 'Chorizo')
        else:
            return ('Charcutería y quesos', 'Embutido curado', 'Lomo y otros')

    if subcat == 'frescos|charcuteria_y_quesos|cocidos':
        if 'jamón' in name:
            return ('Charcutería y quesos', 'Aves y jamón cocido', 'Jamón cocido')
        elif 'pavo' in name or 'pollo' in name:
            return ('Charcutería y quesos', 'Aves y jamón cocido', 'Pavo y otros')
        elif 'mortadela' in name or 'galantina' in name:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Mortadela')
        elif 'chopped' in name:
            return ('Charcutería y quesos', 'Chopped y mortadela', 'Chopped')
        elif 'bacón' in name or 'panceta' in name:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Bacón')
        else:
            return ('Charcutería y quesos', 'Bacón y salchichas', 'Salchichas')

    if subcat == 'frescos|charcuteria_y_quesos|foie_pates_y_sobrasadas':
        if 'sobrasada' in name:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Sobrasada')
        else:
            return ('Charcutería y quesos', 'Paté y sobrasada', 'Paté')

    return (pd.NA, pd.NA, pd.NA)

In [5751]:
datamarket_update('frescos|charcuteria_y_quesos|quesos', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 406 to 4810
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         38 non-null     int64  
 1   supermarket                38 non-null     object 
 2   brand_category             38 non-null     object 
 3   name                       38 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  38 non-null     object 
 6   trademark_propietary_flag  38 non-null     object 
 7   price                      38 non-null     float64
 8   reference_price            38 non-null     float64
 9   reference_unit             38 non-null     object 
 10  insert_date                38 non-null     object 
 11  price_corrected            38 non-null     bool   
 12  reference_price_corrected  38 non-null     bool   
 13  category_name              38 non-null     object 
 1

In [5752]:
datamarket_update('frescos|charcuteria_y_quesos|foie_pates_y_sobrasadas', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 3313 to 4734
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         2 non-null      int64  
 1   supermarket                2 non-null      object 
 2   brand_category             2 non-null      object 
 3   name                       2 non-null      object 
 4   description                0 non-null      object 
 5   trademark                  2 non-null      object 
 6   trademark_propietary_flag  2 non-null      object 
 7   price                      2 non-null      float64
 8   reference_price            2 non-null      float64
 9   reference_unit             2 non-null      object 
 10  insert_date                2 non-null      object 
 11  price_corrected            2 non-null      bool   
 12  reference_price_corrected  2 non-null      bool   
 13  category_name              2 non-null      object 
 1

In [5753]:
datamarket_update('frescos|charcuteria_y_quesos|cocidos', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 80 to 4622
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         26 non-null     int64  
 1   supermarket                26 non-null     object 
 2   brand_category             26 non-null     object 
 3   name                       26 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  26 non-null     object 
 6   trademark_propietary_flag  26 non-null     object 
 7   price                      26 non-null     float64
 8   reference_price            26 non-null     float64
 9   reference_unit             26 non-null     object 
 10  insert_date                26 non-null     object 
 11  price_corrected            26 non-null     bool   
 12  reference_price_corrected  26 non-null     bool   
 13  category_name              26 non-null     object 
 14

In [5754]:
datamarket_update('frescos|charcuteria_y_quesos|curados', clasificar_category_charcuteria)

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 76 to 4485
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         23 non-null     int64  
 1   supermarket                23 non-null     object 
 2   brand_category             23 non-null     object 
 3   name                       23 non-null     object 
 4   description                0 non-null      object 
 5   trademark                  23 non-null     object 
 6   trademark_propietary_flag  23 non-null     object 
 7   price                      23 non-null     float64
 8   reference_price            23 non-null     float64
 9   reference_unit             23 non-null     object 
 10  insert_date                23 non-null     object 
 11  price_corrected            23 non-null     bool   
 12  reference_price_corrected  23 non-null     bool   
 13  category_name              23 non-null     object 
 14

# Fusión de productos Datamarket con la jerarquía de categorías de Mercadona

Realizamos una union entre el DataFrame de productos de Datamarket(df_datamarket) y el diccionario maestro de categorías de Mercadona (df_category), utilizando como claves las columnas category_name, subcategory_name y subcategory_2_nivel_name. Al utilizar how='left', garantizamos que todos los productos de Datamarket se mantengan en el DataFrame final, y solo se añadan las columnas coincidentes desde el diccionario de Mercadona.


In [5755]:
df_datamarket = df_datamarket.merge(df_category, on=['category_name', 'subcategory_name', 'subcategory_2_nivel_name'], how='left')
df_datamarket.head()

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name,category_id
0,25869112,mercadona.es,huevos_leche_y_mantequilla|mantequilla_y_margarina,Margarina Flora Original,Tarrina,otras marcas,False,3.45,8.625,kg,2023-03-15,False,False,"Huevos, leche y mantequilla",Mantequilla y margarina,Margarina,288.0
1,25855500,carrefour.es,productos_frescos|platos_preparados|fritos,Nuggets de pechuga de pollo 250 g,,otras marcas,False,3.1,12.4,kg,2023-03-15,False,False,Pizzas y platos preparados,Platos preparados fríos,Sándwich,421.0
2,25867724,mercadona.es,conservas_caldos_y_cremas|berberechos_y_mejillones,Mejillones picantes en escabeche Hacendado pequeños,Lata,hacendado,True,1.65,23.914,kg,2023-03-15,False,False,"Conservas, caldos y cremas",Berberechos y mejillones,Mejillones,197.0
3,25862155,dia.es,despensa|conservas|conservas_vegetales,CARRETILLA espárragos blancos extra 8/12 lata 150 gr,,carretilla,False,3.05,20.33,kg,2023-03-15,False,False,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas verdura,198.0
4,25861733,dia.es,despensa|desayunos_y_dulces|caramelos_chicles_y_golosinas,DIA ICEBERG chicle sabor sandía sin azúcar bolsa 45 gr,,dia,True,0.99,22.1,kg,2023-03-15,False,False,"Azúcar, caramelos y chocolate",Golosinas,Golosinas,62.0



Como resultado, ahora cada producto dispone tanto de su clasificación original (category, subcategory) como de la jerarquía estandarizada (category_name, subcategory_name, subcategory_2_nivel_name), lo cual permite trabajar con una estructura común en los análisis comparativos.

In [5756]:
df_datamarket[['category_name', 'subcategory_name', 'subcategory_2_nivel_name']].drop_duplicates().shape[0]

411

In [5757]:
# Filtramos las filas donde category_name está vacío (NaN)
brand_category_sin_category_name = df_datamarket[df_datamarket['category_name'].isna()]['brand_category'].unique()

# Mostramos el resultado: cantidad de brand_category sin category_name
print(f"Cantidad de brand_category sin category_name: {len(brand_category_sin_category_name)}")
brand_category_sin_category_name

Cantidad de brand_category sin category_name: 14


array(['limpieza_y_hogar|papeleria|cuadernos_y_carpetas',
       'limpieza_y_hogar|papeleria|boligrafos_y_correctores',
       'limpieza_y_hogar|papeleria|pequeno_accesorio',
       'limpieza_y_hogar|bazar|barbacoas_y_accesorios',
       'limpieza_y_hogar|bazar|bombillas_y_tubos',
       'limpieza_y_hogar|papeleria|marcadores',
       'limpieza_y_hogar|papeleria|lapices_y_accesorios',
       'limpieza_y_hogar|menaje|cafeteras_y_accesorios',
       'limpieza_y_hogar|bazar|herramientas',
       'limpieza_y_hogar|papeleria|archivadores',
       'limpieza_y_hogar|bazar|pegamentos_y_siliconas',
       'limpieza_y_hogar|bazar|jardineria',
       'limpieza_y_hogar|papeleria|colorear',
       'limpieza_y_hogar|bazar|automovil'], dtype=object)

Como resultado del análisis, hemos identificado 14 categorías del datamarket (brand_category) para las cuales no se encontró una correspondencia en la clasificación de Mercadona. Esto se debe a que estos productos no están presentes en Mercadona y no forman parte del segmento de productos de supermercado. Por lo tanto, para el análisis posterior de los datos, estas filas serán excluidas del dataset de trabajo, ya que no aportan valor analítico al proyecto.

In [5763]:
df_datamarket = df_datamarket[df_datamarket['category_name'].notna()]

In [5759]:
df_datamarket.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4968 entries, 0 to 4999
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   id                         4968 non-null   int64  
 1   supermarket                4968 non-null   object 
 2   brand_category             4968 non-null   object 
 3   name                       4967 non-null   object 
 4   description                1832 non-null   object 
 5   trademark                  4889 non-null   object 
 6   trademark_propietary_flag  4889 non-null   object 
 7   price                      4968 non-null   float64
 8   reference_price            4968 non-null   float64
 9   reference_unit             4968 non-null   object 
 10  insert_date                4968 non-null   object 
 11  price_corrected            4968 non-null   bool   
 12  reference_price_corrected  4968 non-null   bool   
 13  category_name              4968 non-null   object 
 1

In [5767]:
df_datamarket['category_id'] = df_datamarket['category_id'].astype(int)

In [5770]:
df_datamarket.to_csv('df_datamarket_merge.csv', index=False, encoding='utf-8-sig')

In [5771]:
df_datamarket.head()

Unnamed: 0,id,supermarket,brand_category,name,description,trademark,trademark_propietary_flag,price,reference_price,reference_unit,insert_date,price_corrected,reference_price_corrected,category_name,subcategory_name,subcategory_2_nivel_name,category_id
0,25869112,mercadona.es,huevos_leche_y_mantequilla|mantequilla_y_margarina,Margarina Flora Original,Tarrina,otras marcas,False,3.45,8.625,kg,2023-03-15,False,False,"Huevos, leche y mantequilla",Mantequilla y margarina,Margarina,288
1,25855500,carrefour.es,productos_frescos|platos_preparados|fritos,Nuggets de pechuga de pollo 250 g,,otras marcas,False,3.1,12.4,kg,2023-03-15,False,False,Pizzas y platos preparados,Platos preparados fríos,Sándwich,421
2,25867724,mercadona.es,conservas_caldos_y_cremas|berberechos_y_mejillones,Mejillones picantes en escabeche Hacendado pequeños,Lata,hacendado,True,1.65,23.914,kg,2023-03-15,False,False,"Conservas, caldos y cremas",Berberechos y mejillones,Mejillones,197
3,25862155,dia.es,despensa|conservas|conservas_vegetales,CARRETILLA espárragos blancos extra 8/12 lata 150 gr,,carretilla,False,3.05,20.33,kg,2023-03-15,False,False,"Conservas, caldos y cremas",Conservas de verdura y frutas,Conservas verdura,198
4,25861733,dia.es,despensa|desayunos_y_dulces|caramelos_chicles_y_golosinas,DIA ICEBERG chicle sabor sandía sin azúcar bolsa 45 gr,,dia,True,0.99,22.1,kg,2023-03-15,False,False,"Azúcar, caramelos y chocolate",Golosinas,Golosinas,62
