In [1]:
import pandas as pd

In [2]:
# Load the raw dataset from 'cosmetics.csv' (path is relative to the working directory).
cosmetics = pd.read_csv(filepath_or_buffer='cosmetics.csv')

In [4]:
# Preview the first two rows of the freshly loaded frame.
cosmetics.head(n=2)

Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",1,1,1,1,1
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1


In [5]:
# Drop the 'Ingredients' column; the result is rebound to `cosmetics`,
# so re-running this cell on the already-trimmed frame raises KeyError.
cosmetics = cosmetics.drop(columns=['Ingredients'])

In [7]:
# Confirm that 'Ingredients' is gone by showing the first row.
cosmetics.head(n=1)

Unnamed: 0,Label,Brand,Name,Price,Rank,Combination,Dry,Normal,Oily,Sensitive
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,1,1,1,1,1


### Creamos una copia de las columnas 'Name' y 'Brand'

In [8]:
# Keep an independent copy of the text columns so they can be re-attached
# after they are removed from `cosmetics`.
name_brand_original = cosmetics.loc[:, ['Name', 'Brand']].copy()

In [9]:
# Preview the first two rows of the saved 'Name'/'Brand' copy.
name_brand_original.head(n=2)

Unnamed: 0,Name,Brand
0,Crème de la Mer,LA MER
1,Facial Treatment Essence,SK-II


### Asignamos 'Name' y 'Brand' a grupos según rangos del índice de fila

In [10]:
def assign_group_by_index(index):
    """Return the bucket label ('Group_1' .. 'Group_14') for a row index.

    Indices 0-1099 map to 'Group_1' .. 'Group_11' in consecutive blocks of
    100, and indices 1200-1499 map to 'Group_12' .. 'Group_14'. Any other
    value yields 'Unknown'.

    NOTE(review): no bucket covers 1100-1199, so those indices return
    'Unknown'. This looks like an accidental gap. Closing it means
    renumbering the later groups and adding a 'Group_15' entry wherever the
    labels are translated to numbers -- confirm the intent before changing.

    Parameters
    ----------
    index : the row index value to classify.

    Returns
    -------
    str
        The matching group label, or 'Unknown' when no bucket contains
        ``index``.
    """
    for number in range(1, 15):
        # Groups 1-11 start at (number - 1) * 100; groups 12-14 start at
        # number * 100, which is what leaves 1100-1199 uncovered.
        start = (number - 1) * 100 if number <= 11 else number * 100
        if index in range(start, start + 100):
            return f'Group_{number}'
    return 'Unknown'

In [11]:
# Label every row with its index bucket and store it as 'Name_Group'.
# The label is derived from the row index, not from the 'Name' values.
cosmetics['Name_Group'] = cosmetics.index.to_series().map(assign_group_by_index)

In [12]:
# Label every row with its index bucket and store it as 'Brand_Group'.
# The label is derived from the row index, not from the 'Brand' values.
cosmetics['Brand_Group'] = cosmetics.index.to_series().map(assign_group_by_index)

In [13]:
# Show the text columns side by side with the group labels just added.
subset_columns = ['Name', 'Brand', 'Name_Group', 'Brand_Group']
print(cosmetics.loc[:, subset_columns])

                                                   Name  \
0                                       Crème de la Mer   
1                              Facial Treatment Essence   
2                            Protini™ Polypeptide Cream   
3                           The Moisturizing Soft Cream   
4         Your Skin But Better™ CC+™ Cream with SPF 50+   
...                                                 ...   
1467  Yoghurt Nourishing Fluid Veil Face Sunscreen B...   
1468  Daily Deflector™ Waterlight Broad Spectrum SPF...   
1469                            Self Tan Dry Oil SPF 50   
1470                   Pro Light Self Tan Bronzing Mist   
1471  DERMAPROTECT Daily Defense Broad Spectrum SPF 50+   

                              Brand Name_Group Brand_Group  
0                            LA MER    Group_1     Group_1  
1                             SK-II    Group_1     Group_1  
2                    DRUNK ELEPHANT    Group_1     Group_1  
3                            LA MER    Group_1 

In [14]:
# Lookup table from group label to its numeric code:
# 'Unknown' -> 0 and 'Group_<n>' -> n for n in 1..14.
group_mapping = {
    'Unknown': 0,
    **{f'Group_{number}': number for number in range(1, 15)},
}

In [15]:
# Translate each label column into its numeric code; the new column is named
# '<label column>_Num'. Labels missing from `group_mapping` would become NaN.
for label_column in ('Name_Group', 'Brand_Group'):
    cosmetics[f'{label_column}_Num'] = cosmetics[label_column].map(group_mapping)

In [16]:
# Remove the original text columns 'Name' and 'Brand'.
cosmetics = cosmetics.drop(columns=['Name', 'Brand'])

In [17]:
# Remove the intermediate label columns now that their numeric codes exist.
cosmetics = cosmetics.drop(columns=['Name_Group', 'Brand_Group'])

In [18]:
# Preview the first five rows after the group encoding.
cosmetics.head(n=5)

Unnamed: 0,Label,Price,Rank,Combination,Dry,Normal,Oily,Sensitive,Name_Group_Num,Brand_Group_Num
0,Moisturizer,175,4.1,1,1,1,1,1,1,1
1,Moisturizer,179,4.1,1,1,1,1,1,1,1
2,Moisturizer,68,4.4,1,1,1,1,0,1,1
3,Moisturizer,175,3.8,1,1,1,1,1,1,1
4,Moisturizer,38,4.1,1,1,1,1,1,1,1


In [19]:
# Re-attach the saved 'Name'/'Brand' copy as extra columns (rows are aligned on the index).
original_cosmetics = pd.concat([cosmetics, name_brand_original], axis='columns')

In [20]:
# Preview the first two rows of the combined frame.
original_cosmetics.head(n=2)

Unnamed: 0,Label,Price,Rank,Combination,Dry,Normal,Oily,Sensitive,Name_Group_Num,Brand_Group_Num,Name,Brand
0,Moisturizer,175,4.1,1,1,1,1,1,1,1,Crème de la Mer,LA MER
1,Moisturizer,179,4.1,1,1,1,1,1,1,1,Facial Treatment Essence,SK-II


### Creamos etiquetas numéricas para 'Price'

In [21]:
# Price bin edges every 50 units from 0 to 400, and one integer label (1..8) per bin.
bins = list(range(0, 401, 50))
labels = list(range(1, len(bins)))

In [22]:
# Bucket 'Price' into the labelled ranges and store the result as 'Price_Category'.
# Bins are right-closed; include_lowest=True also places a price of exactly 0
# in the first bin. Prices above the last edge (400) fall outside every bin
# and come out as NaN.
original_cosmetics['Price_Category'] = pd.cut(
    x=original_cosmetics['Price'],
    bins=bins,
    labels=labels,
    include_lowest=True,
)

In [23]:
# Check the new 'Price_Category' column on the first row.
original_cosmetics.head(n=1)

Unnamed: 0,Label,Price,Rank,Combination,Dry,Normal,Oily,Sensitive,Name_Group_Num,Brand_Group_Num,Name,Brand,Price_Category
0,Moisturizer,175,4.1,1,1,1,1,1,1,1,Crème de la Mer,LA MER,4


In [24]:
# Remove the original 'Price' column; 'Price_Category' replaces it.
original_cosmetics = original_cosmetics.drop(columns=['Price'])

### Creamos etiquetas numéricas para Rank

In [25]:
# Ensure 'Rank' is numeric before it is binned. Values that cannot be parsed
# as numbers raise here instead of being turned into text first.
original_cosmetics['Rank'] = pd.to_numeric(original_cosmetics['Rank'])

In [26]:
# Bin edges for 'Rank': one unit per bin, from 0.0 to 5.0.
bins = [float(edge) for edge in range(6)]

In [27]:
# One integer label (1..5) per rank bin.
labels = list(range(1, 6))

In [28]:
# Bin 'Rank' (cast to float first) into the labelled intervals and store the
# result as 'Rank_Num'. Bins are right-closed, so e.g. 4.1 lands in label 5;
# include_lowest=True also places a rank of exactly 0.0 in the first bin.
original_cosmetics['Rank_Num'] = pd.cut(
    x=original_cosmetics['Rank'].astype(float),
    bins=bins,
    labels=labels,
    include_lowest=True,
)

In [29]:
# Remove the original 'Rank' column; 'Rank_Num' replaces it.
original_cosmetics = original_cosmetics.drop(columns=['Rank'])

In [30]:
# Preview the first two rows of the final frame.
original_cosmetics.head(n=2)

Unnamed: 0,Label,Combination,Dry,Normal,Oily,Sensitive,Name_Group_Num,Brand_Group_Num,Name,Brand,Price_Category,Rank_Num
0,Moisturizer,1,1,1,1,1,1,1,Crème de la Mer,LA MER,4,5
1,Moisturizer,1,1,1,1,1,1,1,Facial Treatment Essence,SK-II,4,5


In [31]:
# Write the final frame to 'cosmetics_final.csv' without the index column.
original_cosmetics.to_csv(path_or_buf='cosmetics_final.csv', index=False)