In [16]:
import pandas as pd
import re


# Lift pandas' column-width cap so long text cells are rendered in full
# instead of being truncated with an ellipsis.
pd.options.display.max_colwidth = None


# Read the raw listing from disk into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='Acqua_di_Parma.csv')


# Split the audience phrase ("for women", "for men", "for women and men",
# "for men and women") out of 'name' into a separate 'gender' column.
#
# Both regexes below are built from ONE shared fragment. Previously the
# extraction pattern required a single literal space after "for" while the
# removal pattern accepted any whitespace run, so a name such as
# "X for  women" had the phrase stripped from 'name' but ended up with a
# missing 'gender' value. Sharing the fragment guarantees that whatever gets
# removed in step 2 has also been captured in step 1.
GENDER_PHRASE = r'for\s+(?:women\s+and\s+men|men\s+and\s+women|women|men)'

# 1) Extract normalized gender info (handles "for women and men", "for men and women", "for women", "for men")
#    Must run BEFORE step 2, which deletes the phrase from 'name'.
#    Names without a matching phrase are left as NaN in 'gender'.
gender_pattern = r'\b(' + GENDER_PHRASE + r')\b'

df['gender'] = (
    df['name']
      .str.extract(gender_pattern, flags=re.IGNORECASE, expand=False)
      .str.lower()
      .str.replace(r'\s+', ' ', regex=True)         # collapse any whitespace run to one space
      .str.strip()
      .str.replace('for men and women', 'for women and men', regex=False)  # normalize variant
)

# 2) Remove gender info from the 'name' column (also trims common separators before it)
remove_pattern = r'[\s\-–—,:]*\b' + GENDER_PHRASE + r'\b'
df['name'] = (
    df['name']
      .str.replace(remove_pattern, '', flags=re.IGNORECASE, regex=True)
      .str.replace(r'\s{2,}', ' ', regex=True)      # collapse double spaces
      .str.strip(' -–—,:')                          # strip leftover separators at ends
      .str.strip()
)


In [17]:
# Rank rows from highest to lowest 'rating'. ascending=False overrides the
# sort_values default (ascending), so the best-rated entries come first.
df_sorted = df.sort_values(by='rating', ascending=False)

# Display only the first few (top-rated) rows rather than rendering the whole
# frame; df_sorted itself still holds every row for later cells.
df_sorted.head(10)


Unnamed: 0,brand,name,rating,votes,url,last_crawled,gender
17,Acqua di Parma,Colonia Leather Eau de Cologne Concentrée Acqua di Parma,4.42,1521,https://www.fragrantica.com/perfume/Acqua-di-Parma/Colonia-Leather-Eau-de-Cologne-Concentree-25349.html,2025-10-20T22:34:12.810069,
23,Acqua di Parma,Mirto Di Panarea Forte Special Edition Eau De Toilette Acqua di Parma,4.38,93,https://www.fragrantica.com/perfume/Acqua-di-Parma/Mirto-Di-Panarea-Forte-Special-Edition-Eau-De-Toilette-73942.html,2025-10-20T22:34:50.955832,for women and men
0,Acqua di Parma,Note di Colonia II Acqua di Parma,4.37,121,https://www.fragrantica.com/perfume/Acqua-di-Parma/Note-di-Colonia-II-39625.html,2025-10-20T22:32:28.152127,for women and men
8,Acqua di Parma,Note di Colonia IV Acqua di Parma,4.36,163,https://www.fragrantica.com/perfume/Acqua-di-Parma/Note-di-Colonia-IV-46376.html,2025-10-20T22:33:17.097317,for women and men
24,Acqua di Parma,Acqua di Parma Colonia Edizione Centenario Acqua di Parma,4.33,6,https://www.fragrantica.com/perfume/Acqua-di-Parma/Acqua-di-Parma-Colonia-Edizione-Centenario-42460.html,2025-10-20T22:34:56.895540,for women and men
3,Acqua di Parma,Acqua di Parma Profumo Acqua di Parma,4.29,276,https://www.fragrantica.com/perfume/Acqua-di-Parma/Acqua-di-Parma-Profumo-1683.html,2025-10-20T22:32:46.553673,
7,Acqua di Parma,Fico di Amalfi La Riserva Acqua di Parma,4.29,164,https://www.fragrantica.com/perfume/Acqua-di-Parma/Fico-di-Amalfi-La-Riserva-107110.html,2025-10-20T22:33:10.434834,for women and men
38,Acqua di Parma,Oud Eau de Parfum Acqua di Parma,4.28,1378,https://www.fragrantica.com/perfume/Acqua-di-Parma/Oud-Eau-de-Parfum-55998.html,2025-10-20T22:36:23.717983,for women and men
2,Acqua di Parma,Acqua di Parma Colonia Assoluta Acqua di Parma,4.23,1178,https://www.fragrantica.com/perfume/Acqua-di-Parma/Acqua-di-Parma-Colonia-Assoluta-1682.html,2025-10-20T22:32:40.311285,for women and men
27,Acqua di Parma,Arancia di Capri La Riserva Acqua di Parma,4.19,91,https://www.fragrantica.com/perfume/Acqua-di-Parma/Arancia-di-Capri-La-Riserva-107109.html,2025-10-20T22:35:14.532969,for women and men
