In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from deep_translator import GoogleTranslator

In [2]:
# Download the aziza.tn home page. The explicit timeout (seconds) keeps a stalled
# connection from blocking the kernel forever; requests applies no timeout by default.
rq = requests.get('https://aziza.tn/fr/home', timeout=30)
# Displayed as the cell result so a non-200 response is visible before parsing.
rq.status_code

200

In [3]:
# Parse the downloaded page bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=rq.content, features='html.parser')

In [18]:
def _tag_text(tag, default):
    """Return the stripped text of ``tag``, or ``default`` when the tag was not found."""
    return tag.text.strip() if tag else default


def _parse_price(price_tag):
    """Convert a price tag to a float; 0.0 means the price is missing or unparsable.

    Commas are removed before conversion. If the remaining text is still not a
    valid number (for example because of a currency suffix or stray spaces),
    only the digits and dots are kept and the conversion is retried.
    """
    if not price_tag:
        return 0.0
    # NOTE(review): the comma is removed rather than treated as a decimal
    # separator, so '1,100' becomes 1100.0 -- confirm the intended unit.
    raw = price_tag.text.strip().replace(',', '')
    try:
        return float(raw)
    except ValueError:
        numeric = ''.join(ch for ch in raw if ch in '0123456789.')
        try:
            return float(numeric)
        except ValueError:
            # Same sentinel as a missing price tag, so one bad price does not
            # abort the whole scrape.
            return 0.0


# One record per <li class="product-item"> element found in the parsed page.
data = []
products = soup.find_all('li', class_='product-item')
for product in products:
    name = _tag_text(product.find('a', class_='product-item-link'), 'N/A')
    price = _parse_price(product.find('span', class_='price'))
    availability = _tag_text(product.find('div', class_='product-item-actions'), 'N/A')
    promo = _tag_text(product.find('div', class_='decro'), 'No Promo')

    data.append({'Product_Name': name,
                 'Price': price,
                 'Availability_Status': availability,
                 'Promotion': promo})

# print(promo)
# for product in products:
#     name = product.find('h2', class_='h3 product-title')
#     price = product.find('span', class_='price')

#     product_name = name.text.strip() if name else 'N/A'
#     product_price = price.text.strip() if price else 'N/A'

#     print(f"Name: {product_name} | Price: {product_price}")

In [19]:
# Build the product table from the scraped records. Naming the columns explicitly
# keeps the schema stable even when `data` is empty (no products found); without it
# an empty frame has no columns and lookups such as df['Price'] raise KeyError.
df = pd.DataFrame(
    data,
    columns=['Product_Name', 'Price', 'Availability_Status', 'Promotion'],
)

In [29]:
# Remove the availability column. errors='ignore' makes the cell safe to re-run:
# a second execution is a no-op instead of raising KeyError for the missing column.
df = df.drop(columns='Availability_Status', errors='ignore')

In [34]:
# Preview the first 20 rows of the product table.
df.iloc[:20]

Unnamed: 0,Product_Name,Price,Promotion,Category
0,Set of 2 sauce distributors,1100.0,No Promo,Kitchen & Cooking Tools
1,Builder rest,1500.0,No Promo,Other
2,Bucket with cover,1980.0,No Promo,Kitchen & Cooking Tools
3,Spaghetti,2000.0,No Promo,"Pasta, Rice & Dry Goods"
4,Basket 3 compartments,2000.0,No Promo,Other
5,Storage box,2190.0,No Promo,Kitchen & Cooking Tools
6,LED lamp,2980.0,No Promo,Electronics & Gadgets
7,Set 3 pots,2990.0,No Promo,Kitchen & Cooking Tools
8,Basmati rice,4890.0,No Promo,"Pasta, Rice & Dry Goods"
9,Set 3 conservation boxes,6980.0,No Promo,Kitchen & Cooking Tools


In [31]:
# Export the table without the row index. With the default 'columns' orient pandas
# cannot omit the index (index=False is rejected or ignored depending on the pandas
# version), so write one JSON object per product with orient='records' instead.
df.to_json('product.json', orient='records')

In [21]:
# Keep only rows whose price is non-zero (0 is the placeholder for a missing price).
has_price = df['Price'].ne(0)
df = df.loc[has_price]

In [22]:
# Replace the row labels with a fresh 0..n-1 range, discarding the old index.
df.index = pd.RangeIndex(len(df))

In [23]:
# Count missing values per column.
missing_per_column = df.isna().sum()
missing_per_column

Product_Name           0
Price                  0
Availability_Status    0
Promotion              0
dtype: int64

In [56]:
# Frequency of each promotion label, most common first.
promotion_counts = df['Promotion'].value_counts()
promotion_counts

Promotion
No Promo    226
-15%          4
-29%          4
-23%          3
-24%          3
-6%           2
-33%          2
-20%          2
-12%          2
-21%          2
-11%          1
-10%          1
-17%          1
-26%          1
-28%          1
-31%          1
-30%          1
Name: count, dtype: int64

In [25]:
# Print a summary of the frame: column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 257 entries, 0 to 256
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Product_Name         257 non-null    object 
 1   Price                257 non-null    float64
 2   Availability_Status  257 non-null    object 
 3   Promotion            257 non-null    object 
dtypes: float64(1), object(3)
memory usage: 8.2+ KB


In [26]:
def translate_text(text):
    """Translate ``text`` from French to English with GoogleTranslator.

    Parameters
    ----------
    text : str
        French text to translate.

    Returns
    -------
    str
        The English translation. The input is returned unchanged when it is
        not a non-blank string, when the translator returns an empty result,
        or when the translation raises.
    """
    import warnings

    # Nothing to translate: skip the translator call for blank or non-string input.
    if not isinstance(text, str) or not text.strip():
        return text
    try:
        translated = GoogleTranslator(source='fr', target='en').translate(text)
    except Exception as e:
        # Best effort: keep the original text rather than storing an error
        # message as if it were the translated value, and surface the failure.
        warnings.warn(f"Translation failed for {text!r}: {e}", RuntimeWarning)
        return text
    return translated if translated else text

In [27]:
# Translate every product name from French to English, one value at a time.
df['Product_Name'] = df['Product_Name'].map(translate_text)

In [16]:
import plotly.express as px

In [32]:
# Ordered (category, keywords) rules. The first rule with a keyword contained in the
# lower-cased product name wins, so the order of the rules is significant.
# Matching is plain substring containment: short keywords also match inside longer
# words (e.g. "pan" matches "panties", so that name is decided by the first rule).
_CATEGORY_RULES = (
    ("Kitchen & Cooking Tools", ("sauce", "bucket", "box", "tray", "pot", "knife", "spoon", "glass", "mug", "bottle", "pitcher", "storage", "oven", "pan", "colander", "creperie")),
    ("Fitness & Sports Equipment", ("dumbbell", "yoga", "fitness", "wrist", "ball", "bike", "helmet", "step", "gym", "bodybuilding", "resistance", "roulette")),
    ("Electronics & Gadgets", ("smartwatch", "bluetooth", "kit", "speaker", "lamp", "keyboard", "mouse", "helmet", "drumfit")),
    ("Personal Care", ("shampoo", "gel", "deodorant", "cream", "omega", "ricotta")),
    ("Household Supplies", ("dishwashing", "laundry", "bleach", "powder", "soap", "deodorant", "wc", "cleaner")),
    ("Dairy & Yogurt", ("yogurt", "cheese")),
    ("Snacks & Cookies", ("cookie", "cake", "waf", "tart", "biscuit", "browniz")),
    ("Drinks", ("juice", "drink", "nectar", "cocktail", "water")),
    ("Pasta, Rice & Dry Goods", ("spaghetti", "rice", "basmati")),
    ("Meat, Tuna & Protein", ("salami", "tuna", "chicken", "salam")),
    ("Condiments & Sauces", ("mayonnaise", "harissa", "sauce", "cream")),
    ("Breakfast & Jam", ("jam", "preparation", "butter", "almond", "coffee", "vermicelli")),
    ("Ice Cream & Desserts", ("ice", "dessert", "chamia", "pacifier", "bar")),
    ("Baby & Childcare", ("children", "panties", "baby")),
    # NOTE(review): this rule can never win -- "coffee" is already claimed by
    # "Breakfast & Jam" and "mug" by "Kitchen & Cooking Tools" above.
    ("Coffee & Tea", ("coffee", "mug")),
)


def categorize_product(name):
    """Assign a product name to a category by keyword lookup.

    Parameters
    ----------
    name : str
        Product name; matching is case-insensitive.

    Returns
    -------
    str
        The category of the first rule in ``_CATEGORY_RULES`` that has a keyword
        occurring in the name, or "Other" when no rule matches or ``name`` is
        not a string (e.g. None or NaN).
    """
    # Missing names cannot be matched; classify them instead of failing on .lower().
    if not isinstance(name, str):
        return "Other"
    lowered = name.lower()
    for category, keywords in _CATEGORY_RULES:
        if any(keyword in lowered for keyword in keywords):
            return category
    return "Other"



In [33]:
# Derive the category of each product from its (translated) name.
df["Category"] = df["Product_Name"].map(categorize_product)

In [54]:
# Mean price per category as a two-column frame: 'Category' and 'Average Price'.
avg_price_category = (
    df.groupby('Category')['Price']
    .mean()
    .rename('Average Price')
    .reset_index()
)
avg_price_category

Unnamed: 0,Category,Average Price
0,Baby & Childcare,32970.0
1,Breakfast & Jam,11111.666667
2,Condiments & Sauces,5390.0
3,Dairy & Yogurt,2001.052632
4,Drinks,4124.583333
5,Electronics & Gadgets,53553.75
6,Fitness & Sports Equipment,67217.142857
7,Household Supplies,6370.0
8,Ice Cream & Desserts,1942.5
9,Kitchen & Cooking Tools,7710.952381


In [55]:
# Bar chart of the mean price of each product category.
fig = px.bar(
    data_frame=avg_price_category,
    x='Category',
    y='Average Price',
    title='Average Price per Product Category',
)
fig.show()
