In [1]:
# import all required libraries
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
# define a function for scraping product link
def scrape_product_links(url, max_links, timeout=30):
    """Collect up to ``max_links`` product URLs from a paginated listing.

    Parameters
    ----------
    url : str
        URL of the first listing page. It is passed through ``str.format``
        with the page number 1, so it may contain a ``{}`` placeholder
        (for example ``".../wedding/men?p={}"``).
    max_links : int
        Maximum number of links to return. Zero or a negative value yields
        an empty list without any request being sent.
    timeout : float, optional
        Seconds to wait for each HTTP response before ``requests`` raises.

    Returns
    -------
    list of str
        ``href`` values of the ``<a class="product-item-link">`` anchors,
        in the order they were encountered.

    Notes
    -----
    Later pages are reached by following the ``href`` of the page's
    ``<a class="next">`` anchor. Crawling stops when the limit is reached,
    when a page has no usable "next" link, when a response is not OK, or
    when the "next" link leads to a page that was already visited.
    """
    product_links = []
    visited = set()
    current_url = url.format(1)

    while len(product_links) < max_links and current_url not in visited:
        visited.add(current_url)

        response = requests.get(current_url, timeout=timeout)
        if not response.ok:
            # An error page carries no listing; keep what was collected so far.
            break
        soup = BeautifulSoup(response.content, 'html.parser')

        for link in soup.find_all('a', class_='product-item-link'):
            href = link.get('href')
            if not href:
                # Anchor without a target: nothing to record.
                continue
            product_links.append(href)
            if len(product_links) >= max_links:
                break

        next_page_link = soup.find('a', class_='next')
        next_href = next_page_link.get('href') if next_page_link else None
        if not next_href:
            break
        # Resolve relative hrefs against the page they were found on. The
        # result is used verbatim (never re-formatted), so braces in the
        # href cannot break str.format.
        current_url = urljoin(current_url, next_href)

    return product_links

# Listing URL used for a quick check; "{}" is filled in by str.format inside scrape_product_links.
test_url = "https://www.aarong.com/wedding/men?p={}"
# Smoke-test the function: request at most 5 product links and display them.
scrape_product_links(test_url, 5)


['https://www.aarong.com/catalog/product/view/id/1858789/s/brown-brush-painted-and-printed-cotton-coaty-03f231104002/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1886865/s/multicolour-textured-cotton-coaty-03e231104027/category/64/',
 'https://www.aarong.com/catalog/product/view/id/2517635/s/teal-blue-embroidered-endi-silk-panjabi-15e240220163/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1848118/s/purple-embroidered-endi-silk-panjabi-15e230232032/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1857676/s/persian-plum-cotton-slim-fit-panjabi-pajama-set-15e230360317/category/64/']

In [18]:
# Category listing pages to crawl; "{}" is the page-number placeholder
# that scrape_product_links fills in.
men_url = "https://www.aarong.com/wedding/men?p={}"
newmen_url = "https://www.aarong.com/men/men-new-arrivals?p={}"
kurta_url = "https://www.aarong.com/men/short-kurta?p={}"
kids_url = "https://www.aarong.com/kids/kids-new-arrivals?p={}"
weddingwomen_url = "https://www.aarong.com/wedding/women?p={}"
eidwomen_url = 'https://www.aarong.com/eid-24/women?p={}'

# All templates, in the order their links are added to all_links.
url_list = [
    men_url,
    newmen_url,
    kurta_url,
    kids_url,
    weddingwomen_url,
    eidwomen_url,
]

# Gather up to 75 product links per category into one flat list.
all_links = []
for url in url_list:
    links = scrape_product_links(url, 75)
    all_links += links

# Show the first five links as a sanity check.
all_links[:5]

['https://www.aarong.com/catalog/product/view/id/1858789/s/brown-brush-painted-and-printed-cotton-coaty-03f231104002/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1886865/s/multicolour-textured-cotton-coaty-03e231104027/category/64/',
 'https://www.aarong.com/catalog/product/view/id/2517635/s/teal-blue-embroidered-endi-silk-panjabi-15e240220163/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1848118/s/purple-embroidered-endi-silk-panjabi-15e230232032/category/64/',
 'https://www.aarong.com/catalog/product/view/id/1857676/s/persian-plum-cotton-slim-fit-panjabi-pajama-set-15e230360317/category/64/']

In [19]:
# Parallel per-product columns: position i in every list describes the same
# product, so all four lists must always grow together.
productname = []
productprice = []
productspecification = []
productdescription = []


def _text_or_none(tag):
    """Return the stripped text of ``tag``, or None when the tag was not found."""
    return tag.get_text(strip=True) if tag else None


for link in all_links:
    try:
        # Timeout so one stalled request cannot hang the whole crawl.
        response = requests.get(link, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Structural guard: pages that lack the expected product layout are
        # skipped (reported by the except branch below).
        main = soup.find('div', class_='page-wrapper')
        column = main.find('div', class_='column')
        pro_info = column.find('div', class_='product-info-main')
        if pro_info is None:
            raise ValueError('product-info-main block not found')

        # Product name and price; None when the element is missing.
        name = _text_or_none(soup.find('h1', class_='page-title'))
        price = _text_or_none(soup.find('div', class_='price-box price-final_price'))

        # Product description: find() returns None for a missing element, so
        # check the outer container before looking inside it.
        description_box = soup.find('div', class_='product attribute description')
        description = (
            _text_or_none(description_box.find('div', class_='value'))
            if description_box else None
        )

        # Product specification: one entry per data cell; empty list when the
        # attributes table is absent.
        spec_box = soup.find('div', class_='additional-attributes-wrapper table-wrapper')
        spec_cells = spec_box.find_all('td', class_='col data') if spec_box else []
        specification = [cell.get_text(strip=True) for cell in spec_cells]

    except Exception as e:
        print(f'Error scraping {link}: {e}')
        continue

    # Append only after every field was extracted, so a failure part-way
    # through a page can never leave the lists with different lengths.
    productname.append(name)
    productprice.append(price)
    productspecification.append(specification)
    productdescription.append(description)

# Assemble the scraped columns into a DataFrame and preview it.
df = pd.DataFrame({'name': productname, 'price': productprice, 'specification': productspecification, 'description': productdescription})
df.head()


Unnamed: 0,name,price,specification,description
0,Brown Brush Painted and Printed Cotton Coaty,"As low asTk 1,818.60","[Cotton, Screen Print, Band Collar, Front Pock...",Brown brush painted cotton coaty with black pr...
1,Multicolour Textured Cotton Coaty,"As low asTk 1,334.88","[Cotton, Band Collar, Sleeveless, Hand Wash Wi...","Dusty pink, maroon and black textured cotton c..."
2,Teal Blue Embroidered Endi Silk Panjabi,"As low asTk 10,195.35","[Endi Silk, Exclusive Hand Embroidery, Band Co...",Teal blue endi silk panjabi with plum embroide...
3,Purple Embroidered Endi Silk Panjabi,"As low asTk 7,967.44","[Endi Silk, Hand Embroidery, Band Collar, Side...",Purple endi silk panjabi with blue embroidery....
4,Persian Plum Cotton Slim Fit Panjabi Pajama Set,"As low asTk 4,613.95","[Brown, Cotton, Cotton, Hand Embroidery, Chino...",Persian plum textured cotton slim fit panjabi ...


In [22]:
# Save the scraped table to outfits.csv; index=False keeps the row index out of the file.
df.to_csv('outfits.csv',index=False)

In [23]:
# Reload the saved CSV to check that the export round-trips, then preview it.
# NOTE(review): the 'specification' values appear to come back as the text
# form of a list (quoted items in the preview) rather than real lists —
# confirm and parse them before using them as lists.
newdf = pd.read_csv('outfits.csv')
newdf.head()

Unnamed: 0,name,price,specification,description
0,Brown Brush Painted and Printed Cotton Coaty,"As low asTk 1,818.60","['Cotton', 'Screen Print', 'Band Collar', 'Fro...",Brown brush painted cotton coaty with black pr...
1,Multicolour Textured Cotton Coaty,"As low asTk 1,334.88","['Cotton', 'Band Collar', 'Sleeveless', 'Hand ...","Dusty pink, maroon and black textured cotton c..."
2,Teal Blue Embroidered Endi Silk Panjabi,"As low asTk 10,195.35","['Endi Silk', 'Exclusive Hand Embroidery', 'Ba...",Teal blue endi silk panjabi with plum embroide...
3,Purple Embroidered Endi Silk Panjabi,"As low asTk 7,967.44","['Endi Silk', 'Hand Embroidery', 'Band Collar'...",Purple endi silk panjabi with blue embroidery....
4,Persian Plum Cotton Slim Fit Panjabi Pajama Set,"As low asTk 4,613.95","['Brown', 'Cotton', 'Cotton', 'Hand Embroidery...",Persian plum textured cotton slim fit panjabi ...


In [25]:
# Number of distinct product names in the reloaded data (missing names are not counted).
newdf['name'].nunique()

398