## Alibaba Web Scraping Using Python - Part Two

#### Import required libraries













In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

#### Define request headers to mimic a browser

In [2]:
# Request headers sent with every page fetch so the request resembles one
# issued by a desktop Chrome browser.
headers = {
    # Adjacent string literals are concatenated by Python into one value.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/133.0.0.0 Safari/537.36"
    ),
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

#### Initialize lists to store scraped data





In [3]:
# Parallel accumulators filled by the scraping loop: one entry per product
# listing for its name, its price text, and its minimum-order text.
product_names, prices, min_orders = [], [], []

#### Define the number of pages to scrape

In [4]:
# Number of showroom result pages to fetch; the scraping loop requests
# pages 1 through num_pages inclusive. Increase to scrape more pages.
num_pages = 5  # Change this value to scrape more pages

#### Loop through multiple pages


In [5]:
# Start from empty accumulators so that re-running this cell does not append
# a second copy of every row to the lists created in the earlier cell.
product_names.clear()
prices.clear()
min_orders.clear()

for page_num in range(1, num_pages + 1):
    url = f'https://www.alibaba.com/showroom/t--shirts_{page_num}.html'
    print(f"Scraping page {page_num}...")  # Log progress

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise HTTP errors (if any)
    except requests.RequestException as e:
        print(f"Error fetching page {page_num}: {e}")
        continue  # Best effort: skip this page and move on to the next one

    # Parse the page content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect each field for this page separately before merging, so the
    # per-page counts can be compared.
    page_names = [
        h2.get_text(strip=True)
        for h2 in soup.find_all('h2', style="display:inline")
    ]
    page_prices = [
        price.get_text(strip=True)
        for price in soup.find_all('div', {'data-component': 'ProductPrice'})
    ]
    page_moqs = [
        moq.get_text(strip=True).replace('Min. Order: ', '')
        for moq in soup.find_all('div', {'data-component': 'ProductMoq'})
    ]

    row_count = max(len(page_names), len(page_prices), len(page_moqs))
    if row_count == 0:
        # A 200 response with no matching elements (for example a block or
        # verification page) would otherwise pass unnoticed.
        print(f"Warning: no products found on page {page_num}")
    elif not (len(page_names) == len(page_prices) == len(page_moqs)):
        # The three fields are matched purely by position, so a listing that
        # lacks one of them shifts the remaining values on this page.
        print(
            f"Warning: page {page_num} yielded {len(page_names)} names, "
            f"{len(page_prices)} prices and {len(page_moqs)} minimum orders; "
            "rows from this page may be misaligned"
        )

    # Pad the shorter lists with None so the three accumulators always have
    # equal length: later pages stay aligned and the DataFrame constructor
    # does not fail on unequal column lengths.
    for page_values in (page_names, page_prices, page_moqs):
        page_values.extend([None] * (row_count - len(page_values)))

    product_names.extend(page_names)
    prices.extend(page_prices)
    min_orders.extend(page_moqs)

print("Scraping completed!")

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping completed!


#### Create a DataFrame






In [6]:
# The three lists are paired by position. If the scrape produced a different
# number of names, prices and minimum orders, passing the raw lists to
# pd.DataFrame raises ValueError, so report the mismatch and pad instead.
scraped_columns = {
    'Product Name': product_names,
    'Prices': prices,
    'Min Orders': min_orders,
}

column_lengths = {name: len(values) for name, values in scraped_columns.items()}
if len(set(column_lengths.values())) > 1:
    print(
        f"Warning: scraped columns have different lengths {column_lengths}; "
        "shorter columns are padded with NaN and rows may be misaligned"
    )

# Wrapping each list in a Series lets pandas align the columns on the row
# index and fill the missing tail of shorter columns with NaN. dtype=object
# keeps empty columns as object rather than a numeric default.
df = pd.DataFrame({
    name: pd.Series(values, dtype=object)
    for name, values in scraped_columns.items()
})

In [7]:
# Preview the first rows of the scraped table as a quick sanity check
# before exporting it.
df.head()

Unnamed: 0,Product Name,Prices,Min Orders
0,Hot Drill Rhinestone Oversized Tshirt Custom Y...,$1.80-2.65,10 pieces
1,ODM 220 GSM 100 Cotton Plain Short Sleeve Over...,$2.98-8.56,50 pieces
2,Custom Logo Puff Printed Tee Shirt Blank Plain...,$4.00-5.00,20 pieces
3,Hot Sale Summer Light Weight Bamboo Fiber Crew...,$2.41-6.93,50 pieces
4,High Quality 100% Cotton Custom Tshirt Men Wit...,$6.98-9.56,10 pieces


#### Save the data to a CSV file


In [8]:
# Write the scraped table to a CSV file in the current working directory,
# leaving out the DataFrame's row index.
output_path = "alibaba_data_scraped_part2.csv"
df.to_csv(path_or_buf=output_path, index=False)