# eBay Product Data Fetching

I created this data-fetching notebook for anyone who is curious about, or wants to use, official eBay product metadata to train their own models.
For my own model training, I created and used synthetic data instead.

In [None]:
import requests
import base64
import csv
import time
import os

# eBay API credentials.
# NOTE(review): these look like placeholders -- presumably they must be replaced
# with the client ID and secret of a real eBay application before running.
# get_ebay_access_token() joins them as "CLIENT_ID:CLIENT_SECRET" and
# Base64-encodes the pair for the HTTP Basic Authorization header.
CLIENT_ID = "client_id"
CLIENT_SECRET = "client_secret"

def get_ebay_access_token():
    """Request an eBay OAuth access token with the client-credentials grant.

    The module-level ``CLIENT_ID`` and ``CLIENT_SECRET`` are joined with a
    colon, Base64-encoded and sent as an HTTP Basic ``Authorization`` header
    to the eBay token endpoint.

    Returns:
        The value of ``access_token`` from the JSON response, or ``None`` when
        the request fails (network error or non-200 status). Failures are
        reported on stdout rather than raised.
    """
    credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
    encoded_credentials = base64.b64encode(credentials.encode()).decode()

    url = "https://api.ebay.com/identity/v1/oauth2/token"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "Authorization": f"Basic {encoded_credentials}",
    }
    body = {
        "grant_type": "client_credentials",
        "scope": "https://api.ebay.com/oauth/api_scope",
    }

    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.post(url, headers=headers, data=body, timeout=30)
    except requests.RequestException as exc:
        # Keep the "print and return None" contract for network failures too.
        print("❌ Error getting access token:", exc)
        return None

    if response.status_code != 200:
        print("❌ Error getting access token:", response.text)
        return None

    return response.json().get("access_token")

# Get the access token once, when this cell/module is executed.
# The value is None if the token request failed (get_ebay_access_token()
# prints the error and returns None in that case).
EBAY_ACCESS_TOKEN = get_ebay_access_token()
# Search endpoint that fetch_all_ebay_products() queries with the
# "q", "limit" and "offset" parameters.
EBAY_BASE_URL = "https://api.ebay.com/buy/browse/v1/item_summary/search"

# Category label -> keywords looked up (as plain substrings) in a lower-cased
# product title. Insertion order matters: the first category with a hit wins.
CATEGORY_MAPPING = {
    "electronics": ["laptop", "phone", "tablet", "tv", "camera"],
    "fashion": ["shirt", "jeans", "dress", "shoes", "jacket"],
    "home": ["sofa", "table", "lamp", "bed", "chair"],
    "toys": ["lego", "doll", "puzzle", "board game", "action figure"],
    "sports": ["football", "tennis", "basketball", "racket", "helmet"],
    "automotive": ["car", "tire", "engine", "oil", "motorcycle"]
}


def categorize_product(title):
    """Return the first category whose keyword occurs in the title.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside a longer word (e.g. "car" inside "card"). Categories are tried in
    the order they appear in ``CATEGORY_MAPPING``; ``"other"`` is returned
    when no keyword is found.
    """
    lowered = title.lower()
    for label, terms in CATEGORY_MAPPING.items():
        for term in terms:
            if term in lowered:
                return label
    return "other"

def load_existing_product_ids(filename="ebay_products_partial.csv"):
    """Collect the ``item_id`` column of an existing CSV file into a set.

    Returns an empty set when ``filename`` does not exist. The IDs are used
    by the fetch step to skip products that were already saved.
    """
    if not os.path.exists(filename):
        return set()

    with open(filename, "r", newline="", encoding="utf-8") as handle:
        return {record["item_id"] for record in csv.DictReader(handle)}

def fetch_all_ebay_products(categories, max_results=100000, save_interval=1000, filename="ebay_products_partial.csv"):
    """Search eBay for each category term and collect products not seen before.

    Pages through the item-summary search endpoint 50 results at a time,
    sending each entry of ``categories`` as the ``q`` query string. Items
    whose ``itemId`` is already listed in ``filename``, or was already
    collected during this call, are skipped.

    Args:
        categories: Iterable of search terms, queried one after another.
        max_results: Stop once this many new products have been collected
            across all categories.
        save_interval: Whenever the running total of new products is an exact
            multiple of this value, the latest page of new products is
            appended to ``filename``.
        filename: CSV file used to load already-known item IDs and as the
            target of the periodic auto-save.

    Returns:
        A list of the new item-summary dicts, at most ``max_results`` long.
        API, network and JSON errors are printed and end the current
        category; they are not raised.
    """
    headers = {
        "Authorization": f"Bearer {EBAY_ACCESS_TOKEN}",
        "X-EBAY-C-MARKETPLACE-ID": "EBAY_US",
        "Content-Type": "application/json",
    }
    request_timeout = 30  # seconds; requests never times out by default

    existing_ids = load_existing_product_ids(filename)
    all_products = []
    results_per_request = 50
    total_fetched = 0

    for category in categories:
        offset = 0
        while total_fetched < max_results:
            params = {
                "q": category,
                "limit": results_per_request,
                "offset": offset,
            }

            try:
                response = requests.get(
                    EBAY_BASE_URL,
                    headers=headers,
                    params=params,
                    timeout=request_timeout,
                )
            except requests.RequestException as exc:
                # Treat a network failure like an API error: report it and
                # move on to the next category.
                print(f"❌ API Error for category {category}:", exc)
                break

            if response.status_code != 200:
                print(f"❌ API Error for category {category}:", response.text)
                break

            try:
                data = response.json()
            except ValueError as exc:
                print(f"❌ API Error for category {category}:", exc)
                break

            # A missing or null "itemSummaries" both mean "no items".
            items = data.get("itemSummaries") or []

            new_items = [item for item in items if item.get("itemId") not in existing_ids]
            all_products.extend(new_items)
            existing_ids.update(item.get("itemId") for item in new_items)
            total_fetched += len(new_items)

            if len(items) < results_per_request:
                break  # No more pages available

            offset += results_per_request  # Move to next page
            time.sleep(1)  # Sleep to prevent rate limiting

            # Auto-save only when this page actually added something;
            # otherwise a total of 0 (or an unchanged multiple of
            # save_interval) would trigger an empty write on every page.
            # NOTE(review): only the latest page is written here, and those
            # rows are also part of the returned list -- a caller that writes
            # the returned list to the same file must avoid writing them twice.
            if new_items and total_fetched % save_interval == 0:
                save_products_to_csv(new_items, filename)
                print(f"💾 Auto-saved {total_fetched} new products.")

    return all_products[:max_results]

def save_products_to_csv(products, filename="ebay_products_partial.csv"):
    """Append products to a CSV file, skipping item IDs the file already holds.

    Each product is written with the columns ``title``, ``category``,
    ``price``, ``currency``, ``image`` and ``item_id``; the category is
    derived from the title with ``categorize_product``. Missing values are
    written as ``"N/A"``.

    A product is skipped when its ``itemId`` is already present in
    ``filename`` or appeared earlier in ``products``, so saving the same
    product twice (for example after an auto-save) does not create duplicate
    rows. Products without an ``itemId`` are always written.

    Args:
        products: Iterable of item-summary dicts as returned by the eBay
            search endpoint.
        filename: Target CSV file; created with a header row if it does not
            exist or is empty.
    """
    fieldnames = ["title", "category", "price", "currency", "image", "item_id"]

    # A file that exists but is empty still needs the header row.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
    seen_ids = set() if needs_header else load_existing_product_ids(filename)

    written = 0
    with open(filename, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()

        for item in products:
            item_id = item.get("itemId")
            if item_id is not None:
                # IDs read back from the CSV are strings, so compare as text.
                id_key = str(item_id)
                if id_key in seen_ids:
                    continue
                seen_ids.add(id_key)

            title = item.get("title")
            if title is None:
                title = "N/A"
            # "price"/"image" may be absent or null; treat both as empty.
            price = item.get("price") or {}
            image = item.get("image") or {}

            writer.writerow({
                "title": title,
                "category": categorize_product(title),
                "price": price.get("value", "N/A"),
                "currency": price.get("currency", "N/A"),
                "image": image.get("imageUrl", "N/A"),
                "item_id": "N/A" if item_id is None else item_id,
            })
            written += 1
    print(f"✅ Appended {written} new products to {filename}")

# Fetch and save data.
# Each entry is sent to the search endpoint as the "q" query string.
categories = ["electronics", "fashion", "home", "toys", "sports", "automotive"]  # Add more if needed
# Collect up to 100,000 products whose IDs are not already in the default CSV file.
products = fetch_all_ebay_products(categories, max_results=100000)
# Append the collected products to the default file, "ebay_products_partial.csv".
save_products_to_csv(products)