# Extracting Dyson item data from ebay.ie and ebay.com

In [1]:
import requests
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import datetime
import pycountry
import xml.etree.ElementTree as ET

# Let python-dotenv populate the process environment, then read the eBay API
# access token from the 'access_token' variable (None when it is not set).
load_dotenv()
access_token = os.environ.get('access_token')

**Categories**

In [2]:
# Search configuration per Dyson product line: the free-text keyword, the eBay
# category the results must belong to, and the minimum listing price to keep.
# 'airwrap' and 'straightener' intentionally share category 177659.
dyson_items = {
    'vacuum_cleaner': dict(
        keyword='dyson vacuum cleaner',
        category_id=20614,
        category_name='Vacuum Cleaners',
        min_price=80,
    ),
    'hairdryer': dict(
        keyword='dyson hairdryer',
        category_id=11858,
        category_name='Hair Dryers',
        min_price=80,
    ),
    'airwrap': dict(
        keyword='dyson airwrap',
        category_id=177659,
        category_name='Straighteners & Curling Tongs',
        min_price=80,
    ),
    'straightener': dict(
        keyword='dyson straightener',
        category_id=177659,
        category_name='Straighteners & Curling Tongs',
        min_price=60,
    ),
    'fan': dict(
        keyword='dyson fan',
        category_id=43510,
        category_name='Air Purifiers',
        min_price=60,
    ),
}

## Searching Dyson item IDs on eBay (helpers shared by the ebay.ie and ebay.com runs)

In [3]:
# Search function to get item IDs
def search_ebay_items(keyword, category_id, min_price, marketplace, timeout=30):
    """Page through the Browse API item-summary search and collect item IDs.

    The search is restricted to fixed-price Dyson listings from business
    sellers, priced between ``min_price`` and 1000, in the listed condition IDs.

    Args:
        keyword: Free-text query sent as ``q``.
        category_id: Category sent as ``category_ids``.
        min_price: Lower bound of the price filter.
        marketplace: Value of the ``X-EBAY-C-MARKETPLACE-ID`` header
            (e.g. ``'EBAY_IE'``).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(item_ids, min_price, marketplace)`` so the result can be
        unpacked directly into ``filtering_ebay_items``. If a request fails,
        the IDs collected so far are returned.
    """
    url = 'https://api.ebay.com/buy/browse/v1/item_summary/search'

    headers = {
        'Authorization': f'Bearer {access_token}',
        'X-EBAY-C-MARKETPLACE-ID': marketplace
    }
    params = {
        'q': keyword,
        'filter': (
            "buyingOptions:{FIXED_PRICE},"
            "brand:{Dyson},"
            f"price:[{min_price}..1000],"
            "conditionIds:{1000|1500|1750|2000|2010|2020|2030|2500|2750|3000|4000|5000},"
            "sellerAccountTypes:{BUSINESS}"
        ),
        'category_ids': category_id,
        'limit': 200,
        'offset': 0,
    }

    all_items_id = []

    while True:
        # A timeout keeps one stalled connection from hanging the whole run;
        # on a network error we stop paging but keep what was already fetched.
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print("Error:", exc)
            break

        if response.status_code != 200:
            print("Error:", response.status_code, response.text)
            break

        try:
            data = response.json()
        except ValueError as exc:
            print("Error: invalid JSON in search response:", exc)
            break

        items = data.get('itemSummaries', [])

        # Stop when the API returns no more items
        if not items:
            break

        items_id = [item['itemId'] for item in items]
        all_items_id.extend(items_id)

        # Optional: print progress
        print(f"Retrieved {len(items_id)} items, Total so far: {len(all_items_id)}")

        # A short page means this was the last one
        if len(items_id) < params['limit']:
            break

        # Advance to the next page
        params['offset'] += len(items_id)

    print(len(all_items_id))
    return all_items_id, min_price, marketplace
    

In [4]:
# Get detailed info for each item using getItem endpoint
def get_item_details(item_id, timeout=30):
    """Fetch the full Browse API record for one item.

    Args:
        item_id: Item identifier as returned by the search endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response as a dict, or None when the request fails
        or the API answers with a non-200 status (the problem is printed).
    """
    url = f'https://api.ebay.com/buy/browse/v1/item/{item_id}'
    headers = {
        'Authorization': f'Bearer {access_token}',
    }

    # Network failures are reported the same way as HTTP errors: print + None.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching details for item {item_id}: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    # Status and body are formatted separately (previously printed as a tuple).
    print(f"Error fetching details for item {item_id}: {response.status_code} {response.text}")
    return None

In [5]:
def get_country_name(country_code):
    """Return the name pycountry has for an alpha-2 country code, or None if there is no match."""
    match = pycountry.countries.get(alpha_2=country_code)
    return match.name if match else None

In [6]:
# Extract seller feedback for the specific item
# Define the XML request for GetFeedback
def create_get_feedback_xml(user_id, item_id):
    """Build the XML body of a GetFeedback request for one seller and item.

    Args:
        user_id: Seller username placed in ``<UserID>``.
        item_id: Item identifier placed in ``<ItemID>``.

    Returns:
        The request document as a string. It asks for the first page of up to
        25 ``FeedbackReceivedAsSeller`` entries at detail level ``ReturnAll``.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    # Escape '&', '<' and '>' so an unusual value cannot produce malformed XML.
    token = escape(str(access_token))
    user = escape(str(user_id))
    item = escape(str(item_id))

    xml = f"""<?xml version="1.0" encoding="utf-8"?>
    <GetFeedbackRequest xmlns="urn:ebay:apis:eBLBaseComponents">
        <RequesterCredentials>
            <eBayAuthToken>{token}</eBayAuthToken>
        </RequesterCredentials>
        <UserID>{user}</UserID>
        <ItemID>{item}</ItemID>
        <FeedbackType>FeedbackReceivedAsSeller</FeedbackType>
        <DetailLevel>ReturnAll</DetailLevel>
        <Pagination>
            <EntriesPerPage>25</EntriesPerPage>
            <PageNumber>1</PageNumber>
        </Pagination>
    </GetFeedbackRequest>"""
    return xml


def get_feedback(user_id, item_id, timeout=30):
    """POST a GetFeedback request and return the raw XML response.

    Args:
        user_id: Seller username the feedback was left for.
        item_id: Item identifier the feedback should relate to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body as text, or None when the request fails or the
        server answers with a non-200 status (the problem is printed).
    """
    url = "https://api.ebay.com/ws/api.dll"
    headers = {
        "Content-Type": "text/xml",
        "X-EBAY-API-CALL-NAME": "GetFeedback",
        "X-EBAY-API-SITEID": "0",
        "X-EBAY-API-COMPATIBILITY-LEVEL": "967"
    }

    # Send bytes that match the encoding="utf-8" declared in the XML prolog
    # instead of relying on the HTTP layer's default encoding of a str body.
    xml_body = create_get_feedback_xml(user_id, item_id).encode('utf-8')

    # Network failures are reported the same way as HTTP errors: print + None.
    try:
        response = requests.post(url, headers=headers, data=xml_body, timeout=timeout)
    except requests.RequestException as exc:
        print("Request failed:", exc)
        return None

    if response.status_code == 200:
        return response.text

    print("Request failed:", response.status_code, response.text)
    return None
    
    
def parse_feedback_response(user_id, item_id):
    """Fetch a seller's feedback for an item and return it as a list of dicts.

    Args:
        user_id: Seller username passed to ``get_feedback``.
        item_id: Item identifier passed to ``get_feedback``.

    Returns:
        One dict per ``FeedbackDetail`` element with the keys ``ItemID``,
        ``CommentingUser``, ``CommentingUserScore``, ``CommentText``,
        ``CommentTime`` and ``CommentType``. A field whose element is absent
        is None. Returns an empty list when no response was obtained or the
        response is not well-formed XML.
    """
    xml_data = get_feedback(user_id, item_id)

    # get_feedback returns None on failure; there is nothing to parse then.
    if not xml_data:
        return []

    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError as exc:
        print(f"Could not parse feedback response for item {item_id}: {exc}")
        return []

    namespace = "{urn:ebay:apis:eBLBaseComponents}"
    field_names = (
        'ItemID',
        'CommentingUser',
        'CommentingUserScore',
        'CommentText',
        'CommentTime',
        'CommentType',
    )

    # Feedback Details
    feedback_details = []
    for feedback in root.findall(f'.//{namespace}FeedbackDetail'):
        feedback_info = {}
        for field_name in field_names:
            node = feedback.find(f'{namespace}{field_name}')
            # A missing child yields None instead of raising AttributeError.
            feedback_info[field_name] = node.text if node is not None else None
        feedback_details.append(feedback_info)

    return feedback_details

In [7]:
def _ships_to_ireland(item_details, marketplace):
    """Return True when the listing is treated as shippable to Ireland."""
    # Listings found through the Irish marketplace are taken as shippable.
    if marketplace == 'EBAY_IE':
        return True

    ship_to = item_details.get('shipToLocations') or {}
    included = ship_to.get('regionIncluded') or []
    excluded = ship_to.get('regionExcluded') or []

    # Ireland must be covered by an included region and not explicitly excluded.
    covered = any(
        region.get('regionName') in ('Ireland', 'Worldwide', 'Europe', 'European Union')
        for region in included
    )
    explicitly_excluded = any(region.get('regionName') == 'Ireland' for region in excluded)
    return covered and not explicitly_excluded


def filtering_ebay_items(items_id, min_price, marketplace):
    """Fetch details for each item ID and keep the relevant Dyson listings.

    An item is kept when its price is above ``min_price``, its brand is
    exactly ``'Dyson'`` and its category is one of those in ``dyson_items``.
    Items whose details cannot be fetched, or that lack a usable price or
    category, are skipped.

    Args:
        items_id: Iterable of item IDs to look up with ``get_item_details``.
        min_price: Exclusive lower price bound.
        marketplace: Marketplace the IDs were searched on (e.g. ``'EBAY_IE'``).

    Returns:
        A tuple ``(df_items, df_reviews)`` of pandas DataFrames: one row per
        kept item, and one row per feedback entry returned for those items.
    """
    filtered_items = []
    reviews = []
    valid_category_ids = {d['category_id'] for d in dyson_items.values()}

    for item_id in items_id:
        item_details = get_item_details(item_id)

        # get_item_details returns None on failure (and has printed why).
        if not item_details:
            continue

        try:
            price = float(item_details['price']['value'])
            category_id = int(item_details['categoryId'])
        except (KeyError, TypeError, ValueError):
            # No usable price or category: the item cannot pass the filter.
            continue

        # extract item details only for items with price > min_price
        if not (price > min_price
                and item_details.get('brand') == 'Dyson'
                and category_id in valid_category_ids):
            continue

        # Optional sections of the response; treat missing/null as empty.
        marketing_price = item_details.get('marketingPrice') or {}
        location = item_details.get('itemLocation') or {}
        seller = item_details.get('seller') or {}
        availability = (item_details.get('estimatedAvailabilities') or [{}])[0]
        shipping_option = (item_details.get('shippingOptions') or [{}])[0]

        item = {}
        item['item_id'] = item_details.get('legacyItemId')
        print(item['item_id'])
        item['title'] = item_details.get('title')
        item['brand'] = item_details.get('brand')
        item['url'] = item_details.get('itemWebUrl')

        item['price_usd'] = item_details['price']['value']
        item['original_price_usd'] = (marketing_price.get('originalPrice') or {}).get('value')
        item['discount_percentage'] = marketing_price.get('discountPercentage')
        item['discount_amount_usd'] = (marketing_price.get('discountAmount') or {}).get('value')

        item['category_id'] = item_details.get('categoryId')
        # Last part of 'categoryPath'
        item['category_name'] = (item_details.get('categoryPath') or '').split('|')[-1]

        item['condition_id'] = item_details.get('conditionId')
        item['condition_name'] = item_details.get('condition')

        country_id = location.get('country')
        if country_id is not None:
            country_id = country_id.strip()
        item['item_location_country_id'] = country_id
        item['item_location_country'] = get_country_name(country_id) if country_id else None

        city = location.get('city')
        if city is not None:
            city = city.strip()
        # A city equal to the country code carries no information.
        if city == country_id:
            city = None
        if city:
            city = city.capitalize()
        item['item_location_city'] = city

        item['number_sold'] = availability.get('estimatedSoldQuantity')
        item['number_available'] = availability.get('estimatedRemainingQuantity')

        item['seller_username'] = seller.get('username')
        item['seller_positive_feedback_percentage'] = seller.get('feedbackPercentage')
        item['seller_feedback_score'] = seller.get('feedbackScore')

        item['shipped_to_ie'] = _ships_to_ireland(item_details, marketplace)
        item['shipping_cost_usd'] = (shipping_option.get('shippingCost') or {}).get('value')

        feedback_list = parse_feedback_response(item['seller_username'], item['item_id'])
        positive_feedback_rate = None
        if feedback_list:
            reviews.extend(feedback_list)
            positive_feedback_num = sum(
                1 for feedback in feedback_list if feedback.get('CommentType') == 'Positive'
            )
            positive_feedback_rate = positive_feedback_num / len(feedback_list) * 100

        item['item_positive_feedback_percentage'] = positive_feedback_rate
        item['marketplace_id'] = item_details.get('listingMarketplaceId')
        item['searching_marketplace'] = marketplace
        item['date_time'] = datetime.datetime.now()

        filtered_items.append(item)
        print(item)

    df_items = pd.DataFrame(filtered_items)
    df_reviews = pd.DataFrame(reviews)
    return df_items, df_reviews

## Extracting items data for each category (eBay IE)

**vacuum_cleaners**

In [9]:
# Search ebay.ie for Dyson vacuum cleaners, then fetch and filter every hit.
_category = dyson_items['vacuum_cleaner']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_IE',
)
df_vacuum_cleaners_ie, df_vacuum_cleaners_reviews_ie = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 126 items, Total so far: 1726
1726
315431242700
{'item_id': '315431242700', 'title': 'Dyson Ball Multi Floor Cylinder Vacuum', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/315431242700', 'price_usd': '179.17', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '20614', 'category_name': 'Vacuum Cleaners', 'condition_id': '2500', 'condition_name': 'Seller refurbished', 'item_location_country_id': 'IE', 'item_location_country': 'Ireland', 'item_location_city': None, 'number_sold': 22, 'number_available': 2, 'seller_username': 'dysonoutlet.ie', 'seller_positive_feedback_percentage': '97.2', 'seller_feedback_score':

In [11]:
# Write the ebay.ie vacuum-cleaner items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_ie/items/' and 'extracted_data_ie/reviews/'.
for _frame, _csv_name in (
    (df_vacuum_cleaners_ie, 'vacuum_cleaners_ie.csv'),
    (df_vacuum_cleaners_reviews_ie, 'vacuum_cleaners_reviews_ie.csv'),
):
    _frame.to_csv(_csv_name)

**hairdryers**

In [10]:
# Search ebay.ie for Dyson hairdryers, then fetch and filter every hit.
_category = dyson_items['hairdryer']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_IE',
)
df_hairdryers_ie, df_hairdryers_reviews_ie = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 137 items, Total so far: 937
937
156397172176
{'item_id': '156397172176', 'title': 'Dyson Supersonic Hair Dryer For Women (Purple/Grey) Pristine Condition with Box', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/156397172176?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKnQ6mX4%2FPmKBeYJzMb5gGcWe4XuADH%2FrAwt5MWIa%2B9rYor9qpNXuiQHdlr6CLxio1S%2Fp3Dmh%2Fady6Lqqq56ETS2mRfMm2CuSrJ4zaclwuYGBRfAhgzgAk%2FaMChAH112d3g1nccZTtXOMW3sdw9iz12iZR7RY0h04DEwbmXoqkm6vKF5lTOb%2F8jM1j4otAifd5A%3D', 'price_usd': '258.66', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '11858', 'category_name': 'Hair Dryers', 'condition_id': '3000', 'condition_name': 'Used', 'item_location_country_id': 'GB', 'item_location_country': 'United Kingdom', 'item_location_city': 'Huntingdon', 'number_sold': 15,

In [12]:
# Write the ebay.ie hairdryer items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_ie/items/' and 'extracted_data_ie/reviews/'.
for _frame, _csv_name in (
    (df_hairdryers_ie, 'hairdryers_ie.csv'),
    (df_hairdryers_reviews_ie, 'hairdryers_reviews_ie.csv'),
):
    _frame.to_csv(_csv_name)

**airwraps**

In [13]:
# Search ebay.ie for Dyson Airwraps, then fetch and filter every hit.
_category = dyson_items['airwrap']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_IE',
)
df_airwraps_ie, df_airwraps_reviews_ie = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 152 items, Total so far: 352
352
176639856228
{'item_id': '176639856228', 'title': 'Dyson Airwrap Long Complete Gifting Edition, Pink Blue, Mint Condition.', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/176639856228?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKkVhBlJG4UPEmlp%2BbPWSx5UrH1QC5XGTU15wpfd%2BJ3rqjawYTV0uxQKql4c%2FHnT2wu2KGhb6kHrKoa7imd%2FshagOY%2B6FKtjczSFa%2FV4DgLeWNskGmp4lEhHo0Fx7RFuj5Fdyf5KrDutjKEf9%2Fq5ctSTbVvbDuxZLKdwnLrVtEgflnGa9PGz2D31H14P7nJKci8%3D', 'price_usd': '316.09', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '177659', 'category_name': 'Piastre e arricciacapelli', 'condition_id': '3000', 'condition_name': 'Used', 'item_location_country_id': 'IT', 'item_location_country': 'Italy', 'item_location_city': 'Ravenna', 'number_sold': 2, 'number_available': 1, 'seller_username': 'moxma-ravenna', 'seller_positive_feedback_percentage': '98.8', 'seller_feedback

In [14]:
# Write the ebay.ie Airwrap items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_ie/items/' and 'extracted_data_ie/reviews/'.
for _frame, _csv_name in (
    (df_airwraps_ie, 'airwraps_ie.csv'),
    (df_airwraps_reviews_ie, 'airwraps_reviews_ie.csv'),
):
    _frame.to_csv(_csv_name)

**straighteners**

In [15]:
# Search ebay.ie for Dyson straighteners, then fetch and filter every hit.
_category = dyson_items['straightener']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_IE',
)
df_straighteners_ie, df_straighteners_reviews_ie = filtering_ebay_items(*_search_result)

Retrieved 109 items, Total so far: 109
109
285992928445
{'item_id': '285992928445', 'title': 'Brand New Dyson Corrale straightener Bright Copper/Bright Nickel', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/285992928445?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKnrLLcqsvF4JIUc66E%2FV6q8UpnQulGmJvn5sSj8ZkkHmD8%2FzJ99NNFQd%2BgaAriLSlb9JB5oT%2Bcijxegqe6Ob9Xsc6wCZlOChx9h5E1OM2czMJi18e0dNXVAlfHwBHezbnUFfVA%2FsRFHcLTMyvcsbbcDUsSjDNrPiobqAK57M73HMFTU7w82BfEicJ21KlZO9M8%3D', 'price_usd': '429.00', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '177659', 'category_name': 'Straighteners & Curling Tongs', 'condition_id': '1000', 'condition_name': 'New', 'item_location_country_id': 'GB', 'item_location_country': 'United Kingdom', 'item_location_city': 'Wembley', 'number_sold': 0, 'number_available': 1, 'seller_username': 'rajivagarwal1975', 'seller_positive_feedback_percentage': '96.4', 'seller_feedback_score': 3943, 'shipped_to_ie': Tru

In [16]:
# Write the ebay.ie straightener items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_ie/items/' and 'extracted_data_ie/reviews/'.
for _frame, _csv_name in (
    (df_straighteners_ie, 'straighteners_ie.csv'),
    (df_straighteners_reviews_ie, 'straighteners_reviews_ie.csv'),
):
    _frame.to_csv(_csv_name)

**fans**

In [17]:
# Search ebay.ie for Dyson fans, then fetch and filter every hit.
_category = dyson_items['fan']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_IE',
)
df_fans_ie, df_fans_reviews_ie = filtering_ebay_items(*_search_result)

Retrieved 45 items, Total so far: 45
45
305908300083
{'item_id': '305908300083', 'title': 'Dyson Purifier HP09 Purifying Fan Heater Hot+Cool Formaldehyde w/Remote - Read', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/305908300083?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKnUFlulhtwQehQoMvKBbl4DzHmxu4dxJoE%2FbYuXA%2B1fh75Be7COX0QNCWz--Dp%2F83QyrpVkZO09JAx%2FA06gZ6oCopmQSkrVCDKzC%2B93MHnySM3NgFwJ0sJSkz1SJEmer5d9bPk5SieWoud9ADBu0yMgTmzNSTz2%2FS722phuNBPf%2FFY4rttW2mPPb3DFBFiWFDM%3D', 'price_usd': '378.53', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '43510', 'category_name': 'Air Purifiers', 'condition_id': '3000', 'condition_name': 'Used', 'item_location_country_id': 'GB', 'item_location_country': 'United Kingdom', 'item_location_city': 'Reading', 'number_sold': 0, 'number_available': 1, 'seller_username': 'golfingnoob2012', 'seller_positive_feedback_percentage': '100.0', 'seller_feedback_score': 1123, 'shipped_to_ie': True

In [None]:
# Write the ebay.ie fan items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_ie/items/' and 'extracted_data_ie/reviews/'.
for _frame, _csv_name in (
    (df_fans_ie, 'fans_ie.csv'),
    (df_fans_reviews_ie, 'fans_reviews_ie.csv'),
):
    _frame.to_csv(_csv_name)

## Extracting items data for each category (eBay US)

**vacuum_cleaners**

In [8]:
# Search the US marketplace for Dyson vacuum cleaners, then fetch and filter every hit.
_category = dyson_items['vacuum_cleaner']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_US',
)
df_vacuum_cleaners_us, df_vacuum_cleaners_reviews_us = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 200 items, Total so far: 1800
Retrieved 200 items, Total so far: 2000
Retrieved 200 items, Total so far: 2200
Retrieved 200 items, Total so far: 2400
Retrieved 200 items, Total so far: 2600
Retrieved 200 items, Total so far: 2800
Retrieved 200 items, Total so far: 3000
Retrieved 64 items, Total so far: 3064
3064
276700866106
{'item_id': '276700866106', 'title': 'Dyson V10 Animal + Cordless Vacuum Cleaner | Purple | Certified Refurbished', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/276700866106?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKnpGRdLrOgJhb%2BXvLrCES3uESpukXIhGEIIL7ngQ3LUrTEdzpDah%2BOD3%2F9hEbz18wbCO%2BwADjD0h0Ga8lQ01xEaI%2FFtRz0cpPEQEGqI%2Bz9McyKj

In [9]:
# Write the US vacuum-cleaner items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_us/items/' and 'extracted_data_us/reviews/'.
for _frame, _csv_name in (
    (df_vacuum_cleaners_us, 'vacuum_cleaners_us.csv'),
    (df_vacuum_cleaners_reviews_us, 'vacuum_cleaners_reviews_us.csv'),
):
    _frame.to_csv(_csv_name)

**hairdryers**

In [8]:
# Search the US marketplace for Dyson hairdryers, then fetch and filter every hit.
_category = dyson_items['hairdryer']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_US',
)
df_hairdryers_us, df_hairdryers_reviews_us = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 200 items, Total so far: 1800
Retrieved 200 items, Total so far: 2000
Retrieved 200 items, Total so far: 2200
Retrieved 114 items, Total so far: 2314
2314
276495504432
{'item_id': '276495504432', 'title': 'Dyson Supersonic Hair Dryer | Certified Refurbished | Latest Generation', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/276495504432?var=579077102713&amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKkPvSteH%2B2AEh4Sfb78%2FRn%2Bhv%2FX6wSRDyni3jRY6U2aThtd7P61wTgntfPvgraibMjRdt2N7YPctotlnxdnfHJYQN4eZiuVKJbvs60D15qPgqgcuxPice12ek2U2ddlWqFBhgv0A9i9Zw0E5GGP8QVt8SulRqdv%2BRC21s8vFPXpDzVU%2FOdqhzdlolmWbmKCizc%3D', 'price_usd': '229.99', 'original_price_usd': '429.99', 'd

In [9]:
# Write the US hairdryer items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_us/items/' and 'extracted_data_us/reviews/'.
for _frame, _csv_name in (
    (df_hairdryers_us, 'hairdryers_us.csv'),
    (df_hairdryers_reviews_us, 'hairdryers_reviews_us.csv'),
):
    _frame.to_csv(_csv_name)

**airwraps**

In [10]:
# Search the US marketplace for Dyson Airwraps, then fetch and filter every hit.
_category = dyson_items['airwrap']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_US',
)
df_airwraps_us, df_airwraps_reviews_us = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 200 items, Total so far: 1800
Retrieved 200 items, Total so far: 2000
Retrieved 191 items, Total so far: 2191
2191
276710700795
{'item_id': '276710700795', 'title': 'Dyson Airwrap™ Origin Multi-Styler Long | Refurbished', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/276710700795?var=2560144079006&amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKkQlvTQ%2F9L4EdFz%2B%2BI47TQGJrZwNcSgXE3Y1og277Ue6GXHrETxH9fz7HRcKjL7cdQKbQDyOFE8fPz%2Feqfeygk%2BrjcHs%2FJEL6ZitVtf0oFFAsq%2F0NSa5d5vzCP4FbFJL%2BcwRv4GJqJu7YGEo7si0ZXr%2B4Pmf3qddZa5ShNV70eBh7GBnHhurdGCU3NCtwLS978%3D', 'price_usd': '249.99', 'original_price_usd': '599.99', 'discount_percentage': '58.0', 'discount_amount_usd':

In [11]:
# Write the US Airwrap items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_us/items/' and 'extracted_data_us/reviews/'.
for _frame, _csv_name in (
    (df_airwraps_us, 'airwraps_us.csv'),
    (df_airwraps_reviews_us, 'airwraps_reviews_us.csv'),
):
    _frame.to_csv(_csv_name)

**straighteners**

In [8]:
# Search the US marketplace for Dyson straighteners, then fetch and filter every hit.
_category = dyson_items['straightener']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_US',
)
df_straighteners_us, df_straighteners_reviews_us = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 56 items, Total so far: 856
856
275300691493
{'item_id': '275300691493', 'title': 'Dyson Corrale™ Straightener | Refurbished', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/275300691493?var=578877582369&amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKm8VWj50MBJDHyDDFzulS9Bp0K7BXJsIp7E%2FPAt%2FVXSl9eCbwwQYeDFsjOwszxxGYsNcSiwPLybLW73vXhYdgTEus8kMTEIVmKdbRfLa9oTnGk0aJI9SxN%2F8ChGB%2FP%2BZxaRdQDeQS%2BzLN6U3XBx5fokdf4LPdLNQLaX2t%2BESlc%2FQYLyQDKt2R32gOjZ78wLEqQ%3D', 'price_usd': '179.99', 'original_price_usd': '499.99', 'discount_percentage': '64.0', 'discount_amount_usd': '320.00', 'category_id': '177659', 'category_name': 'Straightening & Curling Irons', 'condition_id': '2000', 'condition_name': 'Certified - Refurbished', 'item_location_country_id': 'US', 'item_location_country': 'United States', 'item_location_city': 'Chic

In [9]:
# Write the US straightener items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_us/items/' and 'extracted_data_us/reviews/'.
for _frame, _csv_name in (
    (df_straighteners_us, 'straighteners_us.csv'),
    (df_straighteners_reviews_us, 'straighteners_reviews_us.csv'),
):
    _frame.to_csv(_csv_name)

**fans**

In [10]:
# Search the US marketplace for Dyson fans, then fetch and filter every hit.
_category = dyson_items['fan']
_search_result = search_ebay_items(
    _category.get('keyword'),
    _category.get('category_id'),
    _category.get('min_price'),
    'EBAY_US',
)
df_fans_us, df_fans_reviews_us = filtering_ebay_items(*_search_result)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 106 items, Total so far: 706
706
276646642466
{'item_id': '276646642466', 'title': 'Dyson TP4A Pure Cool™ tower purifier fan | White/Silver | Refurbished', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/276646642466?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKmvroYePNtjZZGQLHKp8S9Y2j6AGYYGjeuJSZOZNotxSsGx1t0YvdtRh7Xzb8lWqXxu8I6ydl5ibLN08gegv5rgFJ5aYrIhhLxQxRgbv4aai%2BNP8YwM%2FsfMN8FrVMWgACgmJSqeTLZuwCjhtcFjTMCRbRkafq14OdrnIujFvMoZh2p4qUZlq0NmvyNHQVem69o%3D', 'price_usd': '179.99', 'original_price_usd': '549.99', 'discount_percentage': '67.0', 'discount_amount_usd': '370.00', 'category_id': '43510', 'category_name': 'Air Purifiers', 'condition_id': '2000', 'condition_name': 'Certified - Refurbished', 'item_location_country_id': 'US', 'item_location_country': 'United States', 'item_location_city': 'Fairfield', 'number_sold': 256, 'number_available': 200, 'se

In [11]:
# Write the US fan items and reviews to the working directory.
# NOTE(review): the loading cells further down read these files from
# 'extracted_data_us/items/' and 'extracted_data_us/reviews/'.
for _frame, _csv_name in (
    (df_fans_us, 'fans_us.csv'),
    (df_fans_reviews_us, 'fans_reviews_us.csv'),
):
    _frame.to_csv(_csv_name)

## Creating dfs for items and reviews (ebay IE)

In [38]:
items_dir = 'extracted_data_ie/items/'
reviews_dir = 'extracted_data_ie/reviews/'


def _load_csv_ie(path):
    """Read a saved CSV, leaving out the 'Unnamed: 0' column if it is present."""
    return pd.read_csv(path, usecols=lambda column: column != 'Unnamed: 0')


# Item tables, one per product category
df_vacuum_cleaners_ie = _load_csv_ie(items_dir + 'vacuum_cleaners_ie.csv')
df_hairdryers_ie = _load_csv_ie(items_dir + 'hairdryers_ie.csv')
df_airwraps_ie = _load_csv_ie(items_dir + 'airwraps_ie.csv')
df_straighteners_ie = _load_csv_ie(items_dir + 'straighteners_ie.csv')
df_fans_ie = _load_csv_ie(items_dir + 'fans_ie.csv')

# Review tables, one per product category
df_vacuum_cleaners_reviews_ie = _load_csv_ie(reviews_dir + 'vacuum_cleaners_reviews_ie.csv')
df_hairdryers_reviews_ie = _load_csv_ie(reviews_dir + 'hairdryers_reviews_ie.csv')
df_airwraps_reviews_ie = _load_csv_ie(reviews_dir + 'airwraps_reviews_ie.csv')
df_straighteners_reviews_ie = _load_csv_ie(reviews_dir + 'straighteners_reviews_ie.csv')
df_fans_reviews_ie = _load_csv_ie(reviews_dir + 'fans_reviews_ie.csv')

In [39]:
# Stack the five per-category item tables and keep one row per item_id.
df_items_ie = pd.concat(
    [
        df_vacuum_cleaners_ie,
        df_hairdryers_ie,
        df_airwraps_ie,
        df_straighteners_ie,
        df_fans_ie,
    ],
    ignore_index=True,
).drop_duplicates(subset=['item_id'])

# Stack the review tables and drop fully identical rows.
df_reviews_ie = pd.concat(
    [
        df_vacuum_cleaners_reviews_ie,
        df_hairdryers_reviews_ie,
        df_airwraps_reviews_ie,
        df_straighteners_reviews_ie,
        df_fans_reviews_ie,
    ],
    ignore_index=True,
).drop_duplicates()

# Keep only reviews whose ItemID belongs to one of the retained items.
_known_item_ids = df_items_ie['item_id'].tolist()
df_reviews_ie = df_reviews_ie[df_reviews_ie['ItemID'].isin(_known_item_ids)]

## Creating dfs for items and reviews (ebay US)

In [40]:
items_dir = 'extracted_data_us/items/'
reviews_dir = 'extracted_data_us/reviews/'


def _load_csv_us(path):
    """Read a saved CSV, leaving out the 'Unnamed: 0' column if it is present."""
    return pd.read_csv(path, usecols=lambda column: column != 'Unnamed: 0')


# Item tables, one per product category
df_vacuum_cleaners_us = _load_csv_us(items_dir + 'vacuum_cleaners_us.csv')
df_hairdryers_us = _load_csv_us(items_dir + 'hairdryers_us.csv')
df_airwraps_us = _load_csv_us(items_dir + 'airwraps_us.csv')
df_straighteners_us = _load_csv_us(items_dir + 'straighteners_us.csv')
df_fans_us = _load_csv_us(items_dir + 'fans_us.csv')

# Review tables, one per product category
df_vacuum_cleaners_reviews_us = _load_csv_us(reviews_dir + 'vacuum_cleaners_reviews_us.csv')
df_hairdryers_reviews_us = _load_csv_us(reviews_dir + 'hairdryers_reviews_us.csv')
df_airwraps_reviews_us = _load_csv_us(reviews_dir + 'airwraps_reviews_us.csv')
df_straighteners_reviews_us = _load_csv_us(reviews_dir + 'straighteners_reviews_us.csv')
df_fans_reviews_us = _load_csv_us(reviews_dir + 'fans_reviews_us.csv')

In [41]:
# Stack the five per-category item tables and keep one row per item_id.
df_items_us = pd.concat(
    [
        df_vacuum_cleaners_us,
        df_hairdryers_us,
        df_airwraps_us,
        df_straighteners_us,
        df_fans_us,
    ],
    ignore_index=True,
).drop_duplicates(subset=['item_id'])

# Stack the review tables and drop fully identical rows.
df_reviews_us = pd.concat(
    [
        df_vacuum_cleaners_reviews_us,
        df_hairdryers_reviews_us,
        df_airwraps_reviews_us,
        df_straighteners_reviews_us,
        df_fans_reviews_us,
    ],
    ignore_index=True,
).drop_duplicates()

# Keep only reviews whose ItemID belongs to one of the retained items.
_known_item_ids = df_items_us['item_id'].tolist()
df_reviews_us = df_reviews_us[df_reviews_us['ItemID'].isin(_known_item_ids)]

## Merging IE and US data

In [42]:
# Combine both marketplaces; an item found on both keeps its first (IE) row.
df_items = pd.concat(
    [df_items_ie, df_items_us],
    ignore_index=True,
).drop_duplicates(subset=['item_id'], ignore_index=True)

In [43]:
# Combine the reviews of both marketplaces and drop fully identical rows.
df_reviews = pd.concat(
    [df_reviews_ie, df_reviews_us],
    ignore_index=True,
).drop_duplicates(ignore_index=True)

## Creating 3NF DB

In [44]:

# 1. Items Table: descriptive attributes of each listing
_item_columns = [
    'item_id', 'title', 'brand', 'url', 'category_id', 'condition_id',
    'item_location_country_id', 'item_location_city', 'marketplace_id',
    'searching_marketplace',
]
items = df_items[_item_columns].copy()

# 2. Categories Table: one row per category_id (last name seen wins)
categories = (
    df_items[['category_id', 'category_name']]
    .copy()
    .drop_duplicates(subset=['category_id'], keep='last', ignore_index=True)
)
# Category 177659 is shared by airwraps and straighteners, so give it one neutral name
categories.loc[categories['category_id'] == 177659, 'category_name'] = 'Airwraps & Straighteners'

# 3. Conditions Table: one row per condition_id
conditions = (
    df_items[['condition_id', 'condition_name']]
    .copy()
    .drop_duplicates(subset=['condition_id'], ignore_index=True)
)

# 4. Countries Table: one row per country code
countries = (
    df_items[['item_location_country_id', 'item_location_country']]
    .copy()
    .drop_duplicates(subset=['item_location_country_id'], ignore_index=True)
)
countries.loc[countries['item_location_country_id'] == 'KR', 'item_location_country'] = 'South Korea'
countries.columns = ['country_id', 'country_name']

# 5. Sellers Table: one row per seller with a generated 1-based seller_id
sellers = (
    df_items[['seller_username', 'seller_feedback_score', 'seller_positive_feedback_percentage']]
    .copy()
    .drop_duplicates(subset=['seller_username'], ignore_index=True)
)
sellers.insert(0, 'seller_id', range(1, len(sellers) + 1))
sellers = sellers.astype({'seller_id': int, 'seller_positive_feedback_percentage': float})

# 6. Prices Table: monetary values per item, all stored as floats
_price_columns = [
    'price_usd', 'original_price_usd', 'discount_percentage',
    'discount_amount_usd', 'shipping_cost_usd',
]
prices = df_items[['item_id'] + _price_columns[:4] + ['shipping_cost_usd', 'date_time']].copy()
prices = prices.astype({column: float for column in _price_columns})

# 7. Reviews Table: de-duplicated reviews with a generated 1-based review_id
reviews = df_reviews.drop_duplicates(ignore_index=True)
reviews.insert(0, 'review_id', range(1, len(reviews) + 1))
reviews.columns = ['review_id', 'item_id', 'reviewer_name', 'reviewer_score', 'review_text', 'review_date', 'review_type']
reviews = reviews.astype({'review_id': int, 'reviewer_score': int})

# 8. Seller_Item_Performance Table: per-item sales figures keyed by seller_id
seller_item_performance = (
    df_items[['item_id', 'seller_username', 'number_sold', 'number_available',
              'item_positive_feedback_percentage', 'date_time']]
    .copy()
    .merge(sellers[['seller_id', 'seller_username']], how='left', on='seller_username')
    .drop(columns=['seller_username'])
)

# Attach the number of collected reviews per item (0 when there are none)
reviews_count = reviews.groupby(by='item_id').size().reset_index(name='reviews_num')
seller_item_performance = seller_item_performance.merge(reviews_count, how='left', on='item_id')
seller_item_performance['reviews_num'] = seller_item_performance['reviews_num'].fillna(0).astype(int)
seller_item_performance = seller_item_performance[
    ['item_id', 'seller_id', 'number_sold', 'number_available', 'reviews_num',
     'item_positive_feedback_percentage', 'date_time']
]



## Fixing errors

In [45]:
# List the sellers whose seller_positive_feedback_percentage is missing
_missing_percentage = sellers['seller_positive_feedback_percentage'].isna()
seller_isna = sellers.loc[_missing_percentage]
seller_isna

Unnamed: 0,seller_id,seller_username,seller_feedback_score,seller_positive_feedback_percentage
1545,1546,rade_inc,1374,


In [46]:
# Look up the missing feedback percentage by searching for Dyson listings of
# seller 'rade_inc' and reading it from the first result's seller record.
url = 'https://api.ebay.com/buy/browse/v1/item_summary/search?q=Dyson&filter=sellers:{rade_inc}'
headers = {'Authorization': f'Bearer {access_token}'}

response = requests.get(url=url, headers=headers)
item_details = response.json().get('itemSummaries')

_first_listing_seller = item_details[0]['seller']
seller_positive_feedback_percentage = float(_first_listing_seller['feedbackPercentage'])

In [47]:
# Fill in the percentage retrieved above for seller 'rade_inc'
_is_rade_inc = sellers['seller_username'] == 'rade_inc'
sellers.loc[_is_rade_inc, 'seller_positive_feedback_percentage'] = seller_positive_feedback_percentage

In [48]:
# Display the patched seller row to confirm the value is now set
sellers[sellers['seller_username'].eq('rade_inc')]

Unnamed: 0,seller_id,seller_username,seller_feedback_score,seller_positive_feedback_percentage
1545,1546,rade_inc,1374,99.7


## Saving DB tables in csv files

In [50]:
# Create the tables directory if it doesn't exist
tables_path = 'ebay_db_csv/'
os.makedirs(tables_path, exist_ok=True)

# Save each DataFrame to a CSV file within the tables directory
# (index=False: the row index is not part of the table schema)
items.to_csv(tables_path+'items.csv', index=False)
categories.to_csv(tables_path+'categories.csv', index=False)
conditions.to_csv(tables_path+'conditions.csv', index=False)
countries.to_csv(tables_path+'countries.csv', index=False)
sellers.to_csv(tables_path+'sellers.csv', index=False)
prices.to_csv(tables_path+'prices.csv', index=False)
reviews.to_csv(tables_path+'reviews.csv', index=False)
seller_item_performance.to_csv(tables_path+'seller_item_performance.csv', index=False)