In [2]:
import requests
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import datetime
import pycountry
import xml.etree.ElementTree as ET

# Load environment variables for the eBay API access token
load_dotenv()
access_token = os.getenv('access_token')

In [3]:
dyson_items = {
    'vacuum_cleaner': {
        'keyword': 'dyson vacuum cleaner',
        'category_id': 20614,
        'category_name': 'Vacuum Cleaners',
        'min_price': 80
    },

    'hairdryer': {
        'keyword': 'dyson hairdryer',
        'category_id': 11858,
        'category_name': 'Hair Dryers',
        'min_price': 80
    },

    'airwrap': {
        'keyword': 'dyson airwrap',
        'category_id': 177659,
        'category_name': 'Straighteners & Curling Tongs',
        'min_price': 80
    },

    'straightener': {
        'keyword': 'dyson straightener',
        'category_id': 177659,
        'category_name': 'Straighteners & Curling Tongs',
        'min_price': 60
    },

    'fan': {
        'keyword': 'dyson fan',
        'category_id': 43510,
        'category_name': 'Air Purifiers',
        'min_price': 60
    },

}

In [4]:
# Search function to get item IDs
def search_ebay_items(keyword, category_id, min_price):
    url = 'https://api.ebay.com/buy/browse/v1/item_summary/search'
    headers = {
        'Authorization': f'Bearer {access_token}',
        "X-EBAY-C-ENDUSERCTX": "contextualLocation=country=IE%2Czip=A91%20K584",
        'X-EBAY-C-MARKETPLACE-ID': 'EBAY_IE'
    }
    params = {
        'q': keyword,
        'filter': (
            "buyingOptions:{FIXED_PRICE},"
            "brand:{Dyson},"
            f"price:[{min_price}..1000],"
            "conditionIds:{1000|1500|1750|2000|2010|2020|2030|2500|2750|3000|4000|5000},"
            "ship_to_country:IE,"
            "deliveryCountry:IE,"
            "sellerAccountTypes:{BUSINESS}"
        ),
        'category_ids': category_id,
        'limit': 200 
    }
    # print(params)
    offset = 0
    all_items_id = []
    
    while True:
        # Update offset in the parameters
        params['offset'] = offset     

        # Make the API request
        response = requests.get(url, headers=headers, params=params)
        
        # Check for successful response
        if response.status_code == 200:
            data = response.json()
            items = data.get('itemSummaries', [])
            
            # Break if no more items are returned
            if not items:
                break
            

            items_id = [item['itemId'] for item in items]
            
            # Append the retrieved items to all_items
            all_items_id.extend(items_id)
            
            # Increment offset for next set of items
            offset += len(items_id)
            
            # Optional: print progress
            print(f"Retrieved {len(items_id)} items, Total so far: {len(all_items_id)}")  

            if len(items_id) < params['limit']:
                break
        else:
            print("Error:", response.status_code, response.text)
            break
    
    print(len(all_items_id))
    return all_items_id
    

In [5]:
# Get detailed info for each item using getItem endpoint
def get_item_details(item_id):
    url = f'https://api.ebay.com/buy/browse/v1/item/{item_id}'
    headers = {
        'Authorization': f'Bearer {access_token}',
    }
    
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching details for item {item_id}: {response.status_code, response.text}")
        return None

In [6]:
def get_country_name(country_code):
    country = pycountry.countries.get(alpha_2=country_code)
    if country:
        return country.name
    else:
        return None

In [7]:
# Extract seller feedback for the specific item
# Define the XML request for GetFeedback
def create_get_feedback_xml(user_id, item_id):
    xml = f"""<?xml version="1.0" encoding="utf-8"?>
    <GetFeedbackRequest xmlns="urn:ebay:apis:eBLBaseComponents">
        <RequesterCredentials>
            <eBayAuthToken>{access_token}</eBayAuthToken>
        </RequesterCredentials>
        <UserID>{user_id}</UserID>
        <ItemID>{item_id}</ItemID>
        <FeedbackType>FeedbackReceivedAsSeller</FeedbackType>
        <DetailLevel>ReturnAll</DetailLevel>
        <Pagination>
            <EntriesPerPage>25</EntriesPerPage>
            <PageNumber>1</PageNumber>
        </Pagination>
    </GetFeedbackRequest>"""
    return xml


def get_feedback(user_id, item_id):
    url = "https://api.ebay.com/ws/api.dll"
    headers = {
        "Content-Type": "text/xml",
        "X-EBAY-API-CALL-NAME": "GetFeedback",
        "X-EBAY-API-SITEID": "0",
        "X-EBAY-API-COMPATIBILITY-LEVEL": "967"
    }

    xml_body = create_get_feedback_xml(user_id, item_id)
    # print(xml_body)
    response = requests.post(url, headers=headers, data=xml_body)

    if response.status_code == 200:
        return response.text
    else:
        print("Request failed:", response.status_code, response.text)
        return None
    
    
def parse_feedback_response(user_id, item_id):
    xml_data = get_feedback(user_id, item_id)

    # print(xml_data)
    
    root = ET.fromstring(xml_data)
    namespace = "{urn:ebay:apis:eBLBaseComponents}"
    
    # Feedback Details
    feedback_details = []
    for feedback in root.findall(f'.//{namespace}FeedbackDetail'):
        feedback_info = {
            'ItemID': feedback.find(f'{namespace}ItemID').text,
            'CommentingUser': feedback.find(f'{namespace}CommentingUser').text,
            'CommentingUserScore': feedback.find(f'{namespace}CommentingUserScore').text,
            'CommentText': feedback.find(f'{namespace}CommentText').text,
            'CommentTime': feedback.find(f'{namespace}CommentTime').text,
            'CommentType': feedback.find(f'{namespace}CommentType').text
        }
        feedback_details.append(feedback_info)
    
    return feedback_details

In [8]:
def items_filtering(items_id, min_price):
    filtered_items = []
    reviews = []
    i = 0
    for item_id in items_id:
        item_details = get_item_details(item_id)

        i += 1
        if i > 30:
            break
        
        # extract item details only for items with price > min_price
        if float(item_details['price']['value']) > min_price and item_details.get('brand') == 'Dyson':
            
            item = {}
            # item['item_id'] = item_details.get('itemId')
            item['item_id'] = item_details.get('legacyItemId')    
            item['title'] = item_details.get('title')
            item['brand'] = item_details.get('brand')
            item['url'] = item_details.get('itemWebUrl')

            item['price_usd'] = item_details['price']['value']
            item['original_price_usd'] = item_details.get('marketingPrice', {}).get('originalPrice', {}).get('value')
            item['discount_percentage'] = item_details.get('marketingPrice', {}).get('discountPercentage')
            item['discount_amount_usd'] = item_details.get('marketingPrice', {}).get('discountAmount', {}).get('value')

            item['category_id'] = item_details.get('categoryId')
            item['category_name'] = item_details.get('categoryPath', '').split('|')[-1]  # Last part of 'categoryPath'

            item['condition_id'] = item_details.get('conditionId')
            item['condition_name'] = item_details.get('condition')

            item['item_location_country_id'] = item_details.get('itemLocation', {}).get('country').strip()
            item['item_location_country'] = get_country_name(item['item_location_country_id'])
            item['item_location_city'] = item_details.get('itemLocation', {}).get('city').strip()
            if item['item_location_city'] == item['item_location_country_id']:
                item['item_location_city'] = None
            if item['item_location_city']:
                item['item_location_city'] = item['item_location_city'].capitalize()

            item['number_sold'] = item_details['estimatedAvailabilities'][0].get('estimatedSoldQuantity')
            item['number_available'] = item_details.get('estimatedAvailabilities', [{}])[0].get('estimatedRemainingQuantity')

            item['seller_username'] = item_details.get('seller', {}).get('username')
            item['seller_positive_feedback_percentage'] = item_details.get('seller', {}).get('feedbackPercentage')
            item['seller_feedback_score'] = item_details.get('seller', {}).get('feedbackScore')

            # item['shipped_to_ie'] = shipped_to_ie 
            item['shipping_cost_usd'] = item_details.get('shippingOptions', [{}])[0].get('shippingCost', {}).get('value')

            feedback_list = parse_feedback_response(item['seller_username'], item['item_id'])
            positive_feedback_num = 0
            positive_feedback_rate = None
            if feedback_list:
                for feedback in feedback_list:
                    reviews.append(feedback)
                    if feedback.get('CommentType') == 'Positive':
                        positive_feedback_num += 1
                positive_feedback_rate = positive_feedback_num / len(feedback_list) * 100
            
            item['item_positive_feedback_percentage'] = positive_feedback_rate
            item['marketplace_id'] = item_details.get('listingMarketplaceId')
            item['date_time'] = datetime.datetime.now()

            filtered_items.append(item)
            print(item)

    df_items = pd.DataFrame(filtered_items)
    df_reviews = pd.DataFrame(reviews)
    return df_items, df_reviews

In [19]:
# creating df for dyson airwrap (set min_price=50)
df_airwrap, df_airwrap_reviews = items_filtering(
    search_ebay_items(
        dyson_items['airwrap'].get('keyword'), 
        dyson_items['airwrap'].get('category_id'),
        dyson_items['airwrap'].get('min_price')
    ), 
    dyson_items['airwrap'].get('min_price')
)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 200 items, Total so far: 1800
Retrieved 137 items, Total so far: 1937
1937
{'item_id': '296523071840', 'title': 'Dyson Japan Limited Color Airwrap Multi Styler HS05 BPR Sakura Rose Gold ', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/296523071840?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKl4d1ofxSYMJ93n5nlCmu2cNyALFa%2BUKCXufDqa8x1pKi7hPbR7dka29A7NxxesqJi%2FGisikqYQMc4oLM5Nm7CG1nT%2FGwEc631WNwr9HBiqYYvmyQKd5MJT1WOtV6i3S23MBwecusBRTC13T442f1%2FGUtT1i%2FdyHt9zyRfQhlOxanGKcV7HM3vsJhvUPWnblac%3D', 'price (USD)': '378.00', 'original_price (USD)': None, 'discount_percentage': None, 'discount_amount (USD)': None, 'category_id': '177659', 'category_name': 'Straighte

In [45]:
# creating df for dyson airwrap (set min_price=50)
df_fan, df_fan_reviews = items_filtering(
    search_ebay_items(
        dyson_items['fan'].get('keyword'), 
        dyson_items['fan'].get('category_id'),
        dyson_items['fan'].get('min_price')
    ), 
    dyson_items['fan'].get('min_price')
)

Retrieved 200 items, Total so far: 200
Retrieved 88 items, Total so far: 288
288
{'item_id': '135351644784', 'title': 'Dyson BP01 Pure Cool Me Purifying Fan - White/Silver NO REMOTE', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/135351644784?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKlUQe5%2FNSm0eLZXJTUUOdfkiI%2FSSetBpRrl%2But2zEdZh%2B8VTb3A0fhXs347vNjKm7oOABUKb8ZYRp8P8VqvrRvAvkhodxXje%2F%2B%2FuMcoFIXeNYzrpKwLEISGAgxa6MTQ5oqcTDaS1F0LX6jSDbvkqNzWAYrOelmdbayEPTKVLLFCPXdfEM%2B1r6AQOs%2Bh%2BCxYW%2BE%3D', 'price (USD)': '141.66', 'original_price (USD)': None, 'discount_percentage': None, 'discount_amount (USD)': None, 'category_id': '43510', 'category_name': 'Air Purifiers', 'condition_id': '3000', 'condition': 'Used', 'item_location_country_id': 'GB', 'item_location_country': 'United Kingdom', 'item_location_city': 'Waltham cross', 'number_sold': 0, 'number_available': 1, 'seller_username': 'luke_chris', 'seller_feedback_percentage': '99.6', 'seller_feedback_score': 856, 'shipping_

In [None]:
df_fan

In [34]:
df_hairdryer, df_hairdryer_reviews = items_filtering(
    search_ebay_items(
        dyson_items['hairdryer'].get('keyword'), 
        dyson_items['hairdryer'].get('category_id'),
        dyson_items['hairdryer'].get('min_price')
    ), 
    dyson_items['hairdryer'].get('min_price')
)

Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 7 items, Total so far: 1007
1007
{'item_id': '315931190960', 'title': 'Dyson Supersonic Nural Hair Dryer HD07 - Ceramic Patina / Topaz *NEW&/ORIGINAL BOX*✅', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/315931190960?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKk4r58QGgeLW9lIWj4IojTkr3Di3jj2m049HTMwlMSCnjllnYhZX%2BD9XJBDKvxnvaontxYUZDKB3cOTQkk0kbRGzfx7I6vrUXvumv0XBAl8RDRhOBm8PgrYMrBIZ4mF2DiQdCJc2ZkTUw8tFbGRiv3RIEMST4hQmBovOe78q28yR8l0JbdDAUlnDmwmepYrfpA%3D', 'price (USD)': '493.52', 'original_price (USD)': None, 'discount_percentage': None, 'discount_amount (USD)': None, 'category_id': '11858', 'category_name': 'Haartrockner', 'condition_id': '1000', 'condition': 'New', 'item_location_country_id': 'DE', 'item_location_country': 'Germany', 'item_location_city': 'Garmisch-partenki

In [None]:
df_hairdryer

## Creating a df of items and reviews

In [40]:
df_items = pd.DataFrame()
df_reviews = pd.DataFrame()
for key, value in dyson_items.items():
    items, reviews = items_filtering(
        search_ebay_items(
            value.get('keyword'), 
            value.get('category_id'),
            value.get('min_price')
        ),
        value.get('min_price')
    )
    df_items = pd.concat([df_items, items], ignore_index=True)
    df_reviews = pd.concat([df_reviews, reviews], ignore_index=True)


Retrieved 200 items, Total so far: 200
Retrieved 200 items, Total so far: 400
Retrieved 200 items, Total so far: 600
Retrieved 200 items, Total so far: 800
Retrieved 200 items, Total so far: 1000
Retrieved 200 items, Total so far: 1200
Retrieved 200 items, Total so far: 1400
Retrieved 200 items, Total so far: 1600
Retrieved 117 items, Total so far: 1717
1717
{'item_id': '176667241546', 'title': 'Dyson DC49 Vacuum Cleaner Hoover Blue & Silver Tested & Working', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/176667241546?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKmG5cb2pNx7aU%2F8JIxA6tw8pxG8U5D6dhPeSBjJM1J7UcvAwbrGIq5Or6awA63HBZo3HnT5%2FrdBehYf8rnLYIAG62QDS%2FEu2qGQHaWYDotqu%2B5lzysLMNNSaohHhAt4st4vhJURZ44s%2FGzIy71GL%2F3Zs0VjK9GMYyrucKv3OHSvPM0hJ8sPHuIh%2Biiq%2BEze0XU%3D', 'price_usd': '103.02', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '20614', 'category_name': 'Vacuum Cleaners', 'condition_id': '3000', 'condition_name': 

  df_items = pd.concat([df_items, items], ignore_index=True)


Retrieved 48 items, Total so far: 48
48
{'item_id': '135351644784', 'title': 'Dyson BP01 Pure Cool Me Purifying Fan - White/Silver NO REMOTE', 'brand': 'Dyson', 'url': 'https://www.ebay.com/itm/135351644784?amdata=enc%3AAQAJAAAAoHoV3kP08IDx%2BKZ9MfhVJKlUQe5%2FNSm0eLZXJTUUOdfkiI%2FSSetBpRrl%2But2zEdZhxF8itlWLNhAo0q6cDqkwGRQV9guk%2BPCeOdozxiGuE5jOlNIMif4bgIepoplyRrpJPinqLoh8rvqlXC%2Bb5zw7hFYzc1CfdxUXaVpOgHuLIHSE3rSJqXN5H6LAvRemK95zZm48fDhZRZt9rB37JiqLtY%3D', 'price_usd': '141.66', 'original_price_usd': None, 'discount_percentage': None, 'discount_amount_usd': None, 'category_id': '43510', 'category_name': 'Air Purifiers', 'condition_id': '3000', 'condition_name': 'Used', 'item_location_country_id': 'GB', 'item_location_country': 'United Kingdom', 'item_location_city': 'Waltham cross', 'number_sold': 0, 'number_available': 1, 'seller_username': 'luke_chris', 'seller_positive_feedback_percentage': '99.6', 'seller_feedback_score': 856, 'shipping_cost_usd': '56.29', 'item_positive_feedback_p

In [None]:
df_items

## Separating items df into 3NF database

In [87]:
# df_items.drop_duplicates(subset=['item_id'], inplace=True, ignore_index=True)

# 1. Items Table
items_df = df_items[['item_id', 'title', 'brand', 'url', 'category_id', 'condition_id', 'item_location_country_id',
                      'item_location_city', 'marketplace_id']].drop_duplicates(
                         subset=['item_id'], ignore_index=True)

# 2. Categories Table
categories_df = df_items[['category_id', 'category_name']].drop_duplicates(
    subset=['category_id'], ignore_index=True)

# 3. Conditions Table
conditions_df = df_items[['condition_id', 'condition_name']].drop_duplicates(
    subset=['condition_id'], ignore_index=True)

# 4. Countries Table
countries_df = df_items[['item_location_country_id', 'item_location_country']].drop_duplicates(
    subset=['item_location_country_id'], ignore_index=True)

# 5. Sellers Table
sellers_df = df_items[['seller_username', 'seller_feedback_score', 
                       'seller_positive_feedback_percentage']].drop_duplicates(
                           subset=['seller_username'], ignore_index=True)
sellers_df.insert(0, 'seller_id', range(1, len(sellers_df) + 1))
sellers_df['seller_positive_feedback_percentage'] = sellers_df['seller_positive_feedback_percentage'].astype(float)

# 6. Prices Table
prices_df = df_items[['item_id', 'price_usd', 'original_price_usd', 'discount_percentage', 
                      'discount_amount_usd', 'shipping_cost_usd', 'date_time']].drop_duplicates(
                          subset=['item_id'], ignore_index=True)
prices_df['price_usd'] = prices_df['price_usd'].astype(float)
prices_df['original_price_usd'] = prices_df['original_price_usd'].astype(float)
prices_df['discount_percentage'] = prices_df['discount_percentage'].astype(float)
prices_df['discount_amount_usd'] = prices_df['discount_amount_usd'].astype(float)
prices_df['shipping_cost_usd'] = prices_df['shipping_cost_usd'].astype(float)

# 7. Reviews Table
reviews_df = df_reviews.drop_duplicates(ignore_index=True)
reviews_df.columns = ['item_id', 'commenting_user', 'commenting_user_score', 'comment_text', 'date_time', 'comment_type']
reviews_df['commenting_user_score'] = reviews_df['commenting_user_score'].astype(int)

# 8. Seller_Item_Performance Table
seller_item_performance_df = df_items[['item_id', 'seller_username', 'number_sold', 'number_available', 
                                       'item_positive_feedback_percentage', 'date_time']].drop_duplicates(
                                           subset=['item_id'], ignore_index=True)
                                           
seller_item_performance_df = seller_item_performance_df.merge(sellers_df[['seller_id', 'seller_username']], how='right', on='seller_username')
seller_item_performance_df.drop(columns=['seller_username'], inplace=True)
seller_item_performance_df = seller_item_performance_df[['item_id', 'seller_id', 'number_sold', 'number_available', 'item_positive_feedback_percentage']]

reviews_count = reviews_df.groupby(by='item_id').size().reset_index(name='reviews_num')
seller_item_performance_df = seller_item_performance_df.merge(reviews_count, how='left', on='item_id')
seller_item_performance_df['reviews_num'] = seller_item_performance_df['reviews_num'].fillna(0)
seller_item_performance_df['reviews_num'] = seller_item_performance_df['reviews_num'].astype(int)





In [None]:
items_df

Unnamed: 0,item_id,seller_id,number_sold,number_available,item_positive_feedback_percentage,reviews_num
0,176667241546,1,0,1,,0
1,315447342001,2,0,4,,0
2,315431242700,2,20,4,100.0,6
3,315431242704,2,14,3,100.0,4
4,315431242732,2,22,24,100.0,3
...,...,...,...,...,...,...
70,276293136193,44,0,1,,0
71,276567449576,44,0,1,,0
72,395471344783,45,1,1,,0
73,315877882383,46,0,1,,0


In [58]:
seller_item_performance_df

Unnamed: 0,item_id,seller_id,number_sold,number_available,item_positive_feedback_percentage
0,176667241546,1,0,1,
1,315447342001,2,0,4,
2,315431242700,2,20,4,100.0
3,315431242704,2,14,3,100.0
4,315431242732,2,22,24,100.0
...,...,...,...,...,...
70,276293136193,44,0,1,
71,276567449576,44,0,1,
72,395471344783,45,1,1,
73,315877882383,46,0,1,


In [57]:
df_reviews

Unnamed: 0,ItemID,CommentingUser,CommentingUserScore,CommentText,CommentTime,CommentType
0,315431242700,c***a,201,Very fast delivery wife loves it perfect,2024-08-19T07:00:00.000Z,Positive
1,315431242700,k***k,584,Great product Fantastic price,2024-07-08T07:00:00.000Z,Positive
2,315431242700,n***u,1,äºæå¥½è¯ï¼,2024-07-01T07:00:00.000Z,Positive
3,315431242700,g***l,751,Great Dyson as usualð,2024-07-01T07:00:00.000Z,Positive
4,315431242700,g***l,751,Great vacuum cleaner and fast delivery.,2024-07-01T07:00:00.000Z,Positive
...,...,...,...,...,...,...
78,116237908965,o***a,2507,Highly trustworthy seller with exceptional cus...,2024-09-02T07:00:00.000Z,Positive
79,116237908965,k***k,54,"In very good condition, would reccomend this s...",2024-08-19T07:00:00.000Z,Positive
80,116237908965,9***n,10,Great to deal with.,2024-08-12T07:00:00.000Z,Positive
81,126579209303,d***e,633,Super fast delivery and pleased with item. Thanks,2024-07-22T07:00:00.000Z,Positive


In [9]:
get_country_name('KR')

'Korea, Republic of'