In [34]:
pip install requests beautifulsoup4 pandas plotly ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [35]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import time
import random
from urllib.parse import quote
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

In [37]:
class PriceComparisonScraper:
    """Scrape the first search results from several retailers and compare prices.

    Supported retailers are eBay, Walmart, Amazon and Flipkart. Every
    ``scrape_*`` method returns a list of dicts with the keys ``title``,
    ``price``, ``link``, ``source``, ``condition`` and ``shipping``;
    ``compare_prices`` merges them into one DataFrame sorted by price.

    NOTE(review): ``clean_price`` strips both ``$`` and ``₹`` and keeps only the
    number, so prices are stored without their currency and are compared
    as-is -- confirm that mixing currencies in one column is intended.
    """

    # Number of result elements inspected per retailer.
    MAX_RESULTS = 5
    # Seconds to wait for a retailer before giving up; without a timeout
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 15

    def __init__(self):
        """Set the browser-like request headers and the per-retailer search URLs."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            # "*/*" is the catch-all media range; a bare "/" is not a valid one.
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }
        self.ebay_base = "https://www.ebay.com/sch/i.html?_nkw="
        self.walmart_base = "https://www.walmart.com/search?q="
        self.amazon_base = "https://www.amazon.com/s?k="
        self.flipkart_base = "https://www.flipkart.com/search?q="

    def _get_page_content(self, url):
        """Download ``url`` and return it parsed with BeautifulSoup.

        Returns None (after printing the error) when the request fails, times
        out, or the server answers with an HTTP error status.
        """
        try:
            time.sleep(random.uniform(2, 4))  # Random delay to avoid blocking
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return BeautifulSoup(response.content, 'html.parser')
        except requests.RequestException as e:
            print(f"Error fetching {url}: {str(e)}")
            return None

    def clean_price(self, price_str):
        """Convert a price string such as ``"$1,234.50"`` to a float.

        Currency symbols (``$``, ``₹``), thousands separators and the word
        ``from`` are removed first. Returns None when ``price_str`` is not a
        string or what remains is not a number.
        """
        try:
            return float(price_str.replace('$', '').replace('₹', '').replace(',', '').replace('from', '').strip())
        except (AttributeError, TypeError, ValueError):
            # Only "not a string" / "not a number" are expected here; anything
            # else (e.g. KeyboardInterrupt) should propagate.
            return None

    def _find_result_items(self, base_url, search_term, tag, attrs):
        """Fetch a search page and return at most MAX_RESULTS matching elements.

        Returns an empty list when the page could not be fetched.
        """
        soup = self._get_page_content(base_url + quote(search_term))
        if not soup:
            return []
        return soup.find_all(tag, attrs)[:self.MAX_RESULTS]

    def _extract_price(self, item, tag, attrs):
        """Return the parsed price found inside ``item``, or None if missing/unparseable."""
        price_elem = item.find(tag, attrs)
        if not price_elem:
            return None
        return self.clean_price(price_elem.text)

    @staticmethod
    def _text_or(elem, default):
        """Return ``elem.text`` when the element was found, otherwise ``default``."""
        return elem.text if elem else default

    @staticmethod
    def _absolute_url(site_root, href):
        """Prefix ``href`` with ``site_root`` unless it already is an absolute URL."""
        if href.startswith(('http://', 'https://')):
            return href
        return f"{site_root}{href}"

    def _collect(self, source, items, parse_item):
        """Run ``parse_item`` on every element and keep the products it returns.

        ``parse_item`` returns a product dict, or None to skip the element.
        """
        products = []
        for item in items:
            try:
                product = parse_item(item)
            except Exception as exc:
                # Best effort: one malformed element must not abort the whole
                # search, but report it instead of hiding it.
                print(f"Skipping unparseable {source} result: {exc}")
                continue
            if product is not None:
                products.append(product)
        return products

    def _parse_ebay_item(self, item):
        """Build a product dict from one eBay result element, or None to skip it."""
        title_elem = item.find('div', {'class': 's-item__title'})
        # Entries titled "Shop on eBay" are skipped (presumably placeholder cards).
        if not title_elem or 'Shop on eBay' in title_elem.text:
            return None
        price = self._extract_price(item, 'span', {'class': 's-item__price'})
        if not price:
            return None
        link_elem = item.find('a', {'class': 's-item__link'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': link_elem['href'] if link_elem else '',
            'source': 'eBay',
            'condition': self._text_or(item.find('span', {'class': 's-item__condition'}), 'Not specified'),
            'shipping': self._text_or(item.find('span', {'class': 's-item__shipping'}), 'Not specified'),
        }

    def _parse_walmart_item(self, item):
        """Build a product dict from one Walmart result element, or None to skip it."""
        title_elem = item.find('span', {'data-automation-id': 'product-title'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'div', {'data-automation-id': 'product-price'})
        if not price:
            return None
        anchor = item.find('a')
        if anchor is None or anchor.get('href') is None:
            return None  # no link to the product: skip it
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.walmart.com', anchor['href']),
            'source': 'Walmart',
            'condition': 'New',  # Walmart typically sells new items
            'shipping': self._text_or(
                item.find('div', {'data-automation-id': 'fulfillment-badge'}), 'Standard shipping'),
        }

    def _parse_amazon_item(self, item):
        """Build a product dict from one Amazon result element, or None to skip it."""
        title_elem = item.find('span', {'class': 'a-size-medium'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'span', {'class': 'a-offscreen'})
        if not price:
            return None
        link_elem = item.find('a', {'class': 'a-link-normal'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.amazon.com', link_elem['href']) if link_elem else '',
            'source': 'Amazon',
            'condition': 'New',  # Assume new for Amazon
            'shipping': self._text_or(
                item.find('div', {'class': 'a-row a-size-base a-color-secondary'}), 'Standard shipping'),
        }

    def _parse_flipkart_item(self, item):
        """Build a product dict from one Flipkart result element, or None to skip it."""
        title_elem = item.find('div', {'class': '_4rR01T'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'div', {'class': '_30jeq3'})
        if not price:
            return None
        link_elem = item.find('a', {'class': '_1fQZEK'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.flipkart.com', link_elem['href']) if link_elem else '',
            'source': 'Flipkart',
            'condition': 'New',  # Assume new for Flipkart
            'shipping': 'Standard shipping',  # Flipkart typically offers standard shipping
        }

    def scrape_ebay(self, search_term):
        """Return the products parsed from the first eBay results for ``search_term``."""
        items = self._find_result_items(self.ebay_base, search_term, 'div', {'class': 's-item__info'})
        return self._collect('eBay', items, self._parse_ebay_item)

    def scrape_walmart(self, search_term):
        """Return the products parsed from the first Walmart results for ``search_term``."""
        items = self._find_result_items(self.walmart_base, search_term, 'div', {'data-item-id': True})
        return self._collect('Walmart', items, self._parse_walmart_item)

    def scrape_amazon(self, search_term):
        """Return the products parsed from the first Amazon results for ``search_term``."""
        items = self._find_result_items(
            self.amazon_base, search_term, 'div', {'data-component-type': 's-search-result'})
        return self._collect('Amazon', items, self._parse_amazon_item)

    def scrape_flipkart(self, search_term):
        """Return the products parsed from the first Flipkart results for ``search_term``."""
        items = self._find_result_items(self.flipkart_base, search_term, 'div', {'class': '_1AtVbE'})
        return self._collect('Flipkart', items, self._parse_flipkart_item)

    def compare_prices(self, search_term):
        """Search every retailer and return all products sorted by ascending price.

        Prints progress per retailer. Returns an empty DataFrame when no
        retailer yielded a product; otherwise the frame has one row per product
        plus a ``timestamp`` column holding the time of the comparison.
        """
        print(f"Searching for '{search_term}'...")

        retailers = (
            ('eBay', self.scrape_ebay),
            ('Walmart', self.scrape_walmart),
            ('Amazon', self.scrape_amazon),
            ('Flipkart', self.scrape_flipkart),
        )
        all_products = []
        for retailer, scrape in retailers:
            found = scrape(search_term)
            print(f"Found {len(found)} products from {retailer}")
            all_products.extend(found)

        if not all_products:
            print("No products found!")
            return pd.DataFrame()

        df = pd.DataFrame(all_products)
        df['timestamp'] = datetime.now()
        return df.sort_values('price')

In [38]:
def create_price_comparison_chart(df):
    """Build a box plot of prices with one box per retailer.

    Expects ``df`` to have ``source``, ``price`` and ``title`` columns. Every
    individual price is drawn next to its box and carries the product title as
    hover text. Returns the plotly figure without showing it.
    """
    figure = go.Figure()
    retailers = df['source']

    for retailer in retailers.unique():
        rows = df.loc[retailers == retailer]
        box = go.Box(
            y=rows['price'],
            name=retailer,
            boxpoints='all',
            jitter=0.3,
            pointpos=-1.8,
            hovertext=rows['title'],
        )
        figure.add_trace(box)

    layout_options = {
        'title': 'Price Distribution by Retailer',
        'yaxis_title': 'Price ($)',
        'showlegend': True,
    }
    figure.update_layout(**layout_options)

    return figure

def create_condition_chart(df):
    """Build a bar chart counting products per retailer, coloured by condition.

    Expects ``df`` to have ``source`` and ``condition`` columns. Returns the
    plotly figure without showing it.
    """
    group_sizes = df.groupby(['source', 'condition']).size()
    condition_counts = group_sizes.reset_index(name='count')

    axis_labels = {'count': 'Number of Products', 'source': 'Retailer'}
    return px.bar(
        condition_counts,
        x='source',
        y='count',
        color='condition',
        title='Product Conditions by Retailer',
        labels=axis_labels,
    )

def search_and_visualize(search_term):
    """Run a price comparison for ``search_term`` and render the results inline.

    Displays a styled results table, the price and condition charts and a
    summary-statistics table, then writes every result row to a timestamped
    CSV file. Prints a message and returns early when nothing was found.
    """
    results = PriceComparisonScraper().compare_prices(search_term)

    if results.empty:
        print("No results found. Try a different search term.")
        return

    # Results table: prices formatted as dollars and shaded with 'RdYlGn_r'
    display(HTML("<h2>🔍 Price Comparison Results</h2>"))
    table_columns = ['title', 'price', 'source', 'condition', 'shipping']
    formatted = results[table_columns].style.format({'price': '${:.2f}'})
    display(formatted.background_gradient(subset=['price'], cmap='RdYlGn_r'))

    # Price comparison chart
    create_price_comparison_chart(results).show()

    # Condition chart
    create_condition_chart(results).show()

    # Summary statistics
    display(HTML("<h3>📊 Price Summary Statistics</h3>"))
    prices = results['price']
    stat_values = [
        prices.min(),
        prices.max(),
        prices.mean(),
        prices.median(),
        prices.max() - prices.min(),
    ]
    summary_stats = pd.DataFrame({
        'Metric': ['Lowest Price', 'Highest Price', 'Average Price', 'Median Price', 'Price Range'],
        'Value': [f"${value:.2f}" for value in stat_values],
    })
    display(summary_stats)

    # Persist the full result set; the timestamp keeps earlier exports intact
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"price_comparison_{stamp}.csv"
    results.to_csv(filename, index=False)
    print(f"\n💾 Results saved to {filename}")

In [None]:
pip install matplotlib

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [40]:
from ipywidgets import interact, widgets

@interact(
    search_term=widgets.Text(
        value='airpods pro',
        placeholder='Type product name...',
        description='🔍 Search:',
        style={'description_width': 'initial'},
        # Only fire when the text is submitted (Enter / leaving the field);
        # with continuous updates every keystroke would start a new search.
        continuous_update=False,
    )
)
def interactive_search(search_term):
    """Run the search and visualisation for the submitted text; ignore blank input."""
    if search_term.strip():
        search_and_visualize(search_term)

interactive(children=(Text(value='airpods pro', description='🔍 Search:', placeholder='Type product name...', s…

In [72]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import time
import random
from urllib.parse import quote
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, HTML
import warnings
import smtplib
from email.mime.text import MIMEText

warnings.filterwarnings('ignore')

In [80]:
class PriceComparisonScraper:
    """Scrape the first search results from several retailers and compare prices.

    Supported retailers are eBay, Walmart, Amazon and Flipkart. Every
    ``scrape_*`` method returns a list of dicts with the keys ``title``,
    ``price``, ``link``, ``source``, ``condition`` and ``shipping``;
    ``compare_prices`` merges them into one DataFrame sorted by price and can
    optionally email that table.

    NOTE(review): ``clean_price`` strips both ``$`` and ``₹`` and keeps only the
    number, so prices are stored without their currency and are compared
    as-is -- confirm that mixing currencies in one column is intended.
    """

    # Number of result elements inspected per retailer.
    MAX_RESULTS = 5
    # Seconds to wait for a retailer before giving up; without a timeout
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 15
    # Envelope sender and SMTP relay used by send_email().
    SENDER_EMAIL = "your_email@example.com"  # Replace with your email address
    SMTP_HOST = 'localhost'

    def __init__(self):
        """Set the browser-like request headers and the per-retailer search URLs."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            # "*/*" is the catch-all media range; a bare "/" is not a valid one.
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
        }
        self.ebay_base = "https://www.ebay.com/sch/i.html?_nkw="
        self.walmart_base = "https://www.walmart.com/search?q="
        self.amazon_base = "https://www.amazon.com/s?k="
        self.flipkart_base = "https://www.flipkart.com/search?q="

    def _get_page_content(self, url):
        """Download ``url`` and return it parsed with BeautifulSoup.

        Returns None (after printing the error) when the request fails, times
        out, or the server answers with an HTTP error status.
        """
        try:
            time.sleep(random.uniform(2, 4))  # Random delay to avoid blocking
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return BeautifulSoup(response.content, 'html.parser')
        except requests.RequestException as e:
            print(f"Error fetching {url}: {str(e)}")
            return None

    def clean_price(self, price_str):
        """Convert a price string such as ``"$1,234.50"`` to a float.

        Currency symbols (``$``, ``₹``), thousands separators and the word
        ``from`` are removed first. Returns None when ``price_str`` is not a
        string or what remains is not a number.
        """
        try:
            return float(price_str.replace('$', '').replace('₹', '').replace(',', '').replace('from', '').strip())
        except (AttributeError, TypeError, ValueError):
            # Only "not a string" / "not a number" are expected here; anything
            # else (e.g. KeyboardInterrupt) should propagate.
            return None

    def _find_result_items(self, base_url, search_term, tag, attrs):
        """Fetch a search page and return at most MAX_RESULTS matching elements.

        Returns an empty list when the page could not be fetched.
        """
        soup = self._get_page_content(base_url + quote(search_term))
        if not soup:
            return []
        return soup.find_all(tag, attrs)[:self.MAX_RESULTS]

    def _extract_price(self, item, tag, attrs):
        """Return the parsed price found inside ``item``, or None if missing/unparseable."""
        price_elem = item.find(tag, attrs)
        if not price_elem:
            return None
        return self.clean_price(price_elem.text)

    @staticmethod
    def _text_or(elem, default):
        """Return ``elem.text`` when the element was found, otherwise ``default``."""
        return elem.text if elem else default

    @staticmethod
    def _absolute_url(site_root, href):
        """Prefix ``href`` with ``site_root`` unless it already is an absolute URL."""
        if href.startswith(('http://', 'https://')):
            return href
        return f"{site_root}{href}"

    def _collect(self, source, items, parse_item):
        """Run ``parse_item`` on every element and keep the products it returns.

        ``parse_item`` returns a product dict, or None to skip the element.
        """
        products = []
        for item in items:
            try:
                product = parse_item(item)
            except Exception as exc:
                # Best effort: one malformed element must not abort the whole
                # search, but report it instead of hiding it.
                print(f"Skipping unparseable {source} result: {exc}")
                continue
            if product is not None:
                products.append(product)
        return products

    def _parse_ebay_item(self, item):
        """Build a product dict from one eBay result element, or None to skip it."""
        title_elem = item.find('div', {'class': 's-item__title'})
        # Entries titled "Shop on eBay" are skipped (presumably placeholder cards).
        if not title_elem or 'Shop on eBay' in title_elem.text:
            return None
        price = self._extract_price(item, 'span', {'class': 's-item__price'})
        if not price:
            return None
        link_elem = item.find('a', {'class': 's-item__link'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': link_elem['href'] if link_elem else '',
            'source': 'eBay',
            'condition': self._text_or(item.find('span', {'class': 's-item__condition'}), 'Not specified'),
            'shipping': self._text_or(item.find('span', {'class': 's-item__shipping'}), 'Not specified'),
        }

    def _parse_walmart_item(self, item):
        """Build a product dict from one Walmart result element, or None to skip it."""
        title_elem = item.find('span', {'data-automation-id': 'product-title'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'div', {'data-automation-id': 'product-price'})
        if not price:
            return None
        anchor = item.find('a')
        if anchor is None or anchor.get('href') is None:
            return None  # no link to the product: skip it
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.walmart.com', anchor['href']),
            'source': 'Walmart',
            'condition': 'New',  # Walmart typically sells new items
            'shipping': self._text_or(
                item.find('div', {'data-automation-id': 'fulfillment-badge'}), 'Standard shipping'),
        }

    def _parse_amazon_item(self, item):
        """Build a product dict from one Amazon result element, or None to skip it."""
        title_elem = item.find('span', {'class': 'a-size-medium'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'span', {'class': 'a-offscreen'})
        if not price:
            return None
        link_elem = item.find('a', {'class': 'a-link-normal'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.amazon.com', link_elem['href']) if link_elem else '',
            'source': 'Amazon',
            'condition': 'New',  # Assume new for Amazon
            'shipping': self._text_or(
                item.find('div', {'class': 'a-row a-size-base a-color-secondary'}), 'Standard shipping'),
        }

    def _parse_flipkart_item(self, item):
        """Build a product dict from one Flipkart result element, or None to skip it."""
        title_elem = item.find('div', {'class': '_4rR01T'})
        if not title_elem:
            return None
        price = self._extract_price(item, 'div', {'class': '_30jeq3'})
        if not price:
            return None
        link_elem = item.find('a', {'class': '_1fQZEK'})
        return {
            'title': title_elem.text.strip(),
            'price': price,
            'link': self._absolute_url('https://www.flipkart.com', link_elem['href']) if link_elem else '',
            'source': 'Flipkart',
            'condition': 'New',  # Assume new for Flipkart
            'shipping': 'Standard shipping',  # Flipkart typically offers standard shipping
        }

    def scrape_ebay(self, search_term):
        """Return the products parsed from the first eBay results for ``search_term``."""
        items = self._find_result_items(self.ebay_base, search_term, 'div', {'class': 's-item__info'})
        return self._collect('eBay', items, self._parse_ebay_item)

    def scrape_walmart(self, search_term):
        """Return the products parsed from the first Walmart results for ``search_term``."""
        items = self._find_result_items(self.walmart_base, search_term, 'div', {'data-item-id': True})
        return self._collect('Walmart', items, self._parse_walmart_item)

    def scrape_amazon(self, search_term):
        """Return the products parsed from the first Amazon results for ``search_term``."""
        items = self._find_result_items(
            self.amazon_base, search_term, 'div', {'data-component-type': 's-search-result'})
        return self._collect('Amazon', items, self._parse_amazon_item)

    def scrape_flipkart(self, search_term):
        """Return the products parsed from the first Flipkart results for ``search_term``."""
        items = self._find_result_items(self.flipkart_base, search_term, 'div', {'class': '_1AtVbE'})
        return self._collect('Flipkart', items, self._parse_flipkart_item)

    def send_email(self, recipient, subject, body, subtype='plain'):
        """Send ``body`` to ``recipient`` through the SMTP server at SMTP_HOST.

        ``subtype`` is the MIME text subtype: ``'plain'`` (default) or
        ``'html'`` for markup such as ``DataFrame.to_html`` output. Connection
        and SMTP errors are raised to the caller.
        """
        msg = MIMEText(body, subtype)
        msg['Subject'] = subject
        msg['From'] = self.SENDER_EMAIL
        msg['To'] = recipient

        with smtplib.SMTP(self.SMTP_HOST) as smtp:
            smtp.send_message(msg)

    def compare_prices(self, search_term, recipient_email=None):
        """Search every retailer and return all products sorted by ascending price.

        Prints progress per retailer. Returns an empty DataFrame when no
        retailer yielded a product; otherwise the frame has one row per product
        plus a ``timestamp`` column holding the time of the comparison.

        When ``recipient_email`` is given, the table is also emailed to that
        address as HTML. A delivery failure is printed and does not discard
        the scraped results.
        """
        print(f"Searching for '{search_term}'...")

        retailers = (
            ('eBay', self.scrape_ebay),
            ('Walmart', self.scrape_walmart),
            ('Amazon', self.scrape_amazon),
            ('Flipkart', self.scrape_flipkart),
        )
        all_products = []
        for retailer, scrape in retailers:
            found = scrape(search_term)
            print(f"Found {len(found)} products from {retailer}")
            all_products.extend(found)

        if not all_products:
            print("No products found!")
            return pd.DataFrame()

        df = pd.DataFrame(all_products)
        df['timestamp'] = datetime.now()
        df = df.sort_values('price')

        if recipient_email:
            subject = f"Price Comparison for '{search_term}'"
            try:
                self.send_email(recipient_email, subject, df.to_html(index=False), subtype='html')
            except (smtplib.SMTPException, OSError) as exc:
                # The scrape already succeeded; losing it over a mail problem
                # would waste the whole run.
                print(f"Could not email results to {recipient_email}: {exc}")

        return df

def create_price_comparison_chart(df):
    """Return a box plot comparing the price distribution of each retailer.

    ``df`` needs ``source``, ``price`` and ``title`` columns. One box is added
    per distinct ``source`` value; all individual prices are plotted beside the
    box with the product title as hover text.
    """
    chart = go.Figure()

    for retailer in df['source'].unique():
        is_retailer = df['source'] == retailer
        retailer_rows = df[is_retailer]
        box_options = dict(
            y=retailer_rows['price'],
            name=retailer,
            boxpoints='all',
            jitter=0.3,
            pointpos=-1.8,
            hovertext=retailer_rows['title'],
        )
        chart.add_trace(go.Box(**box_options))

    chart.update_layout(
        title='Price Distribution by Retailer',
        yaxis_title='Price ($)',
        showlegend=True,
    )
    return chart

In [81]:
def create_condition_chart(df):
    """Return a bar chart of product counts per retailer, split by condition.

    ``df`` needs ``source`` and ``condition`` columns; rows are counted per
    (source, condition) pair before plotting.
    """
    per_pair = df.groupby(['source', 'condition']).size()
    counts_frame = per_pair.reset_index(name='count')

    chart_options = {
        'x': 'source',
        'y': 'count',
        'color': 'condition',
        'title': 'Product Conditions by Retailer',
        'labels': {'count': 'Number of Products', 'source': 'Retailer'},
    }
    return px.bar(counts_frame, **chart_options)

def search_and_visualize(search_term):
    """Compare prices for ``search_term`` and display the outcome in the notebook.

    Output, in order: a styled results table, the price box plot, the
    condition bar chart and a summary-statistics table. The full result set is
    then saved to a timestamped CSV file. When the comparison returns no rows a
    message is printed and nothing else happens.
    """
    scraper = PriceComparisonScraper()
    results = scraper.compare_prices(search_term)
    if results.empty:
        print("No results found. Try a different search term.")
        return

    # --- Results table ---
    display(HTML("<h2>🔍 Price Comparison Results</h2>"))
    shown = results[['title', 'price', 'source', 'condition', 'shipping']]
    price_format = {'price': '${:.2f}'}
    display(shown.style.format(price_format).background_gradient(subset=['price'], cmap='RdYlGn_r'))

    # --- Charts ---
    create_price_comparison_chart(results).show()
    create_condition_chart(results).show()

    # --- Summary statistics ---
    display(HTML("<h3>📊 Price Summary Statistics</h3>"))
    price_column = results['price']
    metrics = {
        'Lowest Price': price_column.min(),
        'Highest Price': price_column.max(),
        'Average Price': price_column.mean(),
        'Median Price': price_column.median(),
        'Price Range': price_column.max() - price_column.min(),
    }
    summary_stats = pd.DataFrame({
        'Metric': list(metrics),
        'Value': [f"${amount:.2f}" for amount in metrics.values()],
    })
    display(summary_stats)

    # --- Save results ---
    filename = "price_comparison_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
    results.to_csv(filename, index=False)
    print(f"\n💾 Results saved to {filename}")

In [82]:
pip install matplotlib

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [83]:
def interactive_search(search_term, recipient_email):
    """Run the search only when both the search term and the email are non-blank."""
    inputs_present = search_term.strip() and recipient_email.strip()
    if not inputs_present:
        print("Please enter a search term and email address.")
        return
    search_and_visualize(search_term, recipient_email)

In [14]:
def search_and_visualize(search_term, recipient_email):
    """Placeholder search: announce the search and email a stub result.

    Prints the search term, then hands a placeholder message to
    ``send_email``. The "Email sent" confirmation is printed only when
    ``send_email`` returns a truthy value, so a failed delivery is never
    reported as sent.
    """
    # Placeholder for your search functionality
    print(f"Searching for: {search_term}")

    # Placeholder content for email
    email_content = f"Results for your search '{search_term}' go here."

    # Send email; confirm only on a positive result from send_email
    delivered = send_email(recipient_email, f"Search Results for {search_term}", email_content)
    if delivered:
        print(f"Email sent to {recipient_email}")

# Define the email sending function
import smtplib
from email.message import EmailMessage

def send_email(recipient, subject, content):
    """Send a plain-text email through Gmail's SMTP server.

    The account is read from the ``GMAIL_USER`` and ``GMAIL_APP_PASSWORD``
    environment variables so no secret lives in the notebook. The message is
    sent from that same account.

    Args:
        recipient: Address the message is sent to.
        subject: Subject line.
        content: Plain-text body.

    Returns:
        True when the message was handed to the server, False when the
        credentials are missing or sending failed (the reason is printed).
    """
    # Local import keeps this cell self-contained.
    import os

    # SECURITY: an app password used to be hard-coded here. Treat it as
    # compromised and revoke it in the Google account settings.
    user = os.environ.get('GMAIL_USER')
    app_password = os.environ.get('GMAIL_APP_PASSWORD')
    if not user or not app_password:
        print("Email not sent: set the GMAIL_USER and GMAIL_APP_PASSWORD environment variables.")
        return False

    try:
        msg = EmailMessage()
        msg.set_content(content)
        msg['Subject'] = subject
        msg['From'] = user  # same account that authenticates below
        msg['To'] = recipient

        smtp_server = 'smtp.gmail.com'
        smtp_port = 587

        # Connect to the SMTP server; the timeout keeps a dead connection
        # from hanging the cell.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()  # Start TLS for security
            server.login(user, app_password)
            server.send_message(msg)

        print(f"Email sent successfully to {recipient}!")
        return True
    except smtplib.SMTPAuthenticationError:
        print("Authentication failed. Please check your email and app password.")
    except Exception as e:
        print(f"Failed to send email: {e}")
    return False

# Example usage: sends one real email every time this cell runs.
# The link now points at eBay's search page, matching the retailer named in
# the subject and body (it previously pointed at Walmart's search URL).
send_email("harishmagvgireesh@gmail.com", "eBay", "eBay has lowest price. link for eBay:https://www.ebay.com/sch/i.html?_nkw=")


from ipywidgets import interact, widgets

# Both text boxes use continuous_update=False so the callback runs when a
# value is submitted (Enter / leaving the field) rather than on every
# keystroke; otherwise each typed character would trigger a search and an email.
search_term = widgets.Text(
    value='airpods pro',
    placeholder='Type product name...',
    description='🔍 Search:',
    style={'description_width': 'initial'},
    continuous_update=False
)

recipient_email = widgets.Text(
    value='',
    placeholder='Enter email address...',
    description='📧 Email:',
    style={'description_width': 'initial'},
    continuous_update=False
)

@interact(search_term=search_term, recipient_email=recipient_email)
def interactive_search(search_term, recipient_email):
    """Run the search and email step once both fields hold non-blank text."""
    if search_term.strip() and recipient_email.strip():
        search_and_visualize(search_term, recipient_email)
    else:
        print("Please enter a search term and email address.")

Email sent successfully to harishmagvgireesh@gmail.com!


interactive(children=(Text(value='airpods pro', description='🔍 Search:', placeholder='Type product name...', s…