In [1]:
# ============================================================================
# COMPLETE SHOPIFY INSIGHTS-FETCHER WITH COMPETITOR ANALYSIS & DATABASE
# ============================================================================

# Install all required packages
!pip install -q sqlalchemy aiohttp beautifulsoup4 pydantic pandas numpy requests nest-asyncio lxml

# Import all necessary libraries
import asyncio
import aiohttp
import json
import re
import pandas as pd
from datetime import datetime
from typing import List, Dict, Optional, Union
from urllib.parse import urljoin, urlparse
import nest_asyncio
from bs4 import BeautifulSoup
from pydantic import BaseModel, validator
import sqlite3
import requests
import warnings
warnings.filterwarnings('ignore')

# SQLAlchemy imports
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, Float, Boolean, ForeignKey, Table, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship

# Enable nested event loops for Colab
nest_asyncio.apply()

print("✅ All dependencies installed and imported successfully!")

✅ All dependencies installed and imported successfully!


In [2]:
# ============================================================================
# DATABASE SCHEMA AND PYDANTIC MODELS
# ============================================================================

# SQLAlchemy Base
Base = declarative_base()

# Association table for competitor relationships
competitor_association = Table(
    'competitor_relationships',
    Base.metadata,
    Column('store_id', Integer, ForeignKey('stores.id'), primary_key=True),
    Column('competitor_id', Integer, ForeignKey('stores.id'), primary_key=True),
    Column('relationship_type', String(50), default='competitor'),
    Column('discovered_at', DateTime, default=datetime.utcnow)
)

# Database Models
class StoreDB(Base):
    __tablename__ = "stores"

    id = Column(Integer, primary_key=True)
    store_url = Column(String(500), unique=True, nullable=False)
    store_name = Column(String(255))
    domain = Column(String(255))
    industry = Column(String(255))
    privacy_policy = Column(Text)
    return_policy = Column(Text)
    refund_policy = Column(Text)
    about_brand = Column(Text)
    scraped_at = Column(DateTime, default=datetime.utcnow)
    is_competitor = Column(Boolean, default=False)
    competitor_discovery_method = Column(String(255))

    # Relationships
    products = relationship('ProductDB', backref='store', cascade="all, delete-orphan")
    faqs = relationship('FAQDB', backref='store', cascade="all, delete-orphan")
    social_handles = relationship('SocialHandleDB', backref='store', cascade="all, delete-orphan")
    important_links = relationship('ImportantLinkDB', backref='store', cascade="all, delete-orphan")
    contact_info = relationship('ContactInfoDB', uselist=False, backref='store', cascade="all, delete-orphan")

    # Competitor relationships
    competitors = relationship(
        'StoreDB',
        secondary=competitor_association,
        primaryjoin=id == competitor_association.c.store_id,
        secondaryjoin=id == competitor_association.c.competitor_id,
        backref='competing_with'
    )

class ProductDB(Base):
    __tablename__ = "products"

    id = Column(Integer, primary_key=True)
    product_id = Column(Integer, index=True)
    title = Column(String(255))
    handle = Column(String(255))
    description = Column(Text)
    vendor = Column(String(255))
    product_type = Column(String(255))
    price = Column(Float)
    compare_at_price = Column(Float)
    available = Column(Boolean)
    tags = Column(Text)  # JSON
    images = Column(Text)  # JSON
    store_id = Column(Integer, ForeignKey('stores.id'))

class FAQDB(Base):
    __tablename__ = "faqs"

    id = Column(Integer, primary_key=True)
    question = Column(Text)
    answer = Column(Text)
    category = Column(String(255))
    store_id = Column(Integer, ForeignKey('stores.id'))

class SocialHandleDB(Base):
    __tablename__ = "social_handles"

    id = Column(Integer, primary_key=True)
    platform = Column(String(255))
    url = Column(String(512))
    username = Column(String(255))
    store_id = Column(Integer, ForeignKey('stores.id'))

class ImportantLinkDB(Base):
    __tablename__ = "important_links"

    id = Column(Integer, primary_key=True)
    name = Column(String(255))
    url = Column(String(512))
    store_id = Column(Integer, ForeignKey('stores.id'))

class ContactInfoDB(Base):
    __tablename__ = "contact_infos"

    id = Column(Integer, primary_key=True)
    emails = Column(Text)  # JSON
    phone_numbers = Column(Text)  # JSON
    address = Column(Text)
    store_id = Column(Integer, ForeignKey('stores.id'))

class CompetitorAnalysisDB(Base):
    __tablename__ = "competitor_analysis"

    id = Column(Integer, primary_key=True)
    original_store_id = Column(Integer, ForeignKey('stores.id'))
    analysis_date = Column(DateTime, default=datetime.utcnow)
    total_competitors_found = Column(Integer, default=0)
    analysis_method = Column(String(255))
    analysis_summary = Column(Text)

    original_store = relationship('StoreDB', foreign_keys=[original_store_id])

# Pydantic Models
class Product(BaseModel):
    id: int = 0
    title: str = ""
    handle: str = ""
    description: str = ""
    vendor: str = ""
    product_type: str = ""
    price: float = 0.0
    compare_at_price: Optional[float] = None
    available: bool = True
    tags: List[str] = []
    images: List[str] = []

    @validator('tags', pre=True)
    def parse_tags(cls, v):
        if isinstance(v, str):
            return [tag.strip() for tag in v.split(',') if tag.strip()]
        elif isinstance(v, list):
            return [str(tag).strip() for tag in v if tag]
        return []

    @validator('price', pre=True)
    def parse_price(cls, v):
        try:
            return float(v) if v else 0.0
        except:
            return 0.0

    class Config:
        arbitrary_types_allowed = True

class SocialHandle(BaseModel):
    platform: str
    url: str
    username: Optional[str] = None

class ContactInfo(BaseModel):
    emails: List[str] = []
    phone_numbers: List[str] = []
    address: Optional[str] = None

class FAQ(BaseModel):
    question: str
    answer: str
    category: Optional[str] = None

class BrandInsights(BaseModel):
    store_url: str
    store_name: str
    product_catalog: List[Product] = []
    hero_products: List[Product] = []
    privacy_policy: Optional[str] = None
    return_policy: Optional[str] = None
    refund_policy: Optional[str] = None
    faqs: List[FAQ] = []
    social_handles: List[SocialHandle] = []
    contact_info: ContactInfo = ContactInfo()
    about_brand: Optional[str] = None
    important_links: Dict[str, str] = {}
    scraped_at: datetime = datetime.now()
    is_competitor: bool = False
    competitor_discovery_method: Optional[str] = None

    class Config:
        arbitrary_types_allowed = True

print("✅ Database schema and models defined!")


✅ Database schema and models defined!


In [3]:
# ============================================================================
# CORE SCRAPING ENGINE
# ============================================================================

class ShopifyStoreScraper:
    """Core Shopify store scraper with all data extraction capabilities"""

    def __init__(self):
        self.session = None
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1'
        }

    async def __aenter__(self):
        connector = aiohttp.TCPConnector(limit=30, limit_per_host=10)
        self.session = aiohttp.ClientSession(
            headers=self.headers,
            timeout=aiohttp.ClientTimeout(total=30),
            connector=connector
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def scrape_store(self, store_url: str) -> BrandInsights:
        """Main scraping orchestrator"""
        print(f"🔍 Analyzing: {store_url}")

        if not store_url.startswith('http'):
            store_url = f'https://{store_url}'

        insights = BrandInsights(
            store_url=store_url,
            store_name=self._extract_store_name(store_url)
        )

        # Execute all scraping tasks concurrently
        tasks = [
            self._scrape_product_catalog(store_url),
            self._scrape_hero_products(store_url),
            self._scrape_policies(store_url),
            self._scrape_faqs(store_url),
            self._scrape_social_handles(store_url),
            self._scrape_contact_info(store_url),
            self._scrape_about_brand(store_url),
            self._scrape_important_links(store_url)
        ]

        print("📊 Extracting all data points...")
        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Parse results
        insights.product_catalog = results[0] if not isinstance(results[0], Exception) else []
        insights.hero_products = results[1] if not isinstance(results[1], Exception) else []
        policies = results[2] if not isinstance(results[2], Exception) else {}
        insights.privacy_policy = policies.get('privacy')
        insights.return_policy = policies.get('return')
        insights.refund_policy = policies.get('refund')
        insights.faqs = results[3] if not isinstance(results[3], Exception) else []
        insights.social_handles = results[4] if not isinstance(results[4], Exception) else []
        insights.contact_info = results[5] if not isinstance(results[5], Exception) else ContactInfo()
        insights.about_brand = results[6] if not isinstance(results[6], Exception) else None
        insights.important_links = results[7] if not isinstance(results[7], Exception) else {}

        print("✅ Store analysis completed!")
        return insights

    def _extract_store_name(self, url: str) -> str:
        """Extract clean store name from URL"""
        domain = urlparse(url).netloc.replace('www.', '')
        return domain.split('.')[0].title()

    async def _scrape_product_catalog(self, store_url: str) -> List[Product]:
        """Scrape complete product catalog"""
        print("📦 Fetching product catalog...")

        products = []
        page = 1

        while True:
            try:
                url = f"{store_url.rstrip('/')}/products.json?page={page}&limit=250"
                async with self.session.get(url) as response:
                    if response.status != 200:
                        break

                    data = await response.json()
                    page_products = data.get('products', [])

                    if not page_products:
                        break

                    for product_data in page_products:
                        try:
                            product = Product(
                                id=product_data.get('id', 0),
                                title=product_data.get('title', ''),
                                handle=product_data.get('handle', ''),
                                description=self._clean_html(product_data.get('body_html', '')),
                                vendor=product_data.get('vendor', ''),
                                product_type=product_data.get('product_type', ''),
                                available=product_data.get('available', True),
                                tags=product_data.get('tags', []),
                                images=[img.get('src', '') for img in product_data.get('images', [])]
                            )

                            # Extract price from first variant
                            variants = product_data.get('variants', [])
                            if variants:
                                product.price = float(variants[0].get('price', 0))
                                if variants[0].get('compare_at_price'):
                                    product.compare_at_price = float(variants[0]['compare_at_price'])

                            products.append(product)

                        except Exception as e:
                            print(f"⚠️ Error parsing product: {e}")
                            continue

                    page += 1
                    if page > 10:  # Limit to prevent infinite loops
                        break

            except Exception as e:
                print(f"❌ Error fetching products page {page}: {e}")
                break

        print(f"✅ Found {len(products)} products")
        return products

    async def _scrape_hero_products(self, store_url: str) -> List[Product]:
        """Scrape hero products from homepage"""
        print("🏠 Identifying hero products...")

        try:
            async with self.session.get(store_url) as response:
                if response.status != 200:
                    return []

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                product_links = soup.find_all('a', href=re.compile(r'/products/'))
                hero_products = []
                seen_handles = set()

                for link in product_links[:8]:
                    href = link.get('href', '')
                    if '/products/' in href:
                        handle = href.split('/products/')[-1].split('?')[0]

                        if handle and handle not in seen_handles:
                            seen_handles.add(handle)
                            product_data = await self._fetch_product_by_handle(store_url, handle)
                            if product_data:
                                hero_products.append(product_data)

                print(f"✅ Found {len(hero_products)} hero products")
                return hero_products

        except Exception as e:
            print(f"❌ Error scraping hero products: {e}")
            return []

    async def _fetch_product_by_handle(self, store_url: str, handle: str) -> Optional[Product]:
        """Fetch single product by handle"""
        try:
            url = f"{store_url.rstrip('/')}/products/{handle}.json"
            async with self.session.get(url) as response:
                if response.status == 200:
                    data = await response.json()
                    product_info = data.get('product', {})

                    product = Product(
                        id=product_info.get('id', 0),
                        title=product_info.get('title', ''),
                        handle=product_info.get('handle', ''),
                        description=self._clean_html(product_info.get('body_html', '')),
                        vendor=product_info.get('vendor', ''),
                        product_type=product_info.get('product_type', ''),
                        available=product_info.get('available', True),
                        tags=product_info.get('tags', []),
                        images=[img.get('src', '') for img in product_info.get('images', [])]
                    )

                    variants = product_info.get('variants', [])
                    if variants:
                        product.price = float(variants[0].get('price', 0))

                    return product
        except:
            return None

    async def _scrape_policies(self, store_url: str) -> Dict[str, str]:
        """Scrape privacy, return, and refund policies"""
        print("📜 Extracting policies...")

        policies = {}
        policy_endpoints = {
            'privacy': ['/policies/privacy-policy', '/pages/privacy-policy', '/pages/privacy'],
            'return': ['/policies/return-policy', '/pages/return-policy', '/pages/returns'],
            'refund': ['/policies/refund-policy', '/pages/refund-policy', '/pages/refunds']
        }

        for policy_type, paths in policy_endpoints.items():
            for path in paths:
                try:
                    url = f"{store_url.rstrip('/')}{path}"
                    async with self.session.get(url) as response:
                        if response.status == 200:
                            html = await response.text()
                            soup = BeautifulSoup(html, 'html.parser')

                            for element in soup(['script', 'style', 'nav', 'header', 'footer']):
                                element.decompose()

                            main_content = soup.find('main') or soup.find('div', class_=re.compile(r'content|policy'))
                            if main_content:
                                text = main_content.get_text(separator=' ', strip=True)
                            else:
                                text = soup.get_text(separator=' ', strip=True)

                            cleaned_text = self._clean_text(text)
                            if len(cleaned_text) > 200:
                                policies[policy_type] = cleaned_text[:3000]
                                break

                except Exception as e:
                    continue

        print(f"✅ Found {len(policies)} policies")
        return policies

    async def _scrape_faqs(self, store_url: str) -> List[FAQ]:
        """Scrape FAQs from common FAQ pages"""
        print("❓ Extracting FAQs...")

        faq_paths = ['/pages/faq', '/pages/faqs', '/faq', '/faqs', '/pages/frequently-asked-questions']

        for path in faq_paths:
            try:
                url = f"{store_url.rstrip('/')}{path}"
                async with self.session.get(url) as response:
                    if response.status == 200:
                        html = await response.text()
                        soup = BeautifulSoup(html, 'html.parser')

                        faqs = []

                        # Method 1: Details/Summary structure
                        details_elements = soup.find_all('details')
                        for detail in details_elements:
                            summary = detail.find('summary')
                            if summary:
                                question = summary.get_text(strip=True)
                                answer = detail.get_text(strip=True).replace(question, '', 1).strip()
                                if question and answer:
                                    faqs.append(FAQ(question=question, answer=answer))

                        # Method 2: Common FAQ patterns
                        if not faqs:
                            faq_containers = soup.find_all(['div', 'section'], class_=re.compile(r'faq|question|accordion'))
                            for container in faq_containers:
                                question_elem = container.find(['h3', 'h4', 'h5', 'strong'], string=re.compile(r'\?'))
                                if question_elem:
                                    question = question_elem.get_text(strip=True)
                                    answer_elem = question_elem.find_next_sibling()
                                    if answer_elem:
                                        answer = answer_elem.get_text(strip=True)
                                        if question and answer:
                                            faqs.append(FAQ(question=question, answer=answer))

                        if faqs:
                            print(f"✅ Found {len(faqs)} FAQs")
                            return faqs[:20]

            except Exception as e:
                continue

        print("⚠️ No FAQs found")
        return []

    async def _scrape_social_handles(self, store_url: str) -> List[SocialHandle]:
        """Extract social media handles"""
        print("📱 Finding social handles...")

        try:
            async with self.session.get(store_url) as response:
                if response.status != 200:
                    return []

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                social_handles = []
                social_patterns = {
                    'instagram': r'(?:instagram\.com|instagr\.am)/([^/?\s&]+)',
                    'facebook': r'(?:facebook\.com|fb\.com)/([^/?\s&]+)',
                    'twitter': r'(?:twitter\.com|x\.com)/([^/?\s&]+)',
                    'youtube': r'youtube\.com/(?:channel/|user/|c/)?([^/?\s&]+)',
                    'tiktok': r'tiktok\.com/@?([^/?\s&]+)',
                    'linkedin': r'linkedin\.com/(?:company/|in/)?([^/?\s&]+)',
                    'pinterest': r'pinterest\.com/([^/?\s&]+)'
                }

                links = soup.find_all('a', href=True)
                found_platforms = set()

                for link in links:
                    href = link.get('href', '').lower()

                    for platform, pattern in social_patterns.items():
                        if platform not in found_platforms:
                            match = re.search(pattern, href, re.IGNORECASE)
                            if match:
                                username = match.group(1).strip('/')
                                if username and len(username) > 0:
                                    social_handles.append(SocialHandle(
                                        platform=platform,
                                        url=link.get('href'),
                                        username=username
                                    ))
                                    found_platforms.add(platform)

                print(f"✅ Found {len(social_handles)} social handles")
                return social_handles

        except Exception as e:
            print(f"❌ Error scraping social handles: {e}")
            return []

    async def _scrape_contact_info(self, store_url: str) -> ContactInfo:
        """Extract contact information"""
        print("📞 Extracting contact info...")

        contact_info = ContactInfo()

        contact_urls = [
            f"{store_url.rstrip('/')}/pages/contact",
            f"{store_url.rstrip('/')}/contact",
            store_url
        ]

        for url in contact_urls:
            try:
                async with self.session.get(url) as response:
                    if response.status == 200:
                        html = await response.text()

                        # Extract emails
                        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
                        emails = re.findall(email_pattern, html, re.IGNORECASE)

                        # Extract phone numbers
                        phone_patterns = [
                            r'(\+?91[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
                            r'(\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
                            r'(\+?44[-.\s]?)?\(?\d{3,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}'
                        ]

                        phones = []
                        for pattern in phone_patterns:
                            matches = re.findall(pattern, html)
                            phones.extend(matches)

                        unique_emails = list(set([email.lower() for email in emails if '@' in email]))
                        unique_phones = list(set([phone if isinstance(phone, str) else '-'.join(phone) for phone in phones]))

                        if unique_emails or unique_phones:
                            contact_info.emails = unique_emails[:3]
                            contact_info.phone_numbers = unique_phones[:3]
                            break

            except Exception as e:
                continue

        print(f"✅ Found {len(contact_info.emails)} emails, {len(contact_info.phone_numbers)} phones")
        return contact_info

    async def _scrape_about_brand(self, store_url: str) -> Optional[str]:
        """Extract about brand information"""
        print("ℹ️ Extracting brand info...")

        about_paths = ['/pages/about', '/pages/about-us', '/pages/our-story', '/about']

        for path in about_paths:
            try:
                url = f"{store_url.rstrip('/')}{path}"
                async with self.session.get(url) as response:
                    if response.status == 200:
                        html = await response.text()
                        soup = BeautifulSoup(html, 'html.parser')

                        for element in soup(['script', 'style', 'nav', 'header', 'footer']):
                            element.decompose()

                        main_content = soup.find('main') or soup.find('div', class_=re.compile(r'about|content'))
                        if main_content:
                            text = main_content.get_text(separator=' ', strip=True)
                            cleaned_text = self._clean_text(text)
                            if len(cleaned_text) > 100:
                                print("✅ Found brand information")
                                return cleaned_text[:2000]

            except Exception as e:
                continue

        print("⚠️ No brand info found")
        return None

    async def _scrape_important_links(self, store_url: str) -> Dict[str, str]:
        """Extract important links"""
        print("🔗 Finding important links...")

        try:
            async with self.session.get(store_url) as response:
                if response.status != 200:
                    return {}

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                important_links = {}
                important_keywords = {
                    'Order Tracking': ['track', 'tracking', 'order tracking', 'track order'],
                    'Contact Us': ['contact', 'contact us', 'get in touch'],
                    'Blog': ['blog', 'news', 'articles'],
                    'Size Guide': ['size', 'size guide', 'sizing'],
                    'Shipping': ['shipping', 'delivery', 'shipping info'],
                    'Returns': ['returns', 'return policy'],
                    'Support': ['support', 'help', 'customer service']
                }

                links = soup.find_all('a', href=True)

                for link in links:
                    href = link.get('href', '')
                    text = link.get_text(strip=True).lower()

                    if len(text) > 50:
                        continue

                    for category, keywords in important_keywords.items():
                        if any(keyword in text for keyword in keywords):
                            if href.startswith('/'):
                                href = f"{store_url.rstrip('/')}{href}"
                            elif not href.startswith('http'):
                                href = f"{store_url.rstrip('/')}/{href}"

                            important_links[category] = href
                            break

                print(f"✅ Found {len(important_links)} important links")
                return important_links

        except Exception as e:
            print(f"❌ Error scraping important links: {e}")
            return {}

    def _clean_html(self, html_string: str) -> str:
        """Clean HTML content"""
        if not html_string:
            return ""
        soup = BeautifulSoup(html_string, 'html.parser')
        return soup.get_text(separator=' ', strip=True)

    def _clean_text(self, text: str) -> str:
        """Clean and normalize text"""
        if not text:
            return ""
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

print("✅ Core scraping engine ready!")


✅ Core scraping engine ready!


In [4]:
# ============================================================================
# COMPETITOR DISCOVERY ENGINE
# ============================================================================

class CompetitorDiscoveryEngine:
    """Advanced competitor discovery using multiple methods"""

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def extract_brand_info(self, store_url: str) -> dict:
        """Extract brand information for competitor search"""
        try:
            response = requests.get(store_url, headers=self.headers, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')

            brand_name = self._extract_brand_name(store_url, soup)
            industry = self._extract_industry(soup)
            products = self._extract_primary_products(soup)

            return {
                'brand_name': brand_name,
                'industry': industry,
                'products': products,
                'domain': urlparse(store_url).netloc
            }
        except Exception as e:
            print(f"❌ Error extracting brand info: {e}")
            return {}

    def _extract_brand_name(self, store_url: str, soup: BeautifulSoup) -> str:
        """Extract brand name from various sources"""
        title = soup.find('title')
        if title:
            brand_name = title.get_text().split('-')[0].strip()
            if brand_name:
                return brand_name

        domain = urlparse(store_url).netloc.replace('www.', '')
        return domain.split('.')[0].title()

    def _extract_industry(self, soup: BeautifulSoup) -> str:
        """Extract industry/category from page content"""
        industry_indicators = [
            'fashion', 'clothing', 'apparel', 'beauty', 'cosmetics', 'skincare',
            'electronics', 'gadgets', 'tech', 'home', 'decor', 'furniture',
            'fitness', 'sports', 'health', 'wellness', 'food', 'beverage'
        ]

        page_text = soup.get_text().lower()
        for indicator in industry_indicators:
            if indicator in page_text:
                return indicator.title()

        return 'General'

    def _extract_primary_products(self, soup: BeautifulSoup) -> list:
        """Extract primary product types"""
        products = []
        product_links = soup.find_all('a', href=re.compile(r'/products/'))
        for link in product_links[:5]:
            product_text = link.get_text().strip()
            if product_text and len(product_text) > 3:
                products.append(product_text)
        return products

    def discover_competitors_by_industry(self, brand_info: dict, max_results: int = 5) -> list:
        """Discover competitors by industry analysis"""
        competitors = []

        # Industry-specific competitor databases
        industry_competitors = {
            'fashion': [
                {'url': 'https://www.zara.com', 'name': 'Zara', 'shopify': False},
                {'url': 'https://www.hm.com', 'name': 'H&M', 'shopify': False},
                {'url': 'https://bombayshirts.com', 'name': 'Bombay Shirts', 'shopify': True},
                {'url': 'https://www.koovs.com', 'name': 'Koovs', 'shopify': False},
                {'url': 'https://www.thewhitehangers.com', 'name': 'The White Hangers', 'shopify': True}
            ],
            'beauty': [
                {'url': 'https://www.nykaa.com', 'name': 'Nykaa', 'shopify': False},
                {'url': 'https://www.purplle.com', 'name': 'Purplle', 'shopify': False},
                {'url': 'https://www.mcaffeine.com', 'name': 'mCaffeine', 'shopify': True},
                {'url': 'https://www.thebodyshop.in', 'name': 'The Body Shop', 'shopify': False}
            ],
            'electronics': [
                {'url': 'https://www.flipkart.com', 'name': 'Flipkart', 'shopify': False},
                {'url': 'https://www.amazon.in', 'name': 'Amazon India', 'shopify': False},
                {'url': 'https://www.croma.com', 'name': 'Croma', 'shopify': False}
            ],
            'general': [
                {'url': 'https://hairoriginals.com', 'name': 'Hair Originals', 'shopify': True},
                {'url': 'https://www.bewakoof.com', 'name': 'Bewakoof', 'shopify': False},
                {'url': 'https://www.myntra.com', 'name': 'Myntra', 'shopify': False}
            ]
        }

        industry = brand_info.get('industry', 'general').lower()
        industry_stores = industry_competitors.get(industry, industry_competitors['general'])

        # Filter for Shopify stores and add to competitors
        for store in industry_stores:
            if len(competitors) >= max_results:
                break

            # For demo purposes, we'll include some non-Shopify stores
            # In production, you'd filter only Shopify stores
            competitors.append({
                'url': store['url'],
                'name': store['name'],
                'discovery_method': 'industry_analysis',
                'relationship_type': 'indirect',
                'is_shopify': store.get('shopify', False)
            })

        return competitors[:max_results]

    def discover_competitors_by_similarity(self, brand_info: dict, max_results: int = 3) -> list:
        """Discover competitors by analyzing similar characteristics"""
        competitors = []

        # Simulated competitor discovery based on brand characteristics
        similar_stores = [
            {'url': 'https://www.ajio.com', 'name': 'Ajio', 'discovery_method': 'similarity_analysis'},
            {'url': 'https://www.jabong.com', 'name': 'Jabong', 'discovery_method': 'similarity_analysis'},
            {'url': 'https://www.limeroad.com', 'name': 'Limeroad', 'discovery_method': 'similarity_analysis'}
        ]

        for store in similar_stores[:max_results]:
            competitors.append({
                'url': store['url'],
                'name': store['name'],
                'discovery_method': store['discovery_method'],
                'relationship_type': 'potential'
            })

        return competitors

print("✅ Competitor discovery engine ready!")


✅ Competitor discovery engine ready!


In [5]:
# ============================================================================
# DATABASE MANAGER
# ============================================================================

class DatabaseManager:
    """Handles all database operations"""

    def __init__(self, db_url="sqlite:///shopify_competitor_insights.db"):
        self.engine = create_engine(db_url, echo=False)
        Base.metadata.create_all(self.engine)
        self.Session = sessionmaker(bind=self.engine)
        print(f"✅ Database initialized: {db_url}")

    def save_store_insights(self, insights: BrandInsights, brand_info: dict = None) -> int:
        """Save store insights to database"""
        session = self.Session()

        try:
            # Check if store exists
            existing_store = session.query(StoreDB).filter_by(store_url=insights.store_url).first()
            if existing_store:
                session.delete(existing_store)
                session.commit()

            # Create new store record
            store = StoreDB(
                store_url=insights.store_url,
                store_name=insights.store_name,
                domain=urlparse(insights.store_url).netloc,
                industry=brand_info.get('industry', 'Unknown') if brand_info else 'Unknown',
                privacy_policy=insights.privacy_policy,
                return_policy=insights.return_policy,
                refund_policy=insights.refund_policy,
                about_brand=insights.about_brand,
                scraped_at=insights.scraped_at,
                is_competitor=insights.is_competitor,
                competitor_discovery_method=insights.competitor_discovery_method
            )

            # Add contact info
            if insights.contact_info:
                contact_info = ContactInfoDB(
                    emails=json.dumps(insights.contact_info.emails),
                    phone_numbers=json.dumps(insights.contact_info.phone_numbers),
                    address=insights.contact_info.address,
                    store=store
                )
                session.add(contact_info)

            # Add products
            for product in insights.product_catalog:
                product_db = ProductDB(
                    product_id=product.id,
                    title=product.title,
                    handle=product.handle,
                    description=product.description,
                    vendor=product.vendor,
                    product_type=product.product_type,
                    price=product.price,
                    compare_at_price=product.compare_at_price,
                    available=product.available,
                    tags=json.dumps(product.tags),
                    images=json.dumps(product.images),
                    store=store
                )
                session.add(product_db)

            # Add FAQs
            for faq in insights.faqs:
                faq_db = FAQDB(
                    question=faq.question,
                    answer=faq.answer,
                    category=getattr(faq, 'category', None),
                    store=store
                )
                session.add(faq_db)

            # Add social handles
            for handle in insights.social_handles:
                handle_db = SocialHandleDB(
                    platform=handle.platform,
                    url=handle.url,
                    username=handle.username,
                    store=store
                )
                session.add(handle_db)

            # Add important links
            for name, url in insights.important_links.items():
                link_db = ImportantLinkDB(
                    name=name,
                    url=url,
                    store=store
                )
                session.add(link_db)

            session.add(store)
            session.commit()

            store_id = store.id
            print(f"✅ Store saved to database with ID: {store_id}")
            return store_id

        except Exception as e:
            session.rollback()
            print(f"❌ Error saving store: {e}")
            raise
        finally:
            session.close()

    def create_competitor_relationship(self, original_store_id: int, competitor_store_id: int, relationship_type: str = 'competitor'):
        """Create competitor relationship"""
        session = self.Session()

        try:
            # Check if relationship already exists
            existing = session.query(competitor_association).filter_by(
                store_id=original_store_id,
                competitor_id=competitor_store_id
            ).first()

            if not existing:
                session.execute(
                    competitor_association.insert().values(
                        store_id=original_store_id,
                        competitor_id=competitor_store_id,
                        relationship_type=relationship_type,
                        discovered_at=datetime.utcnow()
                    )
                )
                session.commit()
                print(f"✅ Competitor relationship created: {original_store_id} -> {competitor_store_id}")

        except Exception as e:
            session.rollback()
            print(f"❌ Error creating competitor relationship: {e}")
        finally:
            session.close()

    def get_store_summary(self) -> pd.DataFrame:
        """Get summary of all stores"""
        session = self.Session()

        try:
            stores = session.query(StoreDB).all()
            data = []

            for store in stores:
                product_count = session.query(ProductDB).filter_by(store_id=store.id).count()
                social_count = session.query(SocialHandleDB).filter_by(store_id=store.id).count()
                faq_count = session.query(FAQDB).filter_by(store_id=store.id).count()

                data.append({
                    'Store ID': store.id,
                    'Store Name': store.store_name,
                    'Industry': store.industry,
                    'Products': product_count,
                    'Social Platforms': social_count,
                    'FAQs': faq_count,
                    'Is Competitor': store.is_competitor,
                    'Scraped At': store.scraped_at.strftime('%Y-%m-%d %H:%M')
                })

            return pd.DataFrame(data)

        finally:
            session.close()

    def generate_competitive_report(self, store_id: int) -> dict:
        """Generate competitive analysis report"""
        session = self.Session()

        try:
            original_store = session.query(StoreDB).filter_by(id=store_id).first()
            if not original_store:
                return {'error': f'Store with ID {store_id} not found'}

            # Get competitors
            competitors = session.query(StoreDB).join(
                competitor_association,
                StoreDB.id == competitor_association.c.competitor_id
            ).filter(competitor_association.c.store_id == store_id).all()

            # Calculate metrics
            original_metrics = self._calculate_store_metrics(session, original_store)
            competitor_metrics = [self._calculate_store_metrics(session, comp) for comp in competitors]

            # Generate insights
            insights = self._generate_competitive_insights(original_metrics, competitor_metrics)

            report = {
                'original_store': original_metrics,
                'competitors': competitor_metrics,
                'competitive_insights': insights,
                'generated_at': datetime.utcnow().isoformat()
            }

            return report

        finally:
            session.close()

    def _calculate_store_metrics(self, session, store: StoreDB) -> dict:
        """Calculate metrics for a store"""
        product_count = session.query(ProductDB).filter_by(store_id=store.id).count()

        price_stats = session.query(
            func.avg(ProductDB.price).label('avg_price'),
            func.min(ProductDB.price).label('min_price'),
            func.max(ProductDB.price).label('max_price')
        ).filter_by(store_id=store.id).first()

        social_count = session.query(SocialHandleDB).filter_by(store_id=store.id).count()
        faq_count = session.query(FAQDB).filter_by(store_id=store.id).count()

        return {
            'store_name': store.store_name,
            'store_url': store.store_url,
            'industry': store.industry,
            'total_products': product_count,
            'price_stats': {
                'average_price': float(price_stats.avg_price) if price_stats.avg_price else 0,
                'min_price': float(price_stats.min_price) if price_stats.min_price else 0,
                'max_price': float(price_stats.max_price) if price_stats.max_price else 0
            },
            'social_platforms': social_count,
            'faq_count': faq_count,
            'has_privacy_policy': bool(store.privacy_policy),
            'has_return_policy': bool(store.return_policy),
            'has_about_section': bool(store.about_brand)
        }

    def _generate_competitive_insights(self, original_metrics: dict, competitor_metrics: list) -> dict:
        """Generate competitive insights"""
        if not competitor_metrics:
            return {'message': 'No competitors found for analysis'}

        # Product comparison
        original_products = original_metrics['total_products']
        competitor_products = [comp['total_products'] for comp in competitor_metrics]
        avg_competitor_products = sum(competitor_products) / len(competitor_products) if competitor_products else 0

        # Price comparison
        original_avg_price = original_metrics['price_stats']['average_price']
        competitor_avg_prices = [comp['price_stats']['average_price'] for comp in competitor_metrics]
        avg_competitor_price = sum(competitor_avg_prices) / len(competitor_avg_prices) if competitor_avg_prices else 0

        insights = {
            'product_catalog_comparison': {
                'original_store_products': original_products,
                'competitor_average': avg_competitor_products,
                'position': 'above_average' if original_products > avg_competitor_products else 'below_average'
            },
            'pricing_comparison': {
                'original_avg_price': original_avg_price,
                'competitor_avg_price': avg_competitor_price,
                'pricing_position': 'premium' if original_avg_price > avg_competitor_price else 'competitive'
            },
            'feature_analysis': {
                'original_social_platforms': original_metrics['social_platforms'],
                'competitor_avg_social': sum([comp['social_platforms'] for comp in competitor_metrics]) / len(competitor_metrics),
                'original_faq_count': original_metrics['faq_count'],
                'competitor_avg_faq': sum([comp['faq_count'] for comp in competitor_metrics]) / len(competitor_metrics)
            }
        }

        return insights

print("✅ Database manager ready!")


✅ Database manager ready!


In [6]:
# ============================================================================
# MAIN APPLICATION - COMPLETE SHOPIFY COMPETITOR ANALYZER
# ============================================================================

class CompleteShopifyAnalyzer:
    """Complete Shopify analyzer with competitor analysis and database persistence"""

    def __init__(self):
        self.scraper = ShopifyStoreScraper()
        self.competitor_engine = CompetitorDiscoveryEngine()
        self.db_manager = DatabaseManager()

    async def comprehensive_analysis(self, store_url: str, include_competitors: bool = True, max_competitors: int = 3) -> dict:
        """Run complete analysis with competitor discovery and database persistence"""
        print(f"🚀 Starting comprehensive analysis for: {store_url}")
        print("="*80)

        # Step 1: Analyze original store
        print("📊 Analyzing original store...")
        async with self.scraper as scraper:
            original_insights = await scraper.scrape_store(store_url)

        # Step 2: Extract brand information
        print("🔍 Extracting brand information...")
        brand_info = self.competitor_engine.extract_brand_info(store_url)

        # Step 3: Save original store to database
        print("💾 Saving original store to database...")
        original_store_id = self.db_manager.save_store_insights(original_insights, brand_info)

        competitor_insights = []
        competitor_store_ids = []

        if include_competitors:
            # Step 4: Discover competitors
            print("🎯 Discovering competitors...")
            industry_competitors = self.competitor_engine.discover_competitors_by_industry(brand_info, max_competitors)
            similarity_competitors = self.competitor_engine.discover_competitors_by_similarity(brand_info, max_competitors)

            all_competitors = industry_competitors + similarity_competitors
            unique_competitors = self._deduplicate_competitors(all_competitors)[:max_competitors]

            print(f"Found {len(unique_competitors)} unique competitors")

            # Step 5: Analyze competitor stores
            print("📈 Analyzing competitor stores...")
            for i, competitor in enumerate(unique_competitors, 1):
                try:
                    print(f"  [{i}/{len(unique_competitors)}] Analyzing: {competitor['name']}")

                    # Only analyze if it's a potential Shopify store
                    if self._is_likely_shopify(competitor['url']):
                        async with self.scraper as scraper:
                            competitor_insight = await scraper.scrape_store(competitor['url'])

                        competitor_insight.is_competitor = True
                        competitor_insight.competitor_discovery_method = competitor['discovery_method']

                        # Save competitor to database
                        competitor_store_id = self.db_manager.save_store_insights(competitor_insight, brand_info)

                        # Create competitor relationship
                        self.db_manager.create_competitor_relationship(
                            original_store_id,
                            competitor_store_id,
                            competitor['relationship_type']
                        )

                        competitor_insights.append(competitor_insight)
                        competitor_store_ids.append(competitor_store_id)

                    else:
                        print(f"    ⚠️ Skipping {competitor['name']} - Not a Shopify store")

                except Exception as e:
                    print(f"    ❌ Failed to analyze {competitor['name']}: {e}")

        # Step 6: Generate comprehensive report
        print("📋 Generating comprehensive report...")
        competitive_report = self.db_manager.generate_competitive_report(original_store_id)

        # Step 7: Display results
        self._display_comprehensive_results(original_insights, competitor_insights, competitive_report)

        # Step 8: Export data
        self._export_analysis_data(original_insights, competitor_insights, competitive_report)

        print("✅ Comprehensive analysis completed!")

        return {
            'original_store': original_insights,
            'competitors': competitor_insights,
            'brand_info': brand_info,
            'competitive_report': competitive_report,
            'original_store_id': original_store_id,
            'competitor_store_ids': competitor_store_ids
        }

    def _deduplicate_competitors(self, competitors: list) -> list:
        """Remove duplicate competitors"""
        seen_urls = set()
        unique_competitors = []

        for competitor in competitors:
            if competitor['url'] not in seen_urls:
                unique_competitors.append(competitor)
                seen_urls.add(competitor['url'])

        return unique_competitors

    def _is_likely_shopify(self, url: str) -> bool:
        """Check if URL is likely a Shopify store"""
        try:
            response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
            content = response.text.lower()

            shopify_indicators = [
                'shopify',
                'cdn.shopify.com',
                'shopifycdn.com',
                'myshopify.com'
            ]

            return any(indicator in content for indicator in shopify_indicators)

        except:
            return False

    def _display_comprehensive_results(self, original_insights: BrandInsights, competitor_insights: list, competitive_report: dict):
        """Display comprehensive analysis results"""
        print("\n" + "="*80)
        print("📊 COMPREHENSIVE ANALYSIS RESULTS")
        print("="*80)

        # Original store summary
        print(f"\n🏪 ORIGINAL STORE: {original_insights.store_name}")
        print(f"URL: {original_insights.store_url}")
        print(f"Products: {len(original_insights.product_catalog)}")
        print(f"Hero Products: {len(original_insights.hero_products)}")
        print(f"Social Platforms: {len(original_insights.social_handles)}")
        print(f"FAQs: {len(original_insights.faqs)}")
        print(f"Contact Emails: {len(original_insights.contact_info.emails)}")
        print(f"Contact Phones: {len(original_insights.contact_info.phone_numbers)}")

        # Competitor summary
        print(f"\n🏆 COMPETITORS ANALYZED: {len(competitor_insights)}")
        for i, competitor in enumerate(competitor_insights, 1):
            print(f"  {i}. {competitor.store_name}")
            print(f"     URL: {competitor.store_url}")
            print(f"     Products: {len(competitor.product_catalog)}")
            print(f"     Discovery Method: {competitor.competitor_discovery_method}")

        # Competitive insights
        if 'competitive_insights' in competitive_report and competitive_report['competitive_insights']:
            insights = competitive_report['competitive_insights']
            print(f"\n🔍 COMPETITIVE INSIGHTS:")
            print(f"Product Catalog Position: {insights['product_catalog_comparison']['position']}")
            print(f"Pricing Position: {insights['pricing_comparison']['pricing_position']}")
            print(f"Average Price: ${insights['pricing_comparison']['original_avg_price']:.2f}")
            print(f"Competitor Average Price: ${insights['pricing_comparison']['competitor_avg_price']:.2f}")

        # Display key data points
        print(f"\n📦 PRODUCT INSIGHTS:")
        if original_insights.product_catalog:
            print("Top Products:")
            for i, product in enumerate(original_insights.product_catalog[:5], 1):
                print(f"  {i}. {product.title} - ${product.price}")

        print(f"\n📱 SOCIAL MEDIA PRESENCE:")
        for handle in original_insights.social_handles:
            print(f"  {handle.platform.title()}: {handle.url}")

        print(f"\n❓ CUSTOMER SUPPORT:")
        if original_insights.faqs:
            print("Sample FAQs:")
            for i, faq in enumerate(original_insights.faqs[:3], 1):
                print(f"  Q{i}: {faq.question}")
                print(f"  A{i}: {faq.answer[:100]}...")

        print(f"\n🔗 IMPORTANT LINKS:")
        for name, url in original_insights.important_links.items():
            print(f"  {name}: {url}")

        print("\n" + "="*80)

    def _export_analysis_data(self, original_insights: BrandInsights, competitor_insights: list, competitive_report: dict):
        """Export analysis data to files"""
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Export original store data
        original_filename = f"{original_insights.store_name.lower().replace(' ', '_')}_analysis_{timestamp}.json"
        with open(original_filename, 'w', encoding='utf-8') as f:
            json.dump(original_insights.dict(), f, indent=2, default=str, ensure_ascii=False)

        # Export competitive report
        report_filename = f"{original_insights.store_name.lower().replace(' ', '_')}_competitive_report_{timestamp}.json"
        with open(report_filename, 'w', encoding='utf-8') as f:
            json.dump(competitive_report, f, indent=2, default=str, ensure_ascii=False)

        print(f"📁 Analysis exported to: {original_filename}")
        print(f"📁 Competitive report exported to: {report_filename}")

    def get_database_summary(self) -> pd.DataFrame:
        """Get summary of all data in database"""
        return self.db_manager.get_store_summary()

print("✅ Complete Shopify analyzer ready!")


✅ Complete Shopify analyzer ready!


In [7]:
# ============================================================================
# DEMO EXECUTION - RUN THE COMPLETE ANALYSIS
# ============================================================================

async def run_complete_demo():
    """Run the complete demonstration"""
    print("🎯 COMPLETE SHOPIFY COMPETITOR ANALYSIS DEMO")
    print("="*60)

    # Initialize analyzer
    analyzer = CompleteShopifyAnalyzer()

    # Target store for analysis
    target_store = "https://memy.co.in"

    # Run comprehensive analysis
    results = await analyzer.comprehensive_analysis(
        store_url=target_store,
        include_competitors=True,
        max_competitors=2  # Limit for demo
    )

    print(f"\n📊 ANALYSIS SUMMARY:")
    print(f"Original Store: {results['original_store'].store_name}")
    print(f"Competitors Analyzed: {len(results['competitors'])}")
    print(f"Database Store ID: {results['original_store_id']}")

    # Show database summary
    print(f"\n💾 DATABASE SUMMARY:")
    db_summary = analyzer.get_database_summary()
    print(db_summary.to_string(index=False))

    return results

# Execute the complete demo
print("🚀 STARTING COMPLETE ANALYSIS...")
demo_results = await run_complete_demo()


🚀 STARTING COMPLETE ANALYSIS...
🎯 COMPLETE SHOPIFY COMPETITOR ANALYSIS DEMO
✅ Database initialized: sqlite:///shopify_competitor_insights.db
🚀 Starting comprehensive analysis for: https://memy.co.in
📊 Analyzing original store...
🔍 Analyzing: https://memy.co.in
📊 Extracting all data points...
📦 Fetching product catalog...
🏠 Identifying hero products...
📜 Extracting policies...
❓ Extracting FAQs...
📱 Finding social handles...
📞 Extracting contact info...
ℹ️ Extracting brand info...
🔗 Finding important links...
✅ Found 2 emails, 3 phones
✅ Found 2 social handles
✅ Found 4 important links
✅ Found brand information
✅ Found 222 products
⚠️ No FAQs found
✅ Found 2 policies
✅ Found 5 hero products
✅ Store analysis completed!
🔍 Extracting brand information...
💾 Saving original store to database...
✅ Store saved to database with ID: 1
🎯 Discovering competitors...
Found 2 unique competitors
📈 Analyzing competitor stores...
  [1/2] Analyzing: Hair Originals
🔍 Analyzing: https://hairoriginals.com
📊

In [8]:
# ============================================================================
# ADDITIONAL UTILITIES AND QUERY EXAMPLES
# ============================================================================

# Quick analysis function
async def quick_analysis(store_url: str):
    """Quick analysis for testing"""
    print(f"⚡ Quick Analysis: {store_url}")
    print("-" * 40)

    analyzer = CompleteShopifyAnalyzer()

    # Analyze without competitors for speed
    results = await analyzer.comprehensive_analysis(
        store_url=store_url,
        include_competitors=False
    )

    insights = results['original_store']
    print(f"🏪 Store: {insights.store_name}")
    print(f"📦 Products: {len(insights.product_catalog)}")
    print(f"📱 Social: {len(insights.social_handles)}")
    print(f"❓ FAQs: {len(insights.faqs)}")
    print(f"📞 Contact: {len(insights.contact_info.emails)} emails")
    print(f"🔗 Links: {len(insights.important_links)}")

    return results

# Database query examples
def show_database_stats():
    """Show database statistics"""
    db_manager = DatabaseManager()

    print("\n📊 DATABASE STATISTICS:")
    print("=" * 40)

    # Get summary
    summary = db_manager.get_store_summary()
    print(f"Total stores in database: {len(summary)}")
    print(f"Total products: {summary['Products'].sum()}")
    print(f"Total social handles: {summary['Social Platforms'].sum()}")
    print(f"Total FAQs: {summary['FAQs'].sum()}")

    print("\nStore Summary:")
    print(summary.to_string(index=False))

# File management utilities
import os

def list_generated_files():
    """List all generated files"""
    print("\n📁 GENERATED FILES:")
    print("=" * 30)

    json_files = [f for f in os.listdir('.') if f.endswith('.json')]
    db_files = [f for f in os.listdir('.') if f.endswith('.db')]

    print("JSON Files:")
    for file in json_files:
        size = os.path.getsize(file) / 1024
        print(f"  {file} ({size:.1f} KB)")

    print("\nDatabase Files:")
    for file in db_files:
        size = os.path.getsize(file) / 1024
        print(f"  {file} ({size:.1f} KB)")

# Show final statistics
show_database_stats()
list_generated_files()

print("\n✅ Complete Shopify Competitor Analysis System Ready!")
print("="*60)
print("🎯 Features Available:")
print("✅ Complete product catalog extraction")
print("✅ Hero products identification")
print("✅ Policy extraction (Privacy, Return, Refund)")
print("✅ FAQ extraction with Q&A pairs")
print("✅ Social media handle discovery")
print("✅ Contact information extraction")
print("✅ About brand content extraction")
print("✅ Important links identification")
print("✅ Competitor discovery & analysis")
print("✅ Full SQL database persistence")
print("✅ Competitive analysis reports")
print("✅ Data export capabilities")


✅ Database initialized: sqlite:///shopify_competitor_insights.db

📊 DATABASE STATISTICS:
Total stores in database: 3
Total products: 321
Total social handles: 5
Total FAQs: 20

Store Summary:
 Store ID    Store Name Industry  Products  Social Platforms  FAQs  Is Competitor       Scraped At
        1          Memy  Apparel       222                 2     0          False 2025-07-18 08:08
        2 Hairoriginals  Apparel        99                 3    20           True 2025-07-18 08:08
        3      Bewakoof  Apparel         0                 0     0           True 2025-07-18 08:08

📁 GENERATED FILES:
JSON Files:
  memy_analysis_20250718_080904.json (499.1 KB)
  memy_competitive_report_20250718_080904.json (1.8 KB)

Database Files:
  shopify_competitor_insights.db (840.0 KB)

✅ Complete Shopify Competitor Analysis System Ready!
🎯 Features Available:
✅ Complete product catalog extraction
✅ Hero products identification
✅ Policy extraction (Privacy, Return, Refund)
✅ FAQ extraction with Q&