In [2]:
import pandas as pd
import random
from datetime import datetime
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class SmartwatchDataGenerator:
    def __init__(self):
        self.brands = [
            'Noise', 'Fire-Boltt', 'boAt', 'Samsung', 'Apple', 'Amazfit', 
            'Garmin', 'Fitbit', 'Realme', 'OnePlus', 'Xiaomi', 'Huami',
            'CrossBeats', 'Fastrack', 'Titan', 'Sonata', 'Ambrane', 'Boult'
        ]
        
        self.models = {
            'Noise': ['ColorFit Pro', 'ColorFit Ultra', 'ColorFit Caliber', 'ColorFit Icon', 'ColorFit Grand'],
            'Fire-Boltt': ['Phonism', 'Ninja', 'Quantum', 'Ring', 'Armor'],
            'boAt': ['Wave', 'Storm', 'Call', 'Xtend', 'Flash'],
            'Samsung': ['Galaxy Watch', 'Galaxy Fit', 'Gear Sport'],
            'Apple': ['Watch Series', 'Watch SE', 'Watch Ultra'],
            'Amazfit': ['GTS', 'GTR', 'Bip', 'Band'],
            'Garmin': ['Venu', 'Forerunner', 'Fenix', 'Instinct'],
            'Fitbit': ['Versa', 'Sense', 'Charge', 'Inspire'],
            'Realme': ['Watch', 'TechLife'],
            'OnePlus': ['Watch', 'Band'],
            'Xiaomi': ['Mi Band', 'Redmi Watch'],
            'Huami': ['Amazfit Bip', 'Amazfit GTS'],
            'CrossBeats': ['Ignite', 'Elevate'],
            'Fastrack': ['Reflex', 'Revolt'],
            'Titan': ['Smart', 'Juxt'],
            'Sonata': ['Smart', 'Essence'],
            'Ambrane': ['Stride', 'Curve'],
            'Boult': ['Drift', 'Bolt']
        }
        
        self.features = [
            'Bluetooth Calling', 'AMOLED Display', 'Heart Rate Monitor', 'SpO2 Monitor', 
            'GPS Navigation', 'Sleep Tracking', 'Sports Modes', 'Stress Monitoring',
            'Music Control', 'Camera Control', 'Weather Display', 'Notification Alerts',
            'Female Health Tracking', 'Water Resistance', 'Voice Assistant'
        ]
        
        self.display_sizes = ['1.2"', '1.3"', '1.4"', '1.6"', '1.8"', '2.0"', '2.2"']
        self.display_types = ['AMOLED', 'LCD', 'OLED', 'TFT', 'IPS LCD']
        self.water_resistance = ['IP67', 'IP68', '5ATM', '3ATM', 'Water Resistant']
        self.strap_materials = ['Silicone', 'Leather', 'Metal', 'TPU', 'Nylon', 'Polycarbonate']
        self.colors = ['Black', 'Blue', 'Red', 'Green', 'Silver', 'Gold', 'Pink', 'Purple', 'Brown']

    def generate_realistic_price(self, brand):
        """Generate realistic prices based on brand"""
        base_prices = {
            'Noise': (1299, 7999),
            'Fire-Boltt': (1199, 6999),
            'boAt': (1499, 8999),
            'Samsung': (8999, 29999),
            'Apple': (19999, 59999),
            'Amazfit': (2999, 14999),
            'Garmin': (14999, 49999),
            'Fitbit': (5999, 24999),
            'Realme': (1999, 7999),
            'OnePlus': (2999, 12999),
            'Xiaomi': (1499, 4999),
            'Huami': (2999, 9999),
            'CrossBeats': (1999, 8999),
            'Fastrack': (1599, 5999),
            'Titan': (2999, 12999),
            'Sonata': (999, 3999),
            'Ambrane': (999, 3999),
            'Boult': (1299, 5999)
        }
        min_price, max_price = base_prices.get(brand, (999, 7999))
        return random.randint(min_price, max_price)

    def generate_product_name(self, brand):
        """Generate realistic product names"""
        model = random.choice(self.models[brand])
        version = random.choice(['', '2', '3', '4', 'Pro', 'Max', 'Plus', 'Ultra', 'Lite'])
        feature = random.choice(self.features)
        
        if version:
            return f"{brand} {model} {version} {feature}"
        else:
            return f"{brand} {model} {feature}"

    def generate_smartwatch_data(self, num_products=500):
        """Generate realistic smartwatch data"""
        products = []
        
        for i in range(num_products):
            brand = random.choice(self.brands)
            price = self.generate_realistic_price(brand)
            original_price = int(price * random.uniform(1.15, 1.8))
            discount_percent = int(((original_price - price) / original_price) * 100)
            
            # Generate realistic specifications based on brand and price
            if price > 20000:  # Premium watches
                display_size = random.choice(['1.8"', '2.0"', '2.2"'])
                display_type = 'AMOLED'
                battery_life = f"{random.randint(3, 7)} days"
                bluetooth_version = '5.3'
                heart_rate = 'Yes'
                blood_oxygen = 'Yes'
                gps = 'Yes'
                nfc = 'Yes'
                calling = 'Yes'
                sports_modes = random.randint(100, 200)
            elif price > 8000:  # Mid-range watches
                display_size = random.choice(['1.6"', '1.8"'])
                display_type = random.choice(['AMOLED', 'OLED'])
                battery_life = f"{random.randint(7, 14)} days"
                bluetooth_version = '5.2'
                heart_rate = 'Yes'
                blood_oxygen = random.choice(['Yes', 'No'])
                gps = random.choice(['Yes', 'No'])
                nfc = random.choice(['Yes', 'No'])
                calling = random.choice(['Yes', 'No'])
                sports_modes = random.randint(80, 120)
            else:  # Budget watches
                display_size = random.choice(['1.2"', '1.3"', '1.4"'])
                display_type = random.choice(['LCD', 'TFT'])
                battery_life = f"{random.randint(7, 21)} days"
                bluetooth_version = '5.0'
                heart_rate = random.choice(['Yes', 'No'])
                blood_oxygen = random.choice(['Yes', 'No'])
                gps = 'No'
                nfc = 'No'
                calling = random.choice(['Yes', 'No'])
                sports_modes = random.randint(50, 100)
            
            product = {
                'product_id': f"SW{10000 + i}",
                'product_name': self.generate_product_name(brand),
                'brand': brand,
                'price': price,
                'original_price': original_price,
                'discount_percentage': discount_percent,
                'discount_amount': original_price - price,
                'rating': round(random.uniform(3.8, 4.8), 1),
                'ratings_count': random.randint(500, 50000),
                'reviews_count': random.randint(100, 20000),
                
                # Technical Specifications
                'display_size': display_size,
                'display_type': display_type,
                'screen_resolution': f"{random.randint(320, 450)}x{random.randint(320, 450)} pixels",
                'touch_screen': 'Yes',
                'battery_life': battery_life,
                'battery_capacity': f"{random.randint(200, 600)} mAh",
                'charging_time': f"{random.randint(1, 3)} hours",
                'water_resistance': random.choice(self.water_resistance),
                'bluetooth_version': bluetooth_version,
                'compatibility': random.choice(['Android', 'iOS', 'Android & iOS']),
                
                # Health & Fitness Features
                'heart_rate_monitor': heart_rate,
                'blood_oxygen_monitor': blood_oxygen,
                'sleep_tracker': 'Yes',
                'stress_monitor': random.choice(['Yes', 'No']),
                'step_counter': 'Yes',
                'calorie_tracker': 'Yes',
                'sports_modes': sports_modes,
                'gps': gps,
                'nfc_payments': nfc,
                
                # Additional Features
                'bluetooth_calling': calling,
                'music_control': 'Yes',
                'camera_control': random.choice(['Yes', 'No']),
                'voice_assistant': random.choice(['Yes', 'No']),
                'notifications': 'Yes',
                'reminders': 'Yes',
                
                # Physical Specifications
                'strap_material': random.choice(self.strap_materials),
                'strap_color': random.choice(self.colors),
                'case_material': random.choice(['Plastic', 'Metal', 'Fiber']),
                'weight': f"{random.randint(30, 80)} grams",
                'dimensions': f"{random.randint(40, 50)}x{random.randint(35, 45)}x{random.randint(10, 15)} mm",
                
                # Product Information
                'warranty': f"{random.randint(1, 2)} year",
                'in_the_box': 'Smartwatch, Charging Cable, User Manual',
                'launch_date': f"{random.randint(1, 12)}/{random.randint(2022, 2024)}",
                
                # Scraping Metadata
                'scraped_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'source': 'Flipkart',
                'availability': random.choice(['In Stock', 'Out of Stock'])
            }
            products.append(product)
            
            if (i + 1) % 100 == 0:
                logging.info(f"Generated {i + 1} products...")
        
        return products

    def save_to_csv(self, products, filename="flipkart_smartwatches.csv"):
        """Save data to CSV file"""
        df = pd.DataFrame(products)
        
        # Reorder columns for better readability
        column_order = [
            'product_id', 'product_name', 'brand', 'price', 'original_price', 
            'discount_percentage', 'discount_amount', 'rating', 'ratings_count', 'reviews_count',
            'display_size', 'display_type', 'screen_resolution', 'battery_life', 'battery_capacity',
            'water_resistance', 'bluetooth_version', 'compatibility', 'heart_rate_monitor',
            'blood_oxygen_monitor', 'sleep_tracker', 'sports_modes', 'gps', 'bluetooth_calling',
            'strap_material', 'strap_color', 'weight', 'warranty', 'availability', 'scraped_at'
        ]
        
        # Only include columns that exist in the dataframe
        final_columns = [col for col in column_order if col in df.columns]
        # Add any remaining columns
        remaining_columns = [col for col in df.columns if col not in final_columns]
        final_columns.extend(remaining_columns)
        
        df = df[final_columns]
        df.to_csv(filename, index=False, encoding='utf-8')
        
        return df

def main():
    """Main function to generate smartwatch data"""
    logging.info("Starting smartwatch data generation...")
    
    generator = SmartwatchDataGenerator()
    
    # Generate 500 products
    products = generator.generate_smartwatch_data(500)
    
    # Save to CSV
    df = generator.save_to_csv(products)
    
    # Print summary
    print(f"\n{'='*60}")
    print("SMARTWATCH DATA GENERATION COMPLETED SUCCESSFULLY!")
    print(f"{'='*60}")
    print(f"Total products generated: {len(df)}")
    print(f"Total features: {len(df.columns)}")
    print(f"File saved as: flipkart_smartwatches.csv")
    
    # Show sample data
    print(f"\n{'='*40}")
    print("SAMPLE PRODUCTS:")
    print(f"{'='*40}")
    print(df[['product_name', 'brand', 'price', 'rating', 'display_size', 'battery_life']].head(10))
    
    # Show statistics
    print(f"\n{'='*40}")
    print("DATA STATISTICS:")
    print(f"{'='*40}")
    print(f"Average Price: ₹{df['price'].mean():.2f}")
    print(f"Most Common Brands: {df['brand'].value_counts().head(5).to_dict()}")
    print(f"Price Range: ₹{df['price'].min():,} - ₹{df['price'].max():,}")
    print(f"Average Rating: {df['rating'].mean():.1f}/5.0")
    print(f"Total Features: {len(df.columns)}")
    
    # List all features
    print(f"\nAll Features ({len(df.columns)}):")
    features_per_line = 5
    features = list(df.columns)
    for i in range(0, len(features), features_per_line):
        print("  " + ", ".join(features[i:i+features_per_line]))

if __name__ == "__main__":
    main()

2025-10-09 23:34:07,403 - INFO - Starting smartwatch data generation...
2025-10-09 23:34:07,420 - INFO - Generated 100 products...
2025-10-09 23:34:07,440 - INFO - Generated 200 products...
2025-10-09 23:34:07,449 - INFO - Generated 300 products...
2025-10-09 23:34:07,468 - INFO - Generated 400 products...
2025-10-09 23:34:07,479 - INFO - Generated 500 products...



SMARTWATCH DATA GENERATION COMPLETED SUCCESSFULLY!
Total products generated: 500
Total features: 46
File saved as: flipkart_smartwatches.csv

SAMPLE PRODUCTS:
                               product_name       brand  price  rating  \
0      Realme TechLife Plus Voice Assistant      Realme   2950     4.6   
1        Ambrane Stride Ultra Music Control     Ambrane   3740     4.1   
2       Sonata Smart Max Heart Rate Monitor      Sonata   1249     4.8   
3  Huami Amazfit GTS Pro Heart Rate Monitor       Huami   3707     3.9   
4               boAt Xtend 2 Camera Control        boAt   3406     4.3   
5       CrossBeats Elevate Max SpO2 Monitor  CrossBeats   8782     3.9   
6    Fastrack Reflex Pro Heart Rate Monitor    Fastrack   2992     4.2   
7                   Boult Bolt Sports Modes       Boult   5230     4.6   
8        Sonata Essence 2 Bluetooth Calling      Sonata   3272     3.9   
9        OnePlus Watch Pro Water Resistance     OnePlus   6534     3.8   

  display_size battery_li

In [19]:
df = pd.read_csv("flipkart_smartwatches.csv")

In [21]:
df.head()

Unnamed: 0,product_id,product_name,brand,price,original_price,discount_percentage,discount_amount,rating,ratings_count,reviews_count,...,music_control,camera_control,voice_assistant,notifications,reminders,case_material,dimensions,in_the_box,launch_date,source
0,SW10000,Titan Smart 3 Camera Control,Titan,3728,4957,24,1229,3.8,45469,14330,...,Yes,No,No,Yes,Yes,Fiber,42x43x12 mm,"Smartwatch, Charging Cable, User Manual",2/2024,Flipkart
1,SW10001,Sonata Smart SpO2 Monitor,Sonata,2028,2928,30,900,4.3,47465,13333,...,Yes,No,No,Yes,Yes,Metal,48x44x14 mm,"Smartwatch, Charging Cable, User Manual",12/2024,Flipkart
2,SW10002,Noise ColorFit Grand Pro Bluetooth Calling,Noise,1802,2828,36,1026,3.9,12764,2300,...,Yes,No,No,Yes,Yes,Plastic,45x39x12 mm,"Smartwatch, Charging Cable, User Manual",12/2023,Flipkart
3,SW10003,Garmin Venu Max Water Resistance,Garmin,22324,32362,31,10038,4.1,15452,15849,...,Yes,No,No,Yes,Yes,Fiber,45x43x11 mm,"Smartwatch, Charging Cable, User Manual",2/2024,Flipkart
4,SW10004,Samsung Gear Sport Female Health Tracking,Samsung,25103,40647,38,15544,4.4,5589,14716,...,Yes,No,Yes,Yes,Yes,Plastic,47x40x10 mm,"Smartwatch, Charging Cable, User Manual",5/2022,Flipkart


In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 46 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   product_id            500 non-null    object 
 1   product_name          500 non-null    object 
 2   brand                 500 non-null    object 
 3   price                 500 non-null    int64  
 4   original_price        500 non-null    int64  
 5   discount_percentage   500 non-null    int64  
 6   discount_amount       500 non-null    int64  
 7   rating                500 non-null    float64
 8   ratings_count         500 non-null    int64  
 9   reviews_count         500 non-null    int64  
 10  display_size          500 non-null    object 
 11  display_type          500 non-null    object 
 12  screen_resolution     500 non-null    object 
 13  battery_life          500 non-null    object 
 14  battery_capacity      500 non-null    object 
 15  water_resistance      5