In [8]:
import requests
import re
import os
import pandas as pd
import json

In [9]:
# One HTTP session, reused for every request made with `r`
r = requests.session()

# Category listing endpoint used to discover how many products exist
homepageUrl = "https://api-gateway.juno.lenskart.com/v2/products/category/3363"

# Fetch the listing. The timeout keeps the cell from hanging forever on a stalled
# connection, and raise_for_status() reports HTTP failures directly instead of
# letting them surface later as a confusing JSON-decoding error or KeyError.
homepage = r.get(homepageUrl, timeout=30)
homepage.raise_for_status()

# Total number of products reported by the API for this category
total_products = homepage.json()["result"]["num_of_products"]
total_products

4487

In [10]:
# Create the output folders up front so the cell also works on a fresh checkout
os.makedirs('raw/glasses', exist_ok=True)
os.makedirs('output', exist_ok=True)

# Accumulates the products collected from every page
combined_data = {
    "category_name": "Eyeglasses",
    "product_list": []
}

# Products requested per page, and the number of pages needed (ceiling division).
# `total_products` is only read here, never modified, so the cell can be re-run safely.
threshold_count = 1395
total_pages = (total_products + threshold_count - 1) // threshold_count

# Use the same page size for every request. Shrinking it on the last page would move
# that page's offset and re-fetch earlier items instead of the tail.
# NOTE(review): assumes the API computes the offset as page * page-size - confirm.
pageSize = threshold_count

# Fetch each page, keep a raw per-page copy on disk, and accumulate the products
for page in range(total_pages):
    pageUrl = f"https://api-gateway.juno.lenskart.com/v2/products/category/3363?page-size={pageSize}&page={page}"

    try:
        response = r.get(pageUrl, timeout=60)
    except requests.RequestException as exc:
        # Best effort, like the status-code branch below: report and move on
        print(f"Error fetching data for page {page}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Error fetching data for page {page}. Status code: {response.status_code}")
        continue

    data = response.json()
    result = {
        "category_name": "Eyeglasses",
        "product_list": data.get("result", {}).get("product_list", [])
    }

    # Raw snapshot of this page
    with open(f'raw/glasses/page{page}.json', 'w') as file:
        json.dump(result, file, indent=2)

    combined_data["product_list"].extend(result["product_list"])

# Make skipped or short pages visible instead of silently exporting partial data
fetched_count = len(combined_data["product_list"])
if fetched_count != total_products:
    print(f"Warning: expected {total_products} products but collected {fetched_count}.")

# Flatten each product's `prices` list into two scalar fields before saving
for product in combined_data["product_list"]:
    # `or []` also covers a product whose `prices` field is present but null
    for price in product.pop('prices', None) or []:
        price_name = price.get("name")
        if price_name == "Market Price":
            product["market_price"] = price.get("price")
        elif price_name == "Lenskart Price":
            product["lenskart_price"] = price.get("price")

# Save the combined data to a JSON file
with open('raw/glasses/glasses.json', 'w') as combined_file:
    json.dump(combined_data, combined_file, indent=2)

# Convert combined data to DataFrame
df = pd.DataFrame(combined_data["product_list"])

if not df.empty:
    # Columns exported to the CSV, in output order
    selected_columns = ["id", "product_url", "color", "size", "width", "brand_name_en",
                        "model_name", "classification", "purchaseCount", "avgRating",  "qty",
                        "wishlistCount", "market_price", "lenskart_price"]

    # Fallbacks for numeric fields the API may leave out
    default_values = {
        "purchaseCount": 0,
        "avgRating": 0.0,
        "qty": 0,
        "wishlistCount": 0
    }

    # reindex (unlike df[selected_columns]) creates any column that no product supplied
    # instead of raising KeyError; fillna then applies the defaults above.
    df = df.reindex(columns=selected_columns).fillna(default_values)

    # Save the DataFrame to a CSV file
    df.to_csv('output/glasses.csv', index=False)
else:
    print("No data to write to CSV.")


In [11]:
# Sanity check: reload the combined JSON from disk and report how many products it holds
with open('raw/glasses/glasses.json', 'r') as saved_file:
    js = json.load(saved_file)

product_count = len(js['product_list'])
print(product_count)

4487
