In [1]:
from prisma import Prisma
import pandas as pd
from datetime import datetime

# Instantiate the Prisma client and open its connection; the cells below run
# their count and raw-SQL queries through `db`.
# Top-level `await` here relies on the notebook kernel's async support.
db = Prisma()
await db.connect()

In [2]:
from pymongo import MongoClient

# Connection string for the MongoDB server (a local instance on the default port).
CONNECTION_STRING = "mongodb://localhost:27017"

# Create the client with pymongo's MongoClient.
client = MongoClient(CONNECTION_STRING)

# Handle to the "eezeeDb" database; the collections below are looked up on it.
mongo_db = client["eezeeDb"]

product_collection = mongo_db["products"]

# The original name `category_colletion` was a misspelling. Prefer the correctly
# spelled `category_collection`; the old name stays as an alias so any existing
# reference to it keeps working.
category_collection = mongo_db["categories"]
category_colletion = category_collection

In [3]:
# Shared accumulator: later cells add their counts and coverage figures to this dict.
stats = dict()

In [4]:
all_items_count = await db.item.count(
    where={
        "deleted": False,
        "baseCurrency": "SGD",
        "isHiddenFromSearchEngine": False
    }
)
stats["all_items_count"] = all_items_count
stats

{'all_items_count': 354945}

In [5]:
# Shared SQL prefix: defines a CTE named `spec` that joins each
# "ItemSpecification" row to its "ItemCategorySpecHeader" row. It is not a
# complete statement on its own; the spec queries in the next cell are appended
# to it before execution.
# NOTE(review): `itemspec.*` and `specheader.*` are both selected, plus an explicit
# "itemCategorySpecHeaderId" alias, so the CTE presumably exposes duplicate column
# names (e.g. `id`) — confirm against the schema before referencing such columns via `spec`.
cte_query = """
WITH spec AS (
    select 
        itemspec.*,
        specheader.id as "itemCategorySpecHeaderId",
        specheader.*
    from "ItemSpecification" itemspec
    join "ItemCategorySpecHeader" specheader on itemspec."itemCategorySpecHeaderId" = specheader.id
)
"""

In [6]:
items_w_specs_query = """
    select 
        COUNT(DISTINCT item.id) AS "itemWithSpecsCount"
    from "Item" item
    join spec on spec."itemId" = item.id and item."subCat" = spec."itemCategoryTitleUrl"
    where 1=1
    and item.deleted = false
    and item."baseCurrency" = 'SGD'
    and item."isHiddenFromSearchEngine" = false
    """
items_with_specs_count = await db.query_raw(query=cte_query+items_w_specs_query)

stats["product_specification"] = {"products_w_specification_count": items_with_specs_count[0]["itemWithSpecsCount"]}
stats["product_specification"]["specs_coverage"] = (items_with_specs_count[0]["itemWithSpecsCount"] / all_items_count) * 100

distinct_specs = """
    select 
        COUNT(DISTINCT spec."nameKebabCase") AS "distinctSpecs"
    from "Item" item
    join spec on spec."itemId" = item.id and item."subCat" = spec."itemCategoryTitleUrl"
    where 1=1
    and item.deleted = false
    and item."baseCurrency" = 'SGD'
    and item."isHiddenFromSearchEngine" = false
"""
response = await db.query_raw(query=cte_query+distinct_specs)
stats["product_specification"]["distinct_specifications"] = response[0]["distinctSpecs"]
stats

{'all_items_count': 354945,
 'product_specification': {'products_w_specification_count': 158385,
  'specs_coverage': 44.62240628829819,
  'distinct_specifications': 3776}}

In [7]:
items_w_brand = await db.item.count(
    where={
        "deleted": False,
        "baseCurrency": "SGD",
        "isHiddenFromSearchEngine": False,
        "brand": {
            "not": "",
        }
    }
)

stats["brand"] = {"products_w_brand_count": items_w_brand}

distinct_brands = await db.query_raw(query="""
SELECT COUNT(DISTINCT item.brand) AS "unique_brand_count"
from "Item" item
where 1=1
    and item.deleted = false
    and item."baseCurrency" = 'SGD'
    and item."isHiddenFromSearchEngine" = false
""")
stats["brand"]["distinct_brands"] = distinct_brands[0]["unique_brand_count"]
stats["brand"]["brand_coverage"] = (items_w_brand/all_items_count) * 100

stats

{'all_items_count': 354945,
 'product_specification': {'products_w_specification_count': 158385,
  'specs_coverage': 44.62240628829819,
  'distinct_specifications': 3776},
 'brand': {'products_w_brand_count': 354944,
  'distinct_brands': 3105,
  'brand_coverage': 99.99971826621025}}

In [8]:
items_w_model_number = await db.item.count(
    where={
        "deleted": False,
        "baseCurrency": "SGD",
        "isHiddenFromSearchEngine": False,
        "modelNumber": {
            "not": ""
        }
    }
)
stats["model_number"] = {"products_w_model_number_count": items_w_model_number}

stats["model_number"]["model_number_coverage"] = (items_w_model_number/all_items_count) * 100

stats


{'all_items_count': 354945,
 'product_specification': {'products_w_specification_count': 158385,
  'specs_coverage': 44.62240628829819,
  'distinct_specifications': 3776},
 'brand': {'products_w_brand_count': 354944,
  'distinct_brands': 3105,
  'brand_coverage': 99.99971826621025},
 'model_number': {'products_w_model_number_count': 354945,
  'model_number_coverage': 100.0}}

In [12]:
items_w_category = await db.item.count(
    where={
        "deleted": False,
        "baseCurrency": "SGD",
        "isHiddenFromSearchEngine": False,
        "subCat": {
            "not": ""
        }
    }
)

stats["category"] = {"products_w_category_count": items_w_category}
stats["category_coverage"] = (items_w_category/all_items_count) * 100
distinct_categories = await db.query_raw(query="""
SELECT COUNT(DISTINCT item."subCat") AS "distinct_cat_count"
from "Item" item
where 1=1
    and item.deleted = false
    and item."baseCurrency" = 'SGD'
    and item."isHiddenFromSearchEngine" = false
""")
stats["category"]["distinct_categories"] = distinct_categories[0]["distinct_cat_count"]


stats

{'all_items_count': 354945,
 'product_specification': {'products_w_specification_count': 158385,
  'specs_coverage': 44.62240628829819,
  'distinct_specifications': 3776},
 'brand': {'products_w_brand_count': 354944,
  'distinct_brands': 3105,
  'brand_coverage': 99.99971826621025},
 'model_number': {'products_w_model_number_count': 354945,
  'model_number_coverage': 100.0},
 'category': {'products_w_category_count': 354945,
  'distinct_categories': 1465},
 'category_coverage': 100.0}

Unnamed: 0,all_items_count,product_specification,brand,model_number,category,category_coverage
products_w_specification_count,354945,158385.0,,,,100.0
specs_coverage,354945,44.622406,,,,100.0
distinct_specifications,354945,3776.0,,,,100.0
products_w_brand_count,354945,,354944.0,,,100.0
distinct_brands,354945,,3105.0,,,100.0
