In [1]:
from pymongo.collection import Collection

def mongo_value_counts(collection: Collection, field: str, top_n: int = None) -> list:
    """
    Mimics pandas `value_counts()` for a MongoDB collection.

    Parameters:
        collection (Collection): pymongo collection object
        field (str): Field name to count occurrences for
        top_n (int, optional): Number of top results to return (default: all)

    Returns:
        List of tuples: [(value, count), ...]
    """
    pipeline = [
        {
            "$group": {
                "_id": f"${field}",
                "count": {"$sum": 1}
            }
        },
        {
            "$sort": {"count": -1}
        }
    ]
    
    if top_n:
        pipeline.append({"$limit": top_n})
    
    results = collection.aggregate(pipeline)
    
    return [(doc["_id"], doc["count"]) for doc in results]


In [3]:
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["agent_evaluation_db"]
collection = db["qna_pairs"]

In [8]:
# Get counts similar to df['col'].value_counts()
counts = mongo_value_counts(collection, "title")

In [9]:
counts

[('Queen_Victoria', 883),
 ('New_York_City', 817),
 ('American_Idol', 790),
 ('Beyoncé', 753),
 ('Frédéric_Chopin', 697),
 ('Buddhism', 610),
 ('Pharmaceutical_industry', 586),
 ('New_Haven,_Connecticut', 582),
 ('Premier_League', 551),
 ('Hunting', 531),
 ('Antarctica', 525),
 ('2008_Sichuan_earthquake', 521),
 ('Houston', 521),
 ('Steven_Spielberg', 512),
 ('PlayStation_3', 508),
 ('Alfred_North_Whitehead', 502),
 ('2008_Summer_Olympics_torch_relay', 500),
 ('Charleston,_South_Carolina', 490),
 ('Macintosh', 487),
 ('Muammar_Gaddafi', 485),
 ('Multiracial_American', 481),
 ('San_Diego', 475),
 ('Spectre_(2015_film)', 462),
 ('Greeks', 458),
 ('Yale_University', 452),
 ('Middle_Ages', 452),
 ('Modern_history', 448),
 ('Mandolin', 446),
 ('Freemasonry', 444),
 ('Windows_8', 442),
 ('Federal_Bureau_of_Investigation', 441),
 ('Avicenna', 440),
 ('Hellenistic_period', 440),
 ('Affirmative_action_in_the_United_States', 440),
 ('Slavs', 437),
 ('Napoleon', 436),
 ('Miami', 436),
 ('Bermuda'

In [10]:
collection

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'agent_evaluation_db'), 'qna_pairs')

In [11]:
dir(collection)

['__annotations__',
 '__bool__',
 '__call__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_aggregate',
 '_aggregate_one_result',
 '_codec_options',
 '_command',
 '_conn_for_writes',
 '_count_cmd',
 '_create',
 '_create_helper',
 '_create_indexes',
 '_create_search_indexes',
 '_database',
 '_delete',
 '_delete_retryable',
 '_drop_index',
 '_find_and_modify',
 '_full_name',
 '_insert_one',
 '_is_protocol',
 '_list_indexes',
 '_name',
 '_read_concern',
 '_read_preference',
 '_read_preference_for',
 '_retryable_non_cursor_read',
 '_timeou

In [12]:
filter_condition = { "title": "Hunting" }

In [14]:
cursor = collection.find(filter_condition).limit(5)
for doc in cursor:
    print(doc)
    print(5*'=')

{'_id': ObjectId('68627f6e277fab6848d6ce14'), 'id': '573443cb879d6814001ca423', 'answers': 'Hunting', 'context_hash': 'c0feeb78bdaba73a41231e10931c9d1b', 'question': 'What is the practice of killing or trapping any animal?', 'title': 'Hunting'}
=====
{'_id': ObjectId('68627f6e277fab6848d6ce15'), 'id': '573443cb879d6814001ca424', 'answers': 'food', 'context_hash': 'c0feeb78bdaba73a41231e10931c9d1b', 'question': 'Why do humans most commonly hunt wildlife?', 'title': 'Hunting'}
=====
{'_id': ObjectId('68627f6e277fab6848d6ce16'), 'id': '573443cb879d6814001ca425', 'answers': 'poaching', 'context_hash': 'c0feeb78bdaba73a41231e10931c9d1b', 'question': 'What is there a distinction between lawful hunting and?', 'title': 'Hunting'}
=====
{'_id': ObjectId('68627f6e277fab6848d6ce17'), 'id': '573443cb879d6814001ca426', 'answers': 'illegal killing, trapping or capture of the hunted species', 'context_hash': 'c0feeb78bdaba73a41231e10931c9d1b', 'question': 'What is poaching?', 'title': 'Hunting'}
====