In [2]:
from pymongo import MongoClient
import json
import matplotlib.pyplot as plt

# Open a client for the MongoDB server at localhost:27017.
client = MongoClient('mongodb://localhost:27017/')

# Handles used by the rest of the notebook: the 'vaers' database and its
# 'reports' collection.
db = client.get_database('vaers')
collection = db.get_collection('reports')

In [3]:
# Aggregation stages, defined one by one and assembled in execution order below.

# Keep only the two fields the later stages read; drop the document _id.
project_vax_and_symptoms = {
    "$project": {"vax_data": 1, "symptoms": 1, "_id": 0}
}

# Emit one document per element of vax_data.
unwind_vaccines = {"$unwind": "$vax_data"}

# Emit one document per element of symptoms.
unwind_symptoms = {"$unwind": "$symptoms"}

# Count how many unwound documents share the same
# (vaccine type, manufacturer, symptom) triple.
count_per_combination = {
    "$group": {
        "_id": {
            "vaccine": "$vax_data.VAX_TYPE",
            "manufacturer": "$vax_data.VAX_MANU",
            "Event": "$symptoms",
        },
        "count": {"$sum": 1},
    }
}

# Lift the grouped key fields out of _id so each result is a flat record.
flatten_group_key = {
    "$project": {
        "_id": 0,
        "vaccine": "$_id.vaccine",
        "manufacturer": "$_id.manufacturer",
        "Event": "$_id.Event",
        "count": "$count",
    }
}

# Note: there is deliberately no $match stage between the two $unwind stages,
# so the results are not restricted to any particular vaccine type.
pipeline = [
    project_vax_and_symptoms,
    unwind_vaccines,
    unwind_symptoms,
    count_per_combination,
    flatten_group_key,
]


# Minimum number of occurrences a (vaccine, manufacturer, event) combination
# must have to be kept in the exported file.
MIN_COMBINATION_COUNT = 3

# Run the aggregation. allowDiskUse=True permits the server to write temporary
# files for stages that would otherwise exceed its in-memory limit.
results = collection.aggregate(pipeline, allowDiskUse=True)

# Drain the cursor into a list so the rows can be traversed more than once.
unique_combinations = list(results)

# Keep only combinations whose count is at least MIN_COMBINATION_COUNT
# (i.e. discard those with a count below the threshold).
unique_combinations_filtered = [
    combination
    for combination in unique_combinations
    if combination['count'] >= MIN_COMBINATION_COUNT
]

print(len(unique_combinations_filtered))

# Save the filtered results as JSON. The encoding is stated explicitly so the
# output does not depend on the platform's default locale encoding.
file_name_filtered = "All_Combinations_Filtered.json"
with open(file_name_filtered, 'w', encoding='utf-8') as f:
    json.dump(unique_combinations_filtered, f)

print(f"Filtered results saved as {file_name_filtered}")

143761
Filtered results saved as All_Combinations_Filtered.json
