# MongoDB Filtering Operations

This notebook demonstrates various filtering techniques in MongoDB, from basic to complex queries.

In [None]:
import sys
!{sys.executable} -m pip install pandas pymongo --quiet

import pandas as pd
from pymongo import MongoClient
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

def print_mongo(obj):
    """Write a MongoDB result to stdout as indented JSON.

    Any value the json module cannot serialise on its own is converted
    with ``str`` before being written.
    """
    rendered = json.dumps(obj, default=str, indent=2)
    print(rendered)

def get_mongo_client(max_retries=5, retry_delay=5):
    """Connect to MongoDB, retrying while the connection attempt fails.

    Each attempt builds a client and issues a ``ping`` command so that the
    client is only returned once the server has actually answered.

    Args:
        max_retries: Total number of connection attempts; must be at least 1.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        A client that has successfully answered a ``ping``.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this
            silently returned ``None``).
        Exception: The error from the final attempt, re-raised unchanged.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        client = None
        try:
            # NOTE(review): credentials are hard-coded in this URI; consider
            # supplying them through the environment instead.
            client = MongoClient('mongodb://admin:admin@router1:27017/businessdb?authSource=admin')
            client.admin.command('ping')
            print("Successfully connected to MongoDB")
            return client
        except Exception as e:
            # Release the half-initialised client so failed attempts do not
            # accumulate open clients while we retry.
            if client is not None:
                client.close()
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                # Out of attempts: surface the last error to the caller.
                raise

# Open the connection once; the query cells below all go through org_collection.
client = get_mongo_client()
db = client["businessdb"]
org_collection = db.organizations

## Simple Filters

### 1. Filtering by Country
This query demonstrates how to filter organizations based on a simple equality condition. It retrieves all organizations located in Cote d'Ivoire.

In [None]:
# Equality filter: match documents whose "country" field is exactly this value.
# The cursor is named after the country actually queried.
ivory_coast_orgs = org_collection.find({"country": "Cote d'Ivoire"})
print("Organizations in Cote d'Ivoire:")
for org in ivory_coast_orgs:
    print_mongo(org)


### 2. Filtering by Industry
This query shows how to filter organizations by their industry type, specifically looking for organizations in the "Human Resources / HR" industry.

In [None]:
# Filter by industry: equality match on the "industry" field.
# The cursor is named after the industry actually queried.
hr_orgs = org_collection.find({"industry": "Human Resources / HR"})
print("\nOrganizations in Human Resources / HR:")
for org in hr_orgs:
    print_mongo(org)

## Complex Filters

### 1. Multiple Field Criteria
This query combines multiple conditions using AND logic. It finds Public Safety organizations in China that have more than 5200 employees.

In [None]:
# Several top-level keys in one filter document are combined with AND:
# a document must satisfy all three conditions to match.
complex_filter = org_collection.find({
    "country": "China",
    "industry": "Public Safety",
    "numberOfEmployees": {"$gt": 5200}
})
# The heading starts with a real newline (the original "\P" was an invalid
# escape that printed a backslash) and states the threshold the query uses.
print("\nPublic Safety organizations in China with more than 5200 employees:")
for org in complex_filter:
    print_mongo(org)


### 2. Nested Conditions with OR Logic
This query demonstrates the use of logical OR operations. It finds Plastics organizations that are either located in the USA OR have more than 500 employees.

In [None]:
# The "$or" clause and the "industry" key sit side by side, so they are
# ANDed: industry must be Plastics AND (country is USA OR employees > 500).
nested_filter = org_collection.find({
    "$or": [
        {"country": "USA"},
        {"numberOfEmployees": {"$gt": 500}}
    ],
    "industry": "Plastics"
})
# Leading "\n" replaces the original invalid "\P" escape, which printed a
# literal backslash instead of a blank line.
print("\nPlastics organizations in USA or with more than 500 employees:")
for org in nested_filter:
    print_mongo(org)


### 3. Pattern Matching with Regex
This query shows how to use regular expressions for pattern matching in MongoDB. It finds organizations whose names start with 'Ma' (case-insensitive).

In [None]:
# "^Ma" anchors the match at the start of the name; the "i" option makes it
# case-insensitive.
regex_filter = org_collection.find(
    {
        "name": {
            "$regex": "^Ma",
            "$options": "i",
        }
    }
)
print("\nOrganizations with names starting with 'Ma':")
for matched_org in regex_filter:
    print_mongo(matched_org)


### 4. Array Filtering with $elemMatch
This query demonstrates how to filter documents based on array field conditions. `$elemMatch` matches a document only when a single element of its `departments` array satisfies every listed criterion — here, a department located on Floor 2 with more than 15 employees.

In [None]:
# Remove any sample documents left behind by an earlier, interrupted run so
# the inserts below start from a clean state.
org_collection.delete_one({"organizationId": "org999", "industry": "Software"})
org_collection.delete_one({"organizationId": "org111", "industry": "Software"})

# Has a Floor 2 department (Marketing) with more than 15 employees.
sample_org = {
    "organizationId": "org999",
    "name": "Tech Solutions Inc",
    "industry": "Software",
    "departments": [
        {"name": "Engineering", "employees": 50, "location": "Floor 1"},
        {"name": "Marketing", "employees": 20, "location": "Floor 2"},
        {"name": "HR", "employees": 10, "location": "Floor 2"}
    ]
}

# No department satisfies both conditions at once.
sample_org2 = {
    "organizationId": "org111",
    "name": "Infinity software",
    "industry": "Software",
    "departments": [
        {"name": "Engineering", "employees": 5, "location": "Floor 1"},
        {"name": "Marketing", "employees": 2, "location": "Floor 2"},
        {"name": "HR", "employees": 3, "location": "Floor 2"}
    ]
}

try:
    org_collection.insert_one(sample_org)
    org_collection.insert_one(sample_org2)

    # $elemMatch requires one and the same array element to meet every
    # condition listed inside it.
    array_filter = org_collection.find({
        "departments": {
            "$elemMatch": {
                "location": "Floor 2",
                "employees": {"$gt": 15}
            }
        }
    })

    print("Organizations with departments on Floor 2 having more than 15 employees:")
    for org in array_filter:
        print_mongo(org)
finally:
    # Always remove the sample documents, even if the query or the printing
    # above raised, so they do not leak into the collection.
    org_collection.delete_one({"organizationId": "org999", "industry": "Software"})
    org_collection.delete_one({"organizationId": "org111", "industry": "Software"})