In [1]:
! pip install pymongo
! pip install dnspython

Collecting pymongo
  Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.13.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.13.2


In [2]:
from pymongo import MongoClient
from google.colab import userdata


#### Connectivity

In [3]:
connection_url = f"mongodb+srv://{userdata.get('optimus_mongodb_all_user')}:{userdata.get('optimus_mongodb_all_user_pwd')}@optimus.geoo4if.mongodb.net/"

In [None]:
# connection_url = f"mongodb+srv://{userdata.get('optimus_rw_user')}:{userdata.get('optimus_rw_pwd')}@optimus.geoo4if.mongodb.net/"

In [4]:
mongo_client = MongoClient(connection_url)

In [5]:
mongo_db = mongo_client['datasets']

In [6]:
order_collection = mongo_db['orders']

In [16]:
product_collection = mongo_db['products']

### Order Collection

#### Fetching sample record from orders collection - using find

In [7]:
resultSet = []

cursorSet = order_collection.find({"order_id":123002578})

for item in cursorSet:
  resultSet.append(item)

In [8]:
resultSet

[{'_id': ObjectId('685cf70518134c4defa3540c'),
  'customer_id': 579,
  'customer_status': 'Platinum',
  'order_date': '1-Jan-17',
  'delivery_date': '7-Jan-17',
  'order_id': 123002578,
  'product_id': 220101000000.0,
  'order_quantity': 2,
  'total_retail_cost': 92.6,
  'cost_per_unit': 20.7}]

#### Fetching sample record from orders collection - using match
aggreate -> match gives us control over modifying the result set

In [9]:
order_collection.aggregate([{'$match':{"order_id":123002578}}]).to_list()

[{'_id': ObjectId('685cf70518134c4defa3540c'),
  'customer_id': 579,
  'customer_status': 'Platinum',
  'order_date': '1-Jan-17',
  'delivery_date': '7-Jan-17',
  'order_id': 123002578,
  'product_id': 220101000000.0,
  'order_quantity': 2,
  'total_retail_cost': 92.6,
  'cost_per_unit': 20.7}]

#### Get the total count of customer status group by values

In [10]:
order_collection.aggregate([{'$group': {
    '_id':'$customer_status',
    'totalCount': {'$count': {}}
}}]).to_list()

[{'_id': 'Silver', 'totalCount': 15008},
 {'_id': 'Diamond', 'totalCount': 37799},
 {'_id': 'Gold', 'totalCount': 51373},
 {'_id': 'Platinum', 'totalCount': 80833}]

#### Get total order count where total_retail_cost > 100 and group by customer status

In [15]:
order_collection.aggregate([
    {'$match':{'total_retail_cost':{'$gt':100}}},
    {'$group':{ '_id': '$customer_status','totalCount':{'$count': {}}}}
]).to_list()

[{'_id': 'Platinum', 'totalCount': 34797},
 {'_id': 'Diamond', 'totalCount': 16088},
 {'_id': 'Gold', 'totalCount': 22222},
 {'_id': 'Silver', 'totalCount': 6614}]

#### find the month name where order was high for a perticular year

In [None]:
order_collection.aggregate([
    {
        "$project": {
            "order_date":True,

        }
    }
])

### Products Collection

In [20]:
product_collection.find_one({"product_id":220100000000.0}) # taking first sample doc

{'_id': ObjectId('685cf75918134c4defa629c6'),
 'product_id': 220100000000.0,
 'product_line': 'Clothes & Shoes',
 'product_category': 'Clothes',
 'product_group': 'Eclipse Clothing',
 'product_name': '2000 Black/Red',
 'supplier_name': 'Eclipse Inc',
 'supplier_id': 1303}

#### how many product category do we have

In [23]:
product_collection.aggregate([{
    '$group': {'_id':'$product_category', 'totalCategory' : {'$count': {}}}
}]).to_list()

[{'_id': 'Golf', 'totalCategory': 346},
 {'_id': 'Shoes', 'totalCategory': 496},
 {'_id': 'Swim Sports', 'totalCategory': 205},
 {'_id': 'Children Outdoors', 'totalCategory': 50},
 {'_id': 'Children Sports', 'totalCategory': 722},
 {'_id': 'Winter Sports', 'totalCategory': 164},
 {'_id': 'Indoor Sports', 'totalCategory': 264},
 {'_id': 'Team Sports', 'totalCategory': 81},
 {'_id': 'Running - Jogging', 'totalCategory': 214},
 {'_id': 'Clothes', 'totalCategory': 1525},
 {'_id': 'Outdoors', 'totalCategory': 357},
 {'_id': 'Racket Sports', 'totalCategory': 169},
 {'_id': 'Assorted Sports Articles', 'totalCategory': 911}]

#### how many product group do we have

In [25]:
product_collection.aggregate([{
    '$group': {'_id':'$product_group', 'totalGroup' : {'$count': {}}}
}]).to_list()[:5]

[{'_id': 'Olssons, Kids', 'totalGroup': 20},
 {'_id': 'Backpacks', 'totalGroup': 74},
 {'_id': 'Tracker Shoes', 'totalGroup': 164},
 {'_id': 'Eclipse Clothing', 'totalGroup': 637},
 {'_id': 'Ski Dress', 'totalGroup': 99}]

### Combined Queries

#### Total Count of orders for each product category

In [31]:
product_collection.aggregate([{
    "$lookup": {
        "from": "orders",
        "localField": "product_id",
        "foreignField": "product_id",
        "pipeline": [{
            "$group": {"_id": "order_id", "totalOrders": {"$count": {}}}
        }],
        "as":"totalOrdersInfo"
    }
}]).to_list()[0]

KeyboardInterrupt: 