########################################################################################

## Upsert contentStats (test)
v2: exhibit engangements count

########################################################################################

## (optional) libraries in trigger function

In [1]:
import pymongo # connect to MongoDB
from pymongo import MongoClient
from IPython.display import clear_output # clear output everytime running
from pprint import pprint

## mandatory libraries in trigger function

In [2]:
import json
import sys
# from mongo_client import mongo_client
from bson.objectid import ObjectId
from bson import regex
from datetime import datetime, timedelta
import math

In [3]:
# connect to MongoDB
## define connection URI as role; analytics-admin
connectionUri = 'mongodb+srv://analytics-admin:pnYT55BGWwHePK1M@dev-cluster.fg2e5.mongodb.net/myFirstDatabase?retryWrites=true&w=majority'

## assign client
client = pymongo.MongoClient(connectionUri)

## assign databases
appDb = client['app-db']
analyticsDb = client['analytics-db']

## assign collections
### source collections
contents = appDb['contents']

### destination collections
creatorStats = analyticsDb['creatorStats']
hashtagStats = analyticsDb['hashtagStats']


########################################################################################

########################################################################################

## creator parsing using 'contents'

### note!!!: Be careful! running this cell cause collection changes

### below cell just for filtering in testing

In [4]:
# define content parameters
contentDateThreshold = 14
halfLifeHours = 24
topContentslimit = 100

In [38]:
# define cursor
contentStatsCursor = [
    {
        # filter for only visible contents
        '$match': {
            'createdAt': {
                '$gte': (datetime.utcnow() - timedelta(days=contentDateThreshold)) 
            },
            'visibility': 'publish'
        }
    }, {
        # map to calculate content low-level score
        '$project': {
            ## equation: ageScore = e^(-{\lambda}*t)
            'aggregator.ageScore': {
                '$exp': {
                    '$multiply': [
                        {
                            '$divide': [
                                {
                                    '$subtract': [
                                        datetime.utcnow(), '$updatedAt'
                                    ]
                                }, 60 * 60 * 1000
                            ]
                        }, {
                            '$divide': [
                                {
                                    '$ln': 2
                                }, halfLifeHours
                            ]
                        }, -1
                    ]
                }
            }, 
            ## equation: engagementScore = {\sigma}_{k}({\beta}_{k}*x_{k})
            'aggregator.engagementScore': {
                '$sum': [
                    {
                        '$multiply': [
                            '$engagements.like.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.comment.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.recast.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.quote.count', 1
                        ]
                    }
                ]
            }, 
            # project for investigation
            # add photo count & message character length
            'updatedAt': 1, 
            'likeCount': '$engagements.like.count', 
            'commentCount': '$engagements.comment.count', 
            'recastCount': '$engagements.recast.count', 
            'quoteCount': '$engagements.quote.count',
            'photoCount': {
                '$size': {
                    '$ifNull': [
                        '$payload.photo.contents', []
                    ]
                }
            },
            'characterLength': {
                '$strLenCP': {
                    '$ifNull': [
                       '$payload.message', '-' 
                    ]
                }
            }
        }
    }, {
        # scoring
        '$addFields': {
            'score': {
                '$multiply': [
                    {
                        '$add': [
                            # add bias = 1
                            '$aggregator.engagementScore', 1
                        ]
                    }, '$aggregator.ageScore'
                ]
            }
        }
#     }, {
#         # upsert to 'contentStats' collection
#         ## equation: score = ageScore*(engagementScore + 1)*(hastagDiversityScore)
#         '$merge': {
#             'into': {
#                 'db': 'analytics-db', 
#                 'coll': 'contentStats'
#             }, 
#             'on': '_id', 
#             'whenMatched': 'replace', 
#             'whenNotMatched': 'insert'
#         }
    }
]

In [39]:
# clear the output
clear_output()

# print output
pprint(list(contents.aggregate(contentStatsCursor)))

[{'_id': ObjectId('617a3fe741824b8714ae9be5'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 284,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617a3fe941824b869eae9bf1'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 201,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617a3fed41824b1eb9ae9bfd'),
  'aggregator': {'ageScore': 0.05535006847532668, 'engagementScore': 0},
  'characterLength': 284,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.05535006847532668,
  'updatedAt': datetime.datetime(2021, 10, 28, 6, 15, 9, 520000)},
 {'_id': ObjectId('617a3fef41824b762cae9

  'recast': 0,
  'score': 0.08474736320036018,
  'updatedAt': datetime.datetime(2021, 10, 28, 21, 0, 9, 524000)},
 {'_id': ObjectId('617b0f5bad6257668dd733e3'),
  'aggregator': {'ageScore': 0.08474850134192245, 'engagementScore': 0},
  'characterLength': 146,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.08474850134192245,
  'updatedAt': datetime.datetime(2021, 10, 28, 21, 0, 11, 198000)},
 {'_id': ObjectId('617b0f5cad6257fffed733ec'),
  'aggregator': {'ageScore': 0.08474951303614305, 'engagementScore': 0},
  'characterLength': 288,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.08474951303614305,
  'updatedAt': datetime.datetime(2021, 10, 28, 21, 0, 12, 686000)},
 {'_id': ObjectId('617b0f5ead625785f6d733f5'),
  'aggregator': {'ageScore': 0.08475082934455183, 'engagementScore': 0},
  'characterLength': 264,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.0847

  'recast': 0,
  'score': 0.13100426994380934,
  'updatedAt': datetime.datetime(2021, 10, 29, 12, 5, 0, 990000)},
 {'_id': ObjectId('617be94b6ddb6374fe7ed771'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 109,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617be9506ddb635cbc7ed77c'),
  'aggregator': {'ageScore': 0.1325984855988114, 'engagementScore': 0},
  'characterLength': 109,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.1325984855988114,
  'updatedAt': datetime.datetime(2021, 10, 29, 12, 30, 8, 710000)},
 {'_id': ObjectId('617bea766ddb6319307ed785'),
  'aggregator': {'ageScore': 0.13291097049152226, 'engagementScore': 0},
  'characterLength': 212,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.1329109704

  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617c958f6ddb63c0147edc0e'),
  'aggregator': {'ageScore': 0.1888741298394573, 'engagementScore': 0},
  'characterLength': 87,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.1888741298394573,
  'updatedAt': datetime.datetime(2021, 10, 30, 0, 45, 3, 884000)},
 {'_id': ObjectId('617c99116ddb638da07edc17'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 277,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617c99146ddb63df637edc22'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 189,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.857553502176534

  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d5df26ddb6363b97ee0af'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 100,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d5df46ddb63071c7ee0ba'),
  'aggregator': {'ageScore': 0.28504360901836684, 'engagementScore': 0},
  'characterLength': 41,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.28504360901836684,
  'updatedAt': datetime.datetime(2021, 10, 30, 15, 0, 4, 536000)},
 {'_id': ObjectId('617d5df46ddb63101e7ee0c3'),
  'aggregator': {'ageScore': 0.28504385141639166, 'engagementScore': 0},
  'characterLength': 81,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.2850438514163

  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d88f26ddb634e437ee5a8'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 204,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d89f66ddb637da67ee5b3'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 147,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d8ae16ddb63b58b7ee5be'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 173,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt

  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d96436ddb6312b97eeaa6'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 166,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d96456ddb6352467eeab1'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 171,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617d964a6ddb636a7a7eeabc'),
  'aggregator': {'ageScore': 0.32000666402883443, 'engagementScore': 0},
  'characterLength': 171,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.

  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617dea956ddb635b3f7eef92'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 158,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617dea986ddb6353757eef9d'),
  'aggregator': {'ageScore': 0.3805010250442408, 'engagementScore': 0},
  'characterLength': 178,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.3805010250442408,
  'updatedAt': datetime.datetime(2021, 10, 31, 1, 0, 8, 944000)},
 {'_id': ObjectId('617df19c6ddb63103e7eefa6'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 327,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8

  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617e49866ddb6372d27ef49f'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 135,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617e49886ddb6363db7ef4aa'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 269,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617e498c6ddb632fbf7ef4b5'),
  'aggregator': {'ageScore': 0.46243189503045284, 'engagementScore': 0},
  'characterLength': 135,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.

  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617e93576ddb631f647ef9fa'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 320,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617e935b6ddb6350ac7efa05'),
  'aggregator': {'ageScore': 0.5381216251800274, 'engagementScore': 0},
  'characterLength': 150,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.5381216251800274,
  'updatedAt': datetime.datetime(2021, 10, 31, 13, 0, 11, 704000)},
 {'_id': ObjectId('617e935f6ddb635c577efa0e'),
  'aggregator': {'ageScore': 0.5381364589419568, 'engagementScore': 0},
  'characterLength': 160,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0

  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617eba076ddb63450f7efedd'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 319,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617eba086ddb638d707efee8'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 230,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617eba086ddb6368fb7efef3'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 184,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast'

  'updatedAt': datetime.datetime(2021, 10, 31, 19, 45, 20, 355000)},
 {'_id': ObjectId('617ef2526ddb6393ed7f0431'),
  'aggregator': {'ageScore': 0.6540077204740032, 'engagementScore': 0},
  'characterLength': 115,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.6540077204740032,
  'updatedAt': datetime.datetime(2021, 10, 31, 19, 45, 22, 537000)},
 {'_id': ObjectId('617ef5c46ddb634c087f043a'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 153,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617ef5c86ddb6363dc7f0445'),
  'aggregator': {'ageScore': 0.8575535021765344, 'engagementScore': 0},
  'characterLength': 149,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021

  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8575535021765344,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 8, 17, 940000)},
 {'_id': ObjectId('617f319093707296b7cb181e'),
  'aggregator': {'ageScore': 0.7447152999904423, 'engagementScore': 0},
  'characterLength': 177,
  'comment': 0,
  'like': 0,
  'photoCount': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.7447152999904423,
  'updatedAt': datetime.datetime(2021, 11, 1, 0, 15, 12, 284000)},
 {'_id': ObjectId('617f3193937072c449cb1827'),
  'aggregator': {'ageScore': 0.7447321303637652, 'engagementScore': 0},
  'characterLength': 203,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.7447321303637652,
  'updatedAt': datetime.datetime(2021, 11, 1, 0, 15, 15, 101000)},
 {'_id': ObjectId('617f31969370721defcb1830'),
  'aggregator': {'ageScore': 0.7447522831002907, 'engagementScore': 0},
  'characterLength': 112,
  'comment': 0,
  'like': 0,
  'photoCoun

  'score': 0.8604324790893437,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 15, 15, 710000)},
 {'_id': ObjectId('617f77e593707267cccb1d1c'),
  'aggregator': {'ageScore': 0.8604480934805201, 'engagementScore': 0},
  'characterLength': 152,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8604480934805201,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 15, 17, 972000)},
 {'_id': ObjectId('617f77e893707207d7cb1d27'),
  'aggregator': {'ageScore': 0.8604652199356697, 'engagementScore': 0},
  'characterLength': 138,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8604652199356697,
  'updatedAt': datetime.datetime(2021, 11, 1, 5, 15, 20, 453000)},
 {'_id': ObjectId('617f77ec9370728a0ecb1d32'),
  'aggregator': {'ageScore': 0.8604960497946507, 'engagementScore': 0},
  'characterLength': 138,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.8604960497946507,
  'update

  'characterLength': 164,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.9589444534260714,
  'updatedAt': datetime.datetime(2021, 11, 1, 9, 0, 27, 397000)},
 {'_id': ObjectId('617facadc71bda3a76bc474d'),
  'aggregator': {'ageScore': 0.9589575088196424, 'engagementScore': 0},
  'characterLength': 146,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.9589575088196424,
  'updatedAt': datetime.datetime(2021, 11, 1, 9, 0, 29, 94000)},
 {'_id': ObjectId('617facafc71bdaaf1abc4756'),
  'aggregator': {'ageScore': 0.9589745957287944, 'engagementScore': 0},
  'characterLength': 259,
  'comment': 0,
  'like': 0,
  'photoCount': 1,
  'quote': 0,
  'recast': 0,
  'score': 0.9589745957287944,
  'updatedAt': datetime.datetime(2021, 11, 1, 9, 0, 31, 315000)},
 {'_id': ObjectId('617fb017c71bda4098bc475f'),
  'aggregator': {'ageScore': 0.9657081789195725, 'engagementScore': 0},
  'characterLength': 154,
  'comment': 0,
  