########################################################################################

## Upsert contentStats (test)
v1: collect only computed statistics without hashtag property

user independent

just for testing

########################################################################################

## (optional) libraries in trigger function

In [1]:
import pymongo # connect to MongoDB
from pymongo import MongoClient
from IPython.display import clear_output # clear output everytime running
from pprint import pprint

## mandatory libraries in trigger function

In [2]:
import json
import sys
# from mongo_client import mongo_client
from bson.objectid import ObjectId
from bson import regex
from datetime import datetime, timedelta
import re
import math

In [3]:
# connect to MongoDB
## define connection URI as role; analytics-admin
connectionUri = 'mongodb+srv://analytics-admin:pnYT55BGWwHePK1M@dev-cluster.fg2e5.mongodb.net/myFirstDatabase?retryWrites=true&w=majority'

## assign client
client = pymongo.MongoClient(connectionUri)

## assign databases
appDb = client['app-db']
analyticsDb = client['analytics-db']

## assign collections
### source collections
contents = appDb['contents']

### destination collections
creatorStats = analyticsDb['creatorStats']
hashtagStats = analyticsDb['hashtagStats']


########################################################################################

########################################################################################

## creator parsing using 'contents'

### note!!!: Be careful! running this cell cause collection changes

### below cell just for filtering in testing

In [4]:
# define content parameters
contentDateThreshold = 14
halfLifeHours = 24
topContentslimit = 100

In [5]:
# define cursor
cursor = [
    {
        # filter for only visible contents
        '$match': {
            'createdAt': {
                '$gte': (datetime.utcnow() - timedelta(days=contentDateThreshold)) 
            },
            'visibility': 'publish'
        }
    }, {
        # map to calculate content low-level score
        '$project': {
            ## equation: ageScore = e^(-{\lambda}*t)
            'aggregator.ageScore': {
                '$exp': {
                    '$multiply': [
                        {
                            '$divide': [
                                {
                                    '$subtract': [
                                        datetime.utcnow(), '$updatedAt'
                                    ]
                                }, 60 * 60 * 1000
                            ]
                        }, {
                            '$divide': [
                                {
                                    '$ln': 2
                                }, halfLifeHours
                            ]
                        }, -1
                    ]
                }
            }, 
            ## equation: engagementScore = {\sigma}_{k}({\beta}_{k}*x_{k})
            'aggregator.engagementScore': {
                '$sum': [
                    {
                        '$multiply': [
                            '$engagements.like.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.comment.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.recast.count', 1
                        ]
                    }, {
                        '$multiply': [
                            '$engagements.quote.count', 1
                        ]
                    }
                ]
            }, 
            # project for investigation
            'updatedAt': 1, 
            'like': '$engagements.like.count', 
            'comment': '$engagements.comment.count', 
            'recast': '$engagements.recast.count', 
            'quote': '$engagements.quote.count'
        }
    }, {
        # 
        '$addFields': {
            'score': {
                '$multiply': [
                    {
                        '$add': [
                            # add bias = 1
                            '$aggregator.engagementScore', 1
                        ]
                    }, '$aggregator.ageScore'
                ]
            }
        }
#     }, {
#         # upsert to 'contentStats' collection
#         ## equation: score = ageScore*(engagementScore + 1)*(hastagDiversityScore)
#         '$merge': {
#             'into': {
#                 'db': 'analytics-db', 
#                 'coll': 'contentStats'
#             }, 
#             'on': '_id', 
#             'whenMatched': 'replace', 
#             'whenNotMatched': 'insert'
#         }
    }
]

In [6]:
# clear the output
clear_output()

# print output
pprint(list(contents.aggregate(cursor)))

[{'_id': ObjectId('615a86cd248bfd4491f8547a'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615a86cd248bfd5df9f85482'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615a8dd5248bfd4da8f8548a'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615a94de248bfdebd3f85492'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,


  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615b7385ae203537602d9ce1'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615b75ddae2035d1742d9ce9'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615b75ddae2035d0a42d9cf1'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615b7a8dae2035647c2d9cf9'),


  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615c6169ae2035c6512da1ae'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615c64ecae20354e3f2da1b6'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615c64edae203562722da1be'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615c64edae2035b0572da1c6'),


  'recast': 0,
  'score': 0.14051732012351348,
  'updatedAt': datetime.datetime(2021, 10, 15, 8, 10, 16, 953000)},
 {'_id': ObjectId('615d97f7167e884570522454'),
  'aggregator': {'ageScore': 0.14051732012351348, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14051732012351348,
  'updatedAt': datetime.datetime(2021, 10, 15, 8, 10, 16, 953000)},
 {'_id': ObjectId('615d9a4e504f3b7f00d6102d'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615d9a4e504f3b6040d61035'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615d9a4e504f3b8b9cd6103d'),
  

 {'_id': ObjectId('615ea901db45d0514e925b71'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615eaa2ddb45d04f57925b79'),
  'aggregator': {'ageScore': 0.14051732012351348, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14051732012351348,
  'updatedAt': datetime.datetime(2021, 10, 15, 8, 10, 16, 953000)},
 {'_id': ObjectId('615eac86d17d164ec555d18b'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615eac86d17d1655b955d193'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
 

  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615f786d4f989b94f44c27c3'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615f786d4f989be8d84c27cb'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615f7bf14f989b68484c27d3'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('615f7bf14f989b

 {'_id': ObjectId('6160500ea52e720bba170dab'),
  'aggregator': {'ageScore': 0.14051732012351348, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14051732012351348,
  'updatedAt': datetime.datetime(2021, 10, 15, 8, 10, 16, 953000)},
 {'_id': ObjectId('61605265a52e722926170db5'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('61605265a52e723576170dbd'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('61605265a52e72fea2170dc5'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
 

  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6161c4b5a52e72dae0171355'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6161ca91a52e7243fb17135d'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6161cbbda52e723b3d171365'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6

  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6163da4825d4856a2afc98a9'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6163da4925d4857cf1fc98b1'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6163da4a25d4855197fc98b9'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6163da4c25d4850813fc98c1'),
  'aggregator': {'ageScore': 0.1499255873758219

  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6164630617d82522280797ef'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6164668917d82506640797f7'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('6164668917d825388a0797ff'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_

  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('61656026cf52e61a32fca5f5'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('61656026cf52e63becfca5ff'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('616563aacf52e626e2fca609'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('616564d5cf52e678cefca613'),
  'aggregator':

  'aggregator': {'ageScore': 0.14051732012351348, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14051732012351348,
  'updatedAt': datetime.datetime(2021, 10, 15, 8, 10, 16, 953000)},
 {'_id': ObjectId('61686c9d6ee6fbaf87956262'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('616870216ee6fb030a95626c'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
  'updatedAt': datetime.datetime(2021, 10, 15, 10, 24, 55, 237000)},
 {'_id': ObjectId('616870216ee6fb5f2a956276'),
  'aggregator': {'ageScore': 0.14992558737582198, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.14992558737582198,
 

  'recast': 0,
  'score': 0.16154799185703336,
  'updatedAt': datetime.datetime(2021, 10, 15, 13, 0, 1, 916000)},
 {'_id': ObjectId('61697b5227a56e6de5e31992'),
  'aggregator': {'ageScore': 0.16154815256417754, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.16154815256417754,
  'updatedAt': datetime.datetime(2021, 10, 15, 13, 0, 2, 40000)},
 {'_id': ObjectId('61697c7d27a56ec449e3199c'),
  'aggregator': {'ageScore': 0.16193674007626047, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.16193674007626047,
  'updatedAt': datetime.datetime(2021, 10, 15, 13, 5, 1, 510000)},
 {'_id': ObjectId('61697c7d27a56e2c90e319a6'),
  'aggregator': {'ageScore': 0.16193713631550002, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.16193713631550002,
  'updatedAt': datetime.datetime(2021, 10, 15, 13, 5, 1, 815000)},
 {'_id': ObjectId('61697c7e27a56e79f1e319b0'),
  'aggreg

 {'_id': ObjectId('616aa62427a56ee053e32084'),
  'aggregator': {'ageScore': 0.2984247310431608, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.2984247310431608,
  'updatedAt': datetime.datetime(2021, 10, 16, 10, 15, 0, 846000)},
 {'_id': ObjectId('616aa62527a56ebf4ce3208e'),
  'aggregator': {'ageScore': 0.29842611484968884, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.29842611484968884,
  'updatedAt': datetime.datetime(2021, 10, 16, 10, 15, 1, 424000)},
 {'_id': ObjectId('616aa62527a56eb6ade32098'),
  'aggregator': {'ageScore': 0.2984263662339164, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.2984263662339164,
  'updatedAt': datetime.datetime(2021, 10, 16, 10, 15, 1, 529000)},
 {'_id': ObjectId('616aa62527a56e4028e320a2'),
  'aggregator': {'ageScore': 0.2984265673414509, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recas

  'quote': 0,
  'recast': 0,
  'score': 0.6667274431606101,
  'updatedAt': datetime.datetime(2021, 10, 17, 14, 5, 1, 519000)},
 {'_id': ObjectId('616c2fe5e646071c480d513e'),
  'aggregator': {'ageScore': 0.6699455026887788, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.6699455026887788,
  'updatedAt': datetime.datetime(2021, 10, 17, 14, 15, 1, 708000)},
 {'_id': ObjectId('616c2fe5e6460738050d5148'),
  'aggregator': {'ageScore': 0.6699457982952792, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.6699457982952792,
  'updatedAt': datetime.datetime(2021, 10, 17, 14, 15, 1, 763000)},
 {'_id': ObjectId('616c2fe5e646073ba70d5152'),
  'aggregator': {'ageScore': 0.6699460724032416, 'engagementScore': 0},
  'comment': 0,
  'like': 0,
  'quote': 0,
  'recast': 0,
  'score': 0.6699460724032416,
  'updatedAt': datetime.datetime(2021, 10, 17, 14, 15, 1, 814000)},
 {'_id': ObjectId('616c2fe5e646073a4a0d515c')