########################################################################################

## Upsert creatorStats
v2: collect only computed statistics without hashtag property (wait for 'contents' upgrade)

########################################################################################

## (optional) libraries in trigger function

In [1]:
import pymongo # connect to MongoDB
from pymongo import MongoClient
from IPython.display import clear_output # clear output everytime running
from pprint import pprint

## mandatory libraries in trigger function

In [2]:
import json
import sys
# from mongo_client import mongo_client
from bson.objectid import ObjectId
from bson import regex
from datetime import datetime, timedelta
import re
import math

In [3]:
# connect to MongoDB
## define connection URI as role; analytics-admin
connectionUri = 'mongodb+srv://analytics-admin:pnYT55BGWwHePK1M@dev-cluster.fg2e5.mongodb.net/myFirstDatabase?retryWrites=true&w=majority'

## assign client
client = pymongo.MongoClient(connectionUri)

## assign databases
appDb = client['app-db']
analyticsDb = client['analytics-db']

## assign collections
### source collections
contents = appDb['contents']

### destination collections
creatorStats = analyticsDb['creatorStats']
hashtagStats = analyticsDb['hashtagStats']


########################################################################################

########################################################################################

## creator parsing using 'contents'

### note!!!: Be careful! running this cell cause collection changes

### below cell just for filtering in testing

In [89]:
# define content parameters
contentDateThreshold = 14
likedWeight = 1
commentedWeight = 1
recastedWeight = 1
quotedWeight = 1
followedWeight = 0.01
halfLifeHours = 24

In [112]:
# define cursor
cursor = [
    {
        # filter age of contents for only newer than specific days
        # filter only visible contents
        '$match': {
            'createdAt': {
                '$gte': (datetime.utcnow() - timedelta(days=contentDateThreshold)) 
            },
            'visibility': 'publish'
        }
    }, {
        # summarize to get summation of engagements for each content type & user
        '$group': {
            '_id': {
                'authorId': '$author.id', 
                'type': '$type'
            }, 
            'contentCount': {
                '$count': {}
            }, 
            'likedCount': {
                '$sum': '$engagements.like.count'
            }, 
            'commentedCount': {
                '$sum': '$engagements.comment.count'
            }, 
            'recastedCount': {
                '$sum': '$engagements.recast.count'
            }, 
            'quotedCount': {
                '$sum': '$engagements.quote.count'
            }, 
            'updatedAt': {
                '$max': '$updatedAt'
            }
        }
    }, {
        # add calculated fields
        '$addFields': {
            # age score: a decay value as time since last update time
            ## equation: ageScore = e^(-{\lambda}*t)
            'aggregator.ageScore': {
                '$exp': {
                    '$multiply': [
                        {
                            '$multiply': [
                                {
                                    '$divide': [
                                        {
                                            '$subtract': [
                                                datetime.utcnow(), '$updatedAt'
                                            ]
                                        }, 60 * 60 * 1000
                                    ]
                                }, {
                                    '$divide': [
                                        {
                                            '$ln': 2
                                        }, 24
                                    ]
                                }
                            ]
                        }, -1
                    ]
                }
            }, 
            # engagement score: a linear combination result among type of available engagements
            ## equation: engagementScore = {\sigma}_{k}({\beta}_{k}*x_{k})
            'aggregator.engagementScore': {
                '$sum': [
                        {
                            '$multiply': [
                                '$commentedCount', commentedWeight
                            ]
                        }, {
                            '$multiply': [
                                '$recastedCount', recastedWeight
                            ]
                        }, {
                            '$multiply': [
                                '$quotedCount', quotedWeight
                            ]
                        }, {
                            '$multiply': [
                                '$quotedCount', quotedWeight
                            ]
                        # add bias = 1
                        }, 1
                    ]
            }
        }
    }, {
        # summarize to get summation of engagements for each user
        '$group': {
            '_id': '$_id.authorId', 
            'creatorContentCount': {
                '$sum': '$contentCount'
            }, 
            'creatorLikedCount': {
                '$sum': '$likeCount'
            }, 
            'creatorCommentedCount': {
                '$sum': '$commentCount'
            }, 
            'creatorRecastedCount': {
                '$sum': '$recastCount'
            }, 
            'creatorQuotedCount': {
                '$sum': '$quoteCount'
            }, 
            'updatedAt': {
                '$max': '$updatedAt'
            }, 
            'summary': {
                '$push': {
                    'type': '$_id.type', 
                    'typeCount': '$contentCount', 
                    'updatedAt': '$updatedAt', 
                    'aggregator': '$aggregator'
                }
            }
        }
    }, {
        # deconstruct object, named "summary" for furthor calculation
        '$unwind': {
            'path': '$summary', 
            'preserveNullAndEmptyArrays': True
        }
    }, {
        # calculate total engagementScore
        '$addFields': {
            'aggregator.engagementScore': {
                '$sum': [
                    {
                        '$multiply': [
                            '$creatorLikedCount', commentedWeight
                        ]
                    }, {
                        '$multiply': [
                            '$creatorLikedCount', recastedWeight
                        ]
                    }, {
                        '$multiply': [
                            '$creatorCommentedCount', quotedWeight
                        ]
                    }, {
                        '$multiply': [
                            '$creatorQuotedCount', quotedWeight
                        ]
                    # add bias = 1
                    }, 1
                ]
            }, 
            # calculate weights as fractions of content type per total contents
            ## equation: typeWeight(type) = n_{content|type}/N_{content}
            'summary.typeWeight': {
                '$divide': [
                    '$summary.typeCount', '$creatorContentCount'
                ]
            }
        }
    }, {
        # join with 'users' for more information
        '$lookup': {
            'from': 'users', 
            'localField': '_id', 
            'foreignField': '_id', 
            'as': 'userDetail'
        }
    }, {
        # filter for only publish users
        '$match': {
            'userDetail.visibility': 'publish'
        }
    }, {
        # deconstruct 'userDetail' for accessibility
        '$unwind': {
            'path': '$userDetail', 
            'preserveNullAndEmptyArrays': True
        }
    }, {
        # calculate followed score then add to both aggregator
        ## equation: followedScore = (\gamma}*N_{follower}
        '$addFields': {
            'aggregator.followedScore ': {
                '$multiply': [
                    '$userDetail.followedCount', followedWeight
                ]
            }, 
            'summary.aggregator.followedScore': {
                '$multiply': [
                    '$userDetail.followedCount', followedWeight
                ]
            }
        }
    }, {
        # map intermediate result format
        '$project': {
            '_id': 1, 
            'updatedAt': 1, 
            'creatorContentCount': 1, 
            'summary.type': 1, 
            'summary.typeCount': 1, 
            'summary.updatedAt': 1, 
            'summary.typeWeight': 1, 
            'summary.aggregator': 1, 
            'aggregator.engagementScore': 1, 
            'aggregator.followedScore': 1, 
            'ownerAccount': '$userDetail.ownerAccount', 
            'displayId': '$userDetail.displayId', 
            'createdAt': '$userDetail.createdAt', 
            'followedCount': '$userDetail.followedCount', 
            'followerCount': '$userDetail.followerCount', 
            # calculate creator score for each content type
            ## equation: score = ((typeWeight)*(ageScore)*(engagementScore_{type}/engagementScore)) + followedScore
            'summary.score': {
                '$add': [
                    {
                        '$multiply': [
                            '$summary.typeWeight', {
                                '$divide': [
                                    {
                                        '$add': [
                                            '$summary.aggregator.engagementScore', 1
                                        ]
                                    }, {
                                        '$add': [
                                            '$aggregator.engagementScore', 1
                                        ]
                                    }
                                ]
                            }, '$summary.aggregator.ageScore'
                        ]
                    }, '$summary.aggregator.followedScore'
                ]
            }
        }
    }, {
        # undo the previous '$unwind'
        '$group': {
            '_id': '$_id', 
            'ownerAccount': {
                '$max': '$ownerAccount'
            }, 
            'displayId': {
                '$max': '$displayId'
            }, 
            'creatorContentCount': {
                '$max': '$creatorContentCount'
            }, 
            'createdAt': {
                '$max': '$createdAt'
            }, 
            'updatedAt': {
                '$max': '$updatedAt'
            }, 
            'followedCount': {
                '$max': '$followedCount'
            }, 
            'followerCount': {
                '$max': '$followerCount'
            }, 
            'summary': {
                '$push': '$summary'
            }, 
            'aggregator': {
                '$max': '$aggregator'
            }, 
            # add the latest ageScore to overall aggregator
            'ageScore': {
                '$max': '$summary.aggregator.ageScore'
            }
        }
    }, {
        # map final result format
        '$project': {
            '_id': 1, 
            'ownerAccount': 1, 
            'displayId': 1, 
            'createdAt': 1, 
            'updatedAt': 1, 
            'followedCount': 1, 
            'followerCount': 1, 
            'summary': 1, 
            'aggregator.ageScore': '$ageScore', 
            'aggregator.engagementScore': '$aggregator.engagementScore', 
            'aggregator.followedScore': '$aggregator.followedScore', 
            # calculate overall creator score
            ## equation: score = (ageScore*(engagementScore_{type}/engagementScore)) + followedScore
            'score': {
                '$add': [
                    {
                        '$multiply': [
                            {
                                '$divide': [
                                    '$aggregator.engagementScore', '$creatorContentCount'
                                ]
                            }, '$ageScore'
                        ]
                    }, '$aggregator.followedScore'
                ]
            }
        }
#     }, {
#         # upsert to 'userStats' collection
#         '$merge': {
#             'into': {
#                 'db': 'analytics-db', 
#                 'coll': 'creatorStats'
#             }, 
#             'on': '_id', 
#             'whenMatched': 'replace', 
#             'whenNotMatched': 'insert'
#         }
    }
]

In [113]:
# clear the output
clear_output()

# print output
pprint(list(contents.aggregate(cursor)))

[{'_id': ObjectId('614addffec903a9d987eb580'),
  'aggregator': {'ageScore': 0.9915533739654987, 'engagementScore': 1},
  'createdAt': datetime.datetime(2021, 9, 22, 7, 40, 47, 421000),
  'displayId': 'JulapongTec',
  'followedCount': 0,
  'followerCount': 2,
  'ownerAccount': ObjectId('614addc4ec903a2ca27eb573'),
  'score': None,
  'summary': [{'aggregator': {'ageScore': 0.9915533739654987,
                              'engagementScore': 1,
                              'followedScore': 0.0},
               'score': 0.9915533739654987,
               'type': 'short',
               'typeCount': 237,
               'typeWeight': 1.0,
               'updatedAt': datetime.datetime(2021, 10, 14, 11, 5, 5, 412000)}],
  'updatedAt': datetime.datetime(2021, 10, 14, 11, 5, 5, 412000)},
 {'_id': ObjectId('6151abf6809e787ef7568728'),
  'aggregator': {'ageScore': 0.0042083415423630486, 'engagementScore': 1},
  'createdAt': datetime.datetime(2021, 9, 27, 11, 33, 10, 889000),
  'displayId': 'phooc