# model training
v1: train then update all collection

## (optional) libraries in trigger function

In [1]:
import pymongo # connect to MongoDB
from pymongo import MongoClient
from IPython.display import clear_output # clear output everytime running
from pprint import pprint

## mandatory libraries in trigger function

In [2]:
import json
import sys
# from mongo_client import mongo_client
from bson.objectid import ObjectId
from bson import regex
from datetime import datetime, timedelta
import re
import math

import pickle
import numpy as np
import pandas as pd

In [3]:
# connect to MongoDB
## define connection URI as role; analytics-admin
connectionUri = 'mongodb+srv://analytics-admin:pnYT55BGWwHePK1M@dev-cluster.fg2e5.mongodb.net/myFirstDatabase?retryWrites=true&w=majority'

## assign client
client = pymongo.MongoClient(connectionUri)

## assign databases
appDb = client['app-db']
analyticsDb = client['analytics-db']

## assign collections
### source collections
contents = appDb['contents']
relationships = appDb['relationships']
users = appDb['users']

### destination collections
creatorStats = analyticsDb['creatorStats']
hashtagStats = analyticsDb['hashtagStats']
contentStats = analyticsDb['contentStats']
mlArtifacts = analyticsDb['mlArtifacts']

In [4]:
# define content parameters
updatedAtThreshold = 14

In [14]:
datasetCursor = [
    {
        # filter age of contents for only newer than specific days
        # filter only visible contents
        '$match': {
            'updatedAt': {
                '$gte': (datetime.utcnow() - timedelta(days=contentDateThreshold)) 
            }
        }
    }, {
        '$lookup': {
            'from': 'creatorStats', 
            'localField': 'authorId', 
            'foreignField': '_id', 
            'as': 'userStats'
        }
    }, {
        '$unwind': {
            'path': '$userStats', 
            'preserveNullAndEmptyArrays': True
        }
    }, {
        '$project': {
            '_id': 1,
            'likeCount': 1,
            'commentCount': 1,
            'recastCount': 1,
            'quoteCount': 1,
            'photoCount': 1,
            'characterLength': 1,
            'creatorContentCount' :'$userStats.contentCount',
            'creatorLikedCount': '$userStats.creatorLikedCount',
            'creatorCommentedCount': '$userStats.creatorCommentedCount',
            'creatorRecastedCount': '$userStats.creatorRecastedCount',
            'creatorQuotedCount': '$userStats.creatorQuotedCount',
            'ageScore': '$aggregator.ageScore'
        }
    }
]

In [15]:
# clear the output
clear_output()

# print output
pprint(list(contentStats.aggregate(datasetCursor)))

[{'_id': ObjectId('617a3fe741824b8714ae9be5'),
  'ageScore': 0.5053877431399482,
  'characterLength': 284,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617a3fe941824b869eae9bf1'),
  'ageScore': 0.5053877431399482,
  'characterLength': 201,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617a3fed41824b1eb9ae9bfd'),
  'ageScore': 0.03261982618972318,
  'characterLength': 284,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount':

 {'_id': ObjectId('617aec30ad62570f78d7332f'),
  'ageScore': 0.04646518114963759,
  'characterLength': 267,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617af332ad62578885d73338'),
  'ageScore': 0.5053877431399482,
  'characterLength': 20,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617af335ad625760d8d73344'),
  'ageScore': 0.5053877431399482,
  'characterLength': 121,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 

  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 1},
 {'_id': ObjectId('617b9bfa47928d0c3c4d159b'),
  'ageScore': 0.06666868113500983,
  'characterLength': 172,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617b9bfc47928d51c04d15a4'),
  'ageScore': 0.06786847221352064,
  'characterLength': 185,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 1},
 {'_id': ObjectId('617ba4a947928d169c4d15ae'),
  'ageScore': 0.07352865380745216,
  'characterLength': 1,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 5,
  'creatorLik

  'characterLength': 128,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617c36a36ddb63f6f97ed963'),
  'ageScore': 0.5053877431399482,
  'characterLength': 148,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617c36a66ddb6332d67ed96e'),
  'ageScore': 0.5053877431399482,
  'characterLength': 102,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617c36a86ddb63604e7

  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617cbfbf6ddb6325ef7edcf3'),
  'ageScore': 0.1213845443986017,
  'characterLength': 79,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617cc46d6ddb63441e7edcfc'),
  'ageScore': 0.12255704806056435,
  'characterLength': 156,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617cd4d56ddb63c2097edd05'),
  'ageScore': 0.5053877431399482,
  'characterLength': 69,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'cre

  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d688a6ddb6367987ee146'),
  'ageScore': 0.17167851144102575,
  'characterLength': 274,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d688a6ddb63028c7ee14f'),
  'ageScore': 0.17167866156648381,
  'characterLength': 288,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d688c6ddb635d227ee158'),
  'ageScore': 0.17168092585995612,
  'characterLength': 167,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCoun

  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d84a46ddb6377e17ee531'),
  'ageScore': 0.18187845834419947,
  'characterLength': 185,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d87676ddb637d787ee53a'),
  'ageScore': 0.5053877431399482,
  'characterLength': 173,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d87696ddb6353ea7ee545'),
  'ageScore': 0.5053877431399482,
  'characterLength': 227,
  'comm

  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d8fd56ddb6307fc7ee9aa'),
  'ageScore': 0.18610761056160058,
  'characterLength': 108,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 27,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d8fd96ddb63a4917ee9b3'),
  'ageScore': 0.18611374861256783,
  'characterLength': 106,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 27,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617d8fdf6ddb6312007ee9bc'),
  'ageScore': 0.18612295532200704,
  'characterLength': 97,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorConte

  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617dce736ddb6382c87eedf3'),
  'ageScore': 0.5053877431399482,
  'characterLength': 212,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617dce756ddb637b507eedfe'),
  'ageScore': 0.5053877431399482,
  'characterLength': 102,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617dce7c6ddb6323ad7eee09'),
  'ageScore': 0.21166265565099582,
  'characterLength': 212,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creato

  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e14d86ddb63d0e07ef254'),
  'ageScore': 0.2445697016215124,
  'characterLength': 274,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e14da6ddb6376f67ef25d'),
  'ageScore': 0.24457383769885363,
  'characterLength': 260,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e15ed6ddb636b507ef266'),
  'ageScore': 0.24511308537466703,
  'characterLength': 260,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuo

  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e69366ddb6357b27ef6a7'),
  'ageScore': 0.2908394700363865,
  'characterLength': 350,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e6ca76ddb633b117ef6b0'),
  'ageScore': 0.5053877431399482,
  'characterLength': 324,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e6caf6ddb63a75e7ef6bb'),
  'ageScore': 0.2929196978795689,
  'characterLength': 160,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'cr

  'recastCount': 0},
 {'_id': ObjectId('617e9a6e6ddb632d797efb00'),
  'ageScore': 0.32177676500367114,
  'characterLength': 165,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e9a716ddb63c02c7efb09'),
  'ageScore': 0.3217844527010814,
  'characterLength': 344,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617e9a746ddb6367b57efb12'),
  'ageScore': 0.3217904806245271,
  'characterLength': 197,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount':

 {'_id': ObjectId('617ebd906ddb63d2757eff59'),
  'ageScore': 0.5053877431399482,
  'characterLength': 275,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617ebd936ddb63d9967eff64'),
  'ageScore': 0.5053877431399482,
  'characterLength': 89,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617ebd956ddb6379d27eff6f'),
  'ageScore': 0.5053877431399482,
  'characterLength': 120,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 

  'ageScore': 0.5053877431399482,
  'characterLength': 220,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617eeec46ddb63c2597f03b9'),
  'ageScore': 0.5053877431399482,
  'characterLength': 183,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617eeec76ddb63034b7f03c4'),
  'ageScore': 0.38263762226427683,
  'characterLength': 155,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'

  'characterLength': 84,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f201f93707260edcb170b'),
  'ageScore': 0.4234458575734207,
  'characterLength': 236,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f23789370728074cb1714'),
  'ageScore': 0.5053877431399482,
  'characterLength': 231,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f237b937072512ccb1

  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f5bc29370722575cb1b68'),
  'ageScore': 0.5053877431399482,
  'characterLength': 237,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f5bc39370729305cb1b73'),
  'ageScore': 0.5053877431399482,
  'characterLength': 158,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f5bc9937072757bcb1b7e'),
  'ageScore': 0.

  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f940d9370725fb5cb1fc5'),
  'ageScore': 0.5372810694019136,
  'characterLength': 115,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f977c937072e034cb1fce'),
  'ageScore': 0.5410830380383779,
  'characterLength': 218,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617f978293707254a0cb1fd9'),
  'ageScore': 0.5411082937430625,
  'ch

  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fbaca7ea1b56f9ea604aa'),
  'ageScore': 0.5817726237543899,
  'characterLength': 232,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fbe287ea1b5c538a604b3'),
  'ageScore': 0.5858089598800419,
  'characterLength': 312,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fbe2a7ea1b5357ca604be'),
  'ageScore': 0.5858191065639635,
  'characterLength': 138,
  'commentCount': 0,
  'creatorCommente

  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fd6d57ea1b54d18a6090e'),
  'ageScore': 0.6162591874616774,
  'characterLength': 244,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fd6d67ea1b53a9aa60917'),
  'ageScore': 0.6162690260246269,
  'characterLength': 155,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fd6d87ea1b5bd05a60920'),
  'ageScore': 0.6162748501323835,
  'characterLength': 206,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorConte

  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fef5f7ea1b5a8a9a60d57'),
  'ageScore': 0.6481171764438252,
  'characterLength': 116,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fef627ea1b5664aa60d62'),
  'ageScore': 0.6481289431184973,
  'characterLength': 155,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('617fef657ea1b52373a60d6d'),
  'ageScore': 0.6481471317084991,
  'characterLength': 183,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creato

  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618008137ea1b5a2d7a611ba'),
  'ageScore': 0.6818464256875902,
  'characterLength': 141,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618008157ea1b5bb7fa611c5'),
  'ageScore': 0.6818587883116564,
  'characterLength': 151,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618008197ea1b50431a611d0'),
  'ageScore': 0.6818807626086845,
  'characterLength': 135,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuo

  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618020bc7ea1b51047a6160f'),
  'ageScore': 0.7172675363506577,
  'characterLength': 163,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618020bf7ea1b53fdba6161a'),
  'ageScore': 0.7172839247991308,
  'characterLength': 201,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618020c17ea1b5158ea61625'),
  'ageScore': 0.7172969759802804,
  'characterLength': 177,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCoun

  'recastCount': 0},
 {'_id': ObjectId('618039347ea1b5866da61a5e'),
  'ageScore': 0.7542329921890697,
  'characterLength': 108,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618039397ea1b5e135a61a69'),
  'ageScore': 0.7542640761073869,
  'characterLength': 108,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('61803cb97ea1b528c8a61a72'),
  'ageScore': 0.7597032962932926,
  'characterLength': 187,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount'

  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618051ef7ea1b54409a61e70'),
  'ageScore': 0.793530627655105,
  'characterLength': 208,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618051f07ea1b58403a61e79'),
  'ageScore': 0.7935407944277221,
  'characterLength': 100,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 578,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618051f57ea1b58375a61e82'),
  'ageScore': 0.7935

  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('61806a817ea1b51079a62258'),
  'ageScore': 0.8345995531238785,
  'characterLength': 205,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('61806a837ea1b51990a62263'),
  'ageScore': 0.8346168078779707,
  'characterLength': 170,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('61806a867ea1b56c19a6226e'),
  'ageScore': 0.834636520381286,
  'characterLength': 139,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorCont

  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('6180949e7ea1b560c1a626b1'),
  'ageScore': 0.9100031602742009,
  'characterLength': 158,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618094a07ea1b54ad9a626bc'),
  'ageScore': 0.9100198859509622,
  'characterLength': 171,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('618094a37ea1b54100a626c7'),
  'ageScore': 0.9100350422640144,
  'characterLength': 191,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQu

  'photoCount': 0,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('6180bed07ea1b55067a62b0a'),
  'ageScore': 0.9923837065858103,
  'characterLength': 100,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 661,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('6180c24e7ea1b5435fa62b13'),
  'ageScore': 0.9995265096937703,
  'characterLength': 247,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'creatorRecastedCount': 0,
  'likeCount': 0,
  'photoCount': 1,
  'quoteCount': 0,
  'recastCount': 0},
 {'_id': ObjectId('6180c2527ea1b52741a62b1e'),
  'ageScore': 0.9995513680913592,
  'characterLength': 316,
  'commentCount': 0,
  'creatorCommentedCount': 0,
  'creatorContentCount': 1437,
  'creatorLikedCount': 0,
  'creatorQuotedCount': 0,
  'c

In [16]:
content_features = pd.DataFrame(list(contentStats.aggregate(datasetCursor)))

In [17]:
content_features.head()

Unnamed: 0,_id,likeCount,commentCount,recastCount,quoteCount,photoCount,characterLength,creatorContentCount,creatorLikedCount,creatorCommentedCount,creatorRecastedCount,creatorQuotedCount,ageScore
0,617a3fe741824b8714ae9be5,0,0,0,0,1,284,1437,0,0,0,0,0.505388
1,617a3fe941824b869eae9bf1,0,0,0,0,1,201,1437,0,0,0,0,0.505388
2,617a3fed41824b1eb9ae9bfd,0,0,0,0,1,284,661,0,0,0,0,0.03262
3,617a3fef41824b762cae9c06,0,0,0,0,1,201,661,0,0,0,0,0.03262
4,617a46ed41824b7404ae9c0f,0,0,0,0,0,61,1437,0,0,0,0,0.505388
