In [None]:
import pymongo
import pandas as pd
from tqdm import tqdm

from notebook_modules.database import Database
from notebook_modules.quarter import Quarter
from notebook_modules.lists import make_list
from notebook_modules.plots import make_tag_plot

In [None]:
# Open the project's database wrapper and stop the notebook early when it
# exposes no usable client.
db = Database()
assert db.client, "No database client available!"

# Handles to the "stackoverflow" database and the two collections the cells
# below read from.
stackoverflow = db.client["stackoverflow"]
tags, posts = stackoverflow["tags"], stackoverflow["posts"]

In [None]:
# Year range handed to Quarter.make_quarters; the per-quarter queries in the
# cells below iterate over the resulting sequence.
# NOTE(review): whether LAST_YEAR is inclusive depends on make_quarters — confirm.
FIRST_YEAR, LAST_YEAR = 2008, 2021
quarters = Quarter.make_quarters(FIRST_YEAR, LAST_YEAR)

In [None]:
# The ten tags with the highest "Count", fetching only the fields the later
# cells use.
#
# The query is materialized into a list right away. A PyMongo cursor can be
# consumed only once (unless rewound), so keeping the raw cursor here would make
# every `list(topTags)` after the first one return an empty list, and the
# analysis cells that follow the first would silently do nothing.
topTags = list(
    tags
    .find({}, projection=["TagName", "Count"])
    .sort("Count", pymongo.DESCENDING)
    .limit(10)
)

In [None]:
# For every top tag: count the matching posts created in each quarter, then
# hand the per-quarter table to the list and plot helpers.
for tag in list(topTags):
    tag_name = tag["TagName"]
    rows = []
    for quarter in tqdm(quarters, unit="query", desc=tag_name, ascii=True):
        # Half-open window [quarter.start, quarter.end) so a post on a quarter
        # boundary is counted exactly once.
        # NOTE(review): PostTypeId 1 presumably selects questions — confirm
        # against the dump's schema.
        post_count = posts.count_documents({
            "PostTypeId": 1,
            "CreationDate": {
                "$gte": quarter.start,
                "$lt": quarter.end
            },
            "Tags": tag_name
        })
        rows.append((str(quarter), post_count))
    # Build the frame once from the collected rows instead of enlarging it one
    # `.loc` row at a time: same columns and row order, no repeated
    # reallocation, and column dtypes inferred from the full data.
    df = pd.DataFrame(rows, columns=["quarter", "count"])
    make_list("count", tag_name, df)
    make_tag_plot("count", tag_name, df)

In [None]:
# For every top tag: sum "ViewCount" over the matching posts created in each
# quarter, then hand the per-quarter table to the list and plot helpers.
# NOTE(review): if topTags is a cursor that an earlier cell already consumed,
# list(topTags) is empty and this loop never runs — confirm topTags can be
# iterated more than once.
for tag in list(topTags):
    tag_name = tag["TagName"]
    rows = []
    for quarter in tqdm(quarters, unit="query", desc=tag_name, ascii=True):
        pipeline = [
            # Half-open window [quarter.start, quarter.end).
            {"$match": {
                "PostTypeId": 1,
                "CreationDate": {
                    "$gte": quarter.start,
                    "$lt": quarter.end
                },
                "Tags": tag_name
            }},
            # A single group (_id None) collapses all matches into one total.
            {"$group": {
                "_id": None,
                "views": {"$sum": "$ViewCount"}
            }},
        ]
        totals = list(posts.aggregate(pipeline))
        # The pipeline yields no document when nothing matched; record 0 then.
        views = totals[0]["views"] if totals else 0
        rows.append((str(quarter), views))
    # Build the frame once from the collected rows instead of enlarging it one
    # `.loc` row at a time: same columns and row order, no repeated
    # reallocation, and column dtypes inferred from the full data.
    df = pd.DataFrame(rows, columns=["quarter", "views"])
    make_list("views", tag_name, df)
    make_tag_plot("views", tag_name, df)

In [None]:
# For every top tag: average "Score" over the matching posts created in each
# quarter, then hand the per-quarter table to the list and plot helpers.
# NOTE(review): if topTags is a cursor that an earlier cell already consumed,
# list(topTags) is empty and this loop never runs — confirm topTags can be
# iterated more than once.
for tag in list(topTags):
    tag_name = tag["TagName"]
    rows = []
    for quarter in tqdm(quarters, unit="query", desc=tag_name, ascii=True):
        pipeline = [
            # Half-open window [quarter.start, quarter.end).
            {"$match": {
                "PostTypeId": 1,
                "CreationDate": {
                    "$gte": quarter.start,
                    "$lt": quarter.end
                },
                "Tags": tag_name
            }},
            # A single group (_id None) collapses all matches into one average.
            {"$group": {
                "_id": None,
                "score": {"$avg": "$Score"}
            }},
        ]
        averages = list(posts.aggregate(pipeline))
        # The pipeline yields no document when nothing matched; such quarters
        # are recorded with a score of 0, as before.
        score = averages[0]["score"] if averages else 0
        rows.append((str(quarter), score))
    # Build the frame once from the collected rows instead of enlarging it one
    # `.loc` row at a time: same columns and row order, no repeated
    # reallocation, and column dtypes inferred from the full data.
    df = pd.DataFrame(rows, columns=["quarter", "score"])
    make_list("score", tag_name, df)
    make_tag_plot("score", tag_name, df)