In [None]:
import pymongo
import pandas as pd
from tqdm import tqdm

from notebook_modules.database import Database
from notebook_modules.quarter import Quarter
from notebook_modules.lists import make_list
from notebook_modules.plots import make_frequency_plot

In [None]:
# Open the project database wrapper and grab handles to the collections
# used by the cells below.
db = Database()
# Explicit check instead of `assert`: assertions are stripped when Python
# runs with -O, which would let a missing client fail later with a far less
# helpful error on the subscript below.
if not db.client:
    raise RuntimeError("No database client available!")
stackoverflow = db.client["stackoverflow"]
tags = stackoverflow["tags"]
posts = stackoverflow["posts"]

In [None]:
# Quarters to iterate over in the analysis below, built from the 2019 and
# 2020 year arguments.
# NOTE(review): whether the second year is inclusive is decided by
# Quarter.make_quarters — confirm against its implementation.
quarters = Quarter.make_quarters(2019, 2020)

In [None]:
# For every quarter, compute the 50 most frequent tags among posts created
# in that quarter and pass the resulting table to the list and plot helpers.
for quarter in tqdm(quarters, unit="quarter", ascii=True):
    cursor = posts.aggregate([
        # Posts with PostTypeId 1 created in [quarter.start, quarter.end).
        # NOTE(review): PostTypeId 1 presumably denotes questions, as in the
        # Stack Exchange data dump schema — confirm for this collection.
        {"$match": {
            "PostTypeId": 1,
            "CreationDate": {
                "$gte": quarter.start,
                "$lt": quarter.end,
            },
        }},
        # One document per (post, tag) pair so tags can be counted.
        {"$unwind": "$Tags"},
        {"$group": {
            "_id": "$Tags",
            "frequency": {"$sum": 1},
        }},
        # Most frequent first, keep the top 50.
        {"$sort": {"frequency": -1}},
        {"$limit": 50},
    ])
    # Select the result keys by name and rename `_id` explicitly rather than
    # relabelling columns by position: this cannot mislabel the data if the
    # key order of the returned documents differs, and it yields the same
    # empty ["tag", "frequency"] frame when the quarter has no matches.
    df = pd.DataFrame(list(cursor), columns=["_id", "frequency"]).rename(
        columns={"_id": "tag"}
    )
    make_list("frequency", str(quarter), df)
    make_frequency_plot("frequency", str(quarter), df)