In [None]:
import pymongo
import pandas as pd
from tqdm import tqdm
from time import sleep

from notebook_modules.database import Database
from notebook_modules.quarter import Quarter
from notebook_modules.half import Half
from notebook_modules.lists import make_list
from notebook_modules.plots import make_frequency_plot, make_frequency_plot_from_csv

In [None]:
# Open the project's database wrapper and stop immediately if it exposes no
# usable client, so later cells do not fail with a less obvious error.
db = Database()
assert db.client, "No database client available!"

# Handles shared by the cells below: the "stackoverflow" database and its
# "tags" and "posts" collections.
stackoverflow = db.client["stackoverflow"]
tags, posts = stackoverflow["tags"], stackoverflow["posts"]

In [None]:
# Year range of the analysis, named once so the quarter and half-year
# periods can never drift out of sync.
# NOTE(review): whether END_YEAR is inclusive is decided by
# make_quarters / make_halves, which are not visible here - confirm.
START_YEAR = 2019
END_YEAR = 2020

# Reporting periods used by the analysis cells.
quarters = Quarter.make_quarters(START_YEAR, END_YEAR)
halves = Half.make_halves(START_YEAR, END_YEAR)

In [None]:
# How many of the highest-count tags to analyse.
LIMIT = 20

# Cursor over the LIMIT tags with the largest "Count", highest first.
# Only "TagName" and "Count" are projected (plus whatever the driver
# returns by default for the document id).
topTags = tags.find(
    {},
    projection=["TagName", "Count"],
    sort=[("Count", pymongo.DESCENDING)],
    limit=LIMIT,
)

In [None]:
# For every quarter: count the posts matching each top tag, then emit the
# per-quarter ranking as a list and as a frequency plot.
for quarter in quarters:
    # Rewind *before* iterating so the cell is safe to re-run: if a previous
    # run was interrupted mid-quarter the shared cursor would otherwise
    # resume part-way through and silently yield a truncated ranking.
    topTags.rewind()

    # Collect (tag, count) pairs and build the frame once afterwards instead
    # of growing a DataFrame row by row.
    records = []

    # The context manager closes the progress bar even when a query raises.
    with tqdm(total=LIMIT, unit="query", desc=str(quarter), ascii=True) as pbar:
        for tag in topTags:
            # NOTE(review): PostTypeId 1 presumably selects questions in the
            # Stack Overflow dump - confirm against the schema.
            result = posts.count_documents({
                "PostTypeId": 1,
                # Half-open interval: start inclusive, end exclusive.
                "CreationDate": {
                    "$gte": quarter.start,
                    "$lt": quarter.end
                },
                "Tags": tag["TagName"]
            })
            records.append((tag["TagName"], result))
            pbar.update()

    # Leave the cursor rewound for any later cell that iterates it.
    topTags.rewind()

    # Explicit columns keep the schema intact even when no tags were returned,
    # so the sort below cannot fail on a missing "frequency" column.
    df = pd.DataFrame(records, columns=["tag", "frequency"])
    df = df.sort_values("frequency", ascending=False)
    make_list("frequency", str(quarter), df)
    make_frequency_plot("frequency", str(quarter), df)