In [None]:
import matplotlib.pyplot as plt
import pymongo

In [None]:
# Connection settings for this notebook.
# Name of the MongoDB database opened by the connection cell.
MONGO_DB = "bsocoverage"
# Name of the collection, inside that database, that every query below reads.
MONGO_COLLECTION = "publications"

In [None]:
# Open the database and collection named in the configuration cell.
# MongoClient() is called without arguments, so pymongo's default
# connection settings apply.
mongo_database = pymongo.MongoClient().get_database(MONGO_DB)
mongo_collection = mongo_database.get_collection(MONGO_COLLECTION)

In [None]:
def _percent_of_total(part, total):
    """Return ``part`` expressed as a percentage of ``total``.

    Returns 0.0 when ``total`` is 0 so that an empty collection prints
    "0 %" instead of raising ``ZeroDivisionError``.
    """
    return (part / total) * 100 if total else 0.0


# Total in database
total_count = mongo_collection.count_documents({})
print(f"Total count : {total_count}")

# FOSM: documents whose "is_in_fosm" field is True
total_fosm = mongo_collection.count_documents({"is_in_fosm": True})
total_fosm_percent = _percent_of_total(total_fosm, total_count)
print(
    f"Total FOSM : {total_fosm} (i.e. {total_fosm_percent:.0f} % of the total)")

# OpenAlex: documents whose "is_in_openalex" field is True
total_openalex = mongo_collection.count_documents({"is_in_openalex": True})
total_openalex_percent = _percent_of_total(total_openalex, total_count)
print(
    f"Total OpenAlex : {total_openalex} (i.e. {total_openalex_percent:.0f} % of the total)")

# In FOSM and in OpenAlex: both flags are True on the same document
total_fosm_openalex = mongo_collection.count_documents(
    {"is_in_fosm": True, "is_in_openalex": True})
total_fosm_openalex_percent = _percent_of_total(
    total_fosm_openalex, total_count)
print(
    f"Total FOSM and OpenAlex : {total_fosm_openalex} (i.e. {total_fosm_openalex_percent:.0f} % of the total)")

In [None]:
# Count the documents flagged as present in OpenAlex that have no
# "is_in_fosm" field, grouped by their "year" field and sorted by
# ascending year. The cursor is materialised into a list so that later
# cells can iterate over the results more than once.
# NOTE(review): {"$exists": False} only matches documents where the field is
# absent; documents carrying an explicit is_in_fosm=False would not be
# counted here — confirm this matches how the collection is populated.
agg_results = list(
    mongo_collection.aggregate(
        [
            {
                "$match": {
                    "is_in_openalex": True,
                    "is_in_fosm": {"$exists": False},
                }
            },
            {"$group": {"_id": "$year", "count": {"$sum": 1}}},
            {"$sort": {"_id": 1}},
        ]
    )
)
print(agg_results)

In [None]:
# Bar chart of the per-year counts held in agg_results (one bar per group),
# saved as a PNG next to the notebook and displayed inline.
fig = plt.figure()
# The axes span the whole figure; bbox_inches='tight' at save time keeps the
# tick labels, axis labels and title inside the exported image.
ax = fig.add_axes([0, 0, 1, 1])
# Each tick label shows the group key followed by its count, e.g. "2020 (15)".
labels = [f"{d.get('_id')} ({d.get('count')})" for d in agg_results]
count = [d.get("count") for d in agg_results]
ax.bar(labels, count)
# Title and axis labels let the figure stand alone outside the notebook.
ax.set_title("Publications in OpenAlex missing in FOSM, by year")
ax.set_xlabel("Year (number of publications)")
ax.set_ylabel("Number of publications")
# Vertical tick labels avoid overlap between neighbouring bars.
ax.tick_params(axis="x", labelrotation=90)
fig.savefig("repartition_by_year_of_publications_from_openalex_missing_in_fosm.png", bbox_inches='tight')
plt.show()