In this notebook, we'll demonstrate ORES's ability to classify articles by topic using the "articletopic" model. First, we'll define some functions for getting the top-level topic predictions for an article, and then we'll try them out on a few articles.

In [1]:
!pip install oresapi



In [2]:
import mwapi
import oresapi

# Functions for getting the top-level prediction

In [3]:
# Both API clients identify themselves with the same user agent string.
USER_AGENT = "ahalfaker@wikimedia.org -- ORES topic demo"

# Client for the English Wikipedia API (used to look up revision IDs).
mw_session = mwapi.Session("https://en.wikipedia.org", user_agent=USER_AGENT)

# Client for the ORES scoring service (used to request model scores).
ores_session = oresapi.Session("https://ores.wikimedia.org", user_agent=USER_AGENT)

def get_topic_prediction(page_title):
    """
    Return the 'articletopic' score for a page on English Wikipedia.

    Looks up a revision ID for `page_title` through `mw_session`, asks
    `ores_session` to score that revision with the 'articletopic' model in
    the 'enwiki' context, and returns whatever the service stored under
    'score' for that model.

    :Parameters:
        page_title : str
            Title of the page to look up.

    :Returns:
        The value found under ['articletopic']['score'] in the ORES response.

    :Raises:
        KeyError
            If the page lookup returns no revisions for `page_title`, or if
            the ORES response carries no score for the revision.
    """
    doc = mw_session.get(
        action='query', prop='revisions', titles=page_title,
        rvprop=['ids'], formatversion=2)
    page = doc['query']['pages'][0]
    if 'revisions' not in page:
        # Without this check the caller only sees a bare KeyError('revisions')
        # (e.g. when the title does not exist), which hides the real cause.
        raise KeyError(
            "No revisions found for page {0!r}".format(page_title))
    recent_rev_id = page['revisions'][0]['revid']

    score_doc = list(ores_session.score('enwiki', ['articletopic'], recent_rev_id))[0]
    model_doc = score_doc['articletopic']
    if 'score' not in model_doc:
        # Include the raw response so a scoring failure can be diagnosed.
        raise KeyError(
            "ORES returned no 'articletopic' score for revision {0} of {1!r}: {2!r}"
            .format(recent_rev_id, page_title, model_doc))
    return model_doc['score']

def print_top_level_topics(score, threshold=0.15):
    """
    Print the topics whose predicted probability exceeds `threshold`.

    Topics are printed one per line, highest probability first, in the form
    "<topic> (<percent>%)" where the percentage is the probability rounded to
    two decimal places and shown with one digit after the decimal point.

    :Parameters:
        score : dict
            A score document with a 'probability' mapping of topic name to
            probability.
        threshold : float
            Only topics with a probability strictly greater than this value
            are printed. Defaults to 0.15.
    """
    proba_topics = sorted(
        ((proba, topic) for topic, proba in score['probability'].items()),
        reverse=True)
    for proba, topic in proba_topics:
        if proba > threshold:
            # round(...) * 100 can leave float noise such as 56.00000000000001,
            # so the percentage is rendered with a fixed one-decimal format.
            print("{0} ({1:.1f}%)".format(topic, round(proba, 2) * 100))
        else:
            # The list is sorted in descending order, so nothing further
            # can exceed the threshold.
            break

    

# Experimenting with the function

In [4]:
# Score the article "Ann Bishop (biologist)" and print the topics whose
# predicted probability is above the cutoff.
print_top_level_topics(get_topic_prediction("Ann Bishop (biologist)"))

STEM.STEM* (97.0%)
Culture.Biography.Biography* (93.0%)
STEM.Biology (75.0%)
STEM.Medicine & Health (59.0%)
History and Society.History (45.0%)
Culture.Biography.Women (21.0%)
Geography.Regions.Europe.Northern Europe (17.0%)
Geography.Regions.Europe.Europe* (15.0%)


In [5]:
# Score the article "Aaron Halfaker" and print the topics whose predicted
# probability is above the cutoff.
print_top_level_topics(get_topic_prediction("Aaron Halfaker"))

STEM.STEM* (93.0%)
STEM.Computing (61.0%)
STEM.Technology (60.0%)
Culture.Biography.Biography* (56.00000000000001%)
Culture.Media.Software (40.0%)
Culture.Media.Media* (31.0%)
Culture.Internet culture (23.0%)
Geography.Regions.Americas.North America (16.0%)


In [6]:
# Score the article "Seattle" and print the topics whose predicted
# probability is above the cutoff.
print_top_level_topics(get_topic_prediction("Seattle"))

Geography.Regions.Americas.North America (95.0%)
STEM.STEM* (22.0%)
History and Society.Politics and government (19.0%)
Culture.Visual arts.Visual arts* (17.0%)
