In [11]:
import mwapi #you might need to install mwapi: !pip3 install mwapi

def content(page_name,project='en.wikipedia',date='2020-01-01T00:00:00Z'):
    """
    Fetch the revisions of a wiki page made on or after a given date.

    Revisions are requested from the MediaWiki API oldest-first
    (rvdir="newer"), 50 per request, and API continuation is followed
    until every matching revision has been retrieved.

    input:
        page_name: (str) page title; example: 'Chile'
        project: (str) project's name; example: 'en.wikipedia'
        date: (str) timestamp of the earliest revision to return;
              example: '2020-01-01T00:00:00Z'

    output: list of dicts, one per revision, oldest first
        page: page_name, exactly as passed in
        time: revision timestamp
        size: revision size as reported by the API
        rev_id: revision id
        content: text of the revision's main slot, or None when the API
                 does not return it (e.g. the revision text is hidden)

    An empty list is returned when the page does not exist or has no
    revisions on or after `date`.
    """
    # open API
    session = mwapi.Session("https://%s.org" % project, user_agent="diego@wikimedia.org")
    # create an empty output list
    output = []
    # call the API; continuation=True yields one response document per batch
    for response_doc in session.get(action='query', prop='revisions', titles=page_name,
                                    rvprop=['ids', 'timestamp','size','content'], rvlimit=50, rvdir="newer",
                                    rvstart=date, rvslots='*',
                                    formatversion=2, continuation=True):
        pages = response_doc.get('query', {}).get('pages', [])
        # A single title is queried, so only the first page entry is relevant.
        for page_doc in pages[:1]:
            # Missing pages and pages with no revisions in range carry no
            # 'revisions' key; treat both as "nothing to collect".
            for rev_doc in page_doc.get('revisions', []):
                # Hidden/suppressed revisions come back without a 'content'
                # key; keep their metadata and record the text as None.
                text = rev_doc.get('slots', {}).get('main', {}).get('content')
                output.append({'page':page_name,
                               'time':rev_doc['timestamp'],
                               'size':rev_doc['size'],
                               'rev_id':rev_doc['revid'],
                               'content':text})
    return output


In [12]:
# Example: fetch the "Machine_learning" revisions on English Wikipedia
# made since 1 June 2021 (this performs live API requests).
result = content(
    page_name='Machine_learning',
    project='en.wikipedia',
    date='2021-06-01T00:00:00Z',
)

In [13]:
# Inspect the first revision returned (the oldest one, since the query uses rvdir="newer")
result[0]

{'page': 'Machine_learning',
 'time': '2021-06-03T18:44:35Z',
 'size': 94573,
 'rev_id': 1026693554,
 'content': '{{Short description|Study of algorithms that improve automatically through experience}}\n{{For|the journal|Machine Learning (journal)}}\n{{Redirect|Statistical learning|statistical learning in linguistics|statistical learning in language acquisition}}\n{{Artificial intelligence|Major goals}}\n{{Machine learning bar}}\n{{Evolutionary algorithms}}\n\n\'\'\'Machine learning\'\'\' (\'\'\'ML\'\'\') is the study of computer [[algorithm]]s that improve automatically through experience and by the use of data.<ref>{{Cite book|last=Mitchell|first=Tom|url=http://www.cs.cmu.edu/~tom/mlbook.html|title=Machine Learning|publisher=McGraw Hill|year=1997|isbn=0-07-042807-7|location=New York|oclc=36417892|author-link=Tom M. Mitchell}}</ref> It is seen as a part of [[artificial intelligence]]. Machine learning algorithms build a model based on sample data, known as "[[training data]]", in orde

In [14]:
# Count how many revisions were retrieved for the requested date range
len(result)

57