This code is derived from code at:

https://public.paws.wmcloud.org/User:Jtmorgan/data512_a1_example.ipynb


In [179]:
import io
import json

import pandas
import requests

The following strings represent the HTTP endpoints for the two REST APIs that we'll use.

The parameters for the APIs are encoded in these strings and are represented below by the URL segments that are in braces, such as `{granularity}`. These are placeholders that are replaced by values that we specify in Python dictionaries and pass into the function that makes the API call. (That function is defined later in this file.)

In [180]:
# URL template for the Pageviews API; each {placeholder} is filled in from a
# parameter dictionary when the request is made.
endpoint_pageviews = (
    'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/'
    '{project}/{access}/{agent}/{granularity}/{start}/{end}'
)

# URL template for the legacy Pagecounts API.
endpoint_legacy = (
    'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/'
    '{project}/{access-site}/{granularity}/{start}/{end}'
)

In this section, we specify the Python dictionaries that contain the parameters for each of the **five** API calls that we make.

The first **three** parameter blocks are for the Pageviews API. The following **two** parameter blocks are for the legacy Pagecounts API.

In [181]:
#
# Pageviews API parameters: one dictionary per access method
# (desktop, mobile-web, mobile-app). Keys match the {placeholders}
# in endpoint_pageviews.
#
# See: https://wikimedia.org/api/rest_v1/#!/Pageviews_data/get_metrics_pageviews_aggregate_project_access_agent_granularity_start_end
#
params_pageviews_desktop = {
    "project": "en.wikipedia.org",
    "access": "desktop",
    "agent": "user",
    "granularity": "monthly",
    "start": "2015070100",
    # "end" is the 1st day of the month following the final month of data
    "end": "2020100100",
}

params_pageviews_mobile_web = {
    "project": "en.wikipedia.org",
    "access": "mobile-web",
    "agent": "user",
    "granularity": "monthly",
    "start": "2015070100",
    # "end" is the 1st day of the month following the final month of data
    "end": "2020100100",
}

params_pageviews_mobile_app = {
    "project": "en.wikipedia.org",
    "access": "mobile-app",
    "agent": "user",
    "granularity": "monthly",
    "start": "2015070100",
    # "end" is the 1st day of the month following the final month of data
    "end": "2020100100",
}

#
# Legacy Pagecounts API parameters: one dictionary per site
# (desktop-site, mobile-site). Keys match the {placeholders}
# in endpoint_legacy.
#
# See: https://wikimedia.org/api/rest_v1/#!/Legacy_data/get_metrics_legacy_pagecounts_aggregate_project_access_site_granularity_start_end
#
params_pagecounts_legacy_desktop = {
    "project": "en.wikipedia.org",
    "access-site": "desktop-site",
    "granularity": "monthly",
    "start": "2008010100",
    # "end" is the 1st day of the month following the final month of data
    "end": "2016080100",
}

params_pagecounts_legacy_mobile = {
    "project": "en.wikipedia.org",
    "access-site": "mobile-site",
    "granularity": "monthly",
    "start": "2008010100",
    # "end" is the 1st day of the month following the final month of data
    "end": "2016080100",
}


In [182]:
#
# HTTP request headers that identify who is making the API calls
#
headers = {
    "User-Agent": "https://github.com/carljparker",
    "From": "cajopa@uw.edu",
}

In [183]:
def api_call(endpoint, parameters, timeout=30):
    """Fill in an endpoint URL template, GET it, and return the decoded JSON.

    Args:
        endpoint: URL template whose ``{name}`` placeholders are filled in
            from ``parameters``.
        parameters: Dictionary mapping each placeholder name in ``endpoint``
            to its value.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, a stalled connection would block the notebook forever.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = endpoint.format(**parameters)
    # The module-level `headers` dictionary is sent with every request.
    call = requests.get(url, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error rather than handing an error payload
    # to callers that expect data.
    call.raise_for_status()
    return call.json()

## Extract pageview data for desktop site ##

In [184]:
# Fetch desktop pageviews. The parameter block is named
# params_pageviews_desktop; the previous name, params_pageviews, is not
# defined anywhere in this notebook and fails on a fresh kernel.
pageviews_desktop = api_call(endpoint_pageviews, params_pageviews_desktop)

In [185]:
# Inspect the top-level type of the decoded response.
type(pageviews_desktop)

dict

In [186]:
# Inspect the type of the 'items' entry in the response.
type(pageviews_desktop['items'])

list

In [187]:
# Save the complete desktop pageviews response to disk as JSON.
with open("pageviews_desktop_201507-202009.json", "w") as json_out:
    json.dump(pageviews_desktop, fp=json_out)

In [188]:
# Display the desktop pageview records as a table. The JSON text is wrapped
# in StringIO because newer pandas versions deprecate passing a literal JSON
# string to read_json.
pandas.read_json(
    io.StringIO(json.dumps(pageviews_desktop['items'])),
    orient='records',
    convert_dates=False,
)

Unnamed: 0,project,access,agent,granularity,timestamp,views
0,en.wikipedia,desktop,user,monthly,2015070100,4376666686
1,en.wikipedia,desktop,user,monthly,2015080100,4332482183
2,en.wikipedia,desktop,user,monthly,2015090100,4485491704
3,en.wikipedia,desktop,user,monthly,2015100100,4477532755
4,en.wikipedia,desktop,user,monthly,2015110100,4287720220
...,...,...,...,...,...,...
58,en.wikipedia,desktop,user,monthly,2020050100,3078093615
59,en.wikipedia,desktop,user,monthly,2020060100,2721328557
60,en.wikipedia,desktop,user,monthly,2020070100,2638936132
61,en.wikipedia,desktop,user,monthly,2020080100,2613058239


## Extract pageview data for mobile web ##

In [189]:
# Fetch mobile-web pageviews from the Pageviews API.
pageviews_mobile_web = api_call(
    endpoint=endpoint_pageviews,
    parameters=params_pageviews_mobile_web,
)

In [190]:
# Inspect the top-level type of the decoded response.
type(pageviews_mobile_web)

dict

In [191]:
# Inspect the type of the 'items' entry in the response.
type(pageviews_mobile_web['items'])

list

In [192]:
# Save the complete mobile-web pageviews response to disk as JSON.
with open("pageviews_mobile-web_201507-202009.json", "w") as json_out:
    json.dump(pageviews_mobile_web, fp=json_out)

In [193]:
# Display the mobile-web pageview records as a table. The JSON text is
# wrapped in StringIO because newer pandas versions deprecate passing a
# literal JSON string to read_json.
pandas.read_json(
    io.StringIO(json.dumps(pageviews_mobile_web['items'])),
    orient='records',
    convert_dates=False,
)

Unnamed: 0,project,access,agent,granularity,timestamp,views
0,en.wikipedia,mobile-web,user,monthly,2015070100,3179131148
1,en.wikipedia,mobile-web,user,monthly,2015080100,3192663889
2,en.wikipedia,mobile-web,user,monthly,2015090100,3073981649
3,en.wikipedia,mobile-web,user,monthly,2015100100,3173975355
4,en.wikipedia,mobile-web,user,monthly,2015110100,3142247145
...,...,...,...,...,...,...
58,en.wikipedia,mobile-web,user,monthly,2020050100,5089055354
59,en.wikipedia,mobile-web,user,monthly,2020060100,4552042163
60,en.wikipedia,mobile-web,user,monthly,2020070100,4675166579
61,en.wikipedia,mobile-web,user,monthly,2020080100,4647875180


## Extract pageview data for mobile app ##

In [194]:
# Fetch mobile-app pageviews from the Pageviews API.
pageviews_mobile_app = api_call(
    endpoint=endpoint_pageviews,
    parameters=params_pageviews_mobile_app,
)

In [195]:
# Inspect the top-level type of the decoded response.
type(pageviews_mobile_app)

dict

In [196]:
# Inspect the type of the 'items' entry in the response.
type(pageviews_mobile_app['items'])

list

In [197]:
# Save the complete mobile-app pageviews response to disk as JSON.
with open("pageviews_mobile-app_201507-202009.json", "w") as json_out:
    json.dump(pageviews_mobile_app, fp=json_out)

In [198]:
# Display the mobile-app pageview records as a table. The JSON text is
# wrapped in StringIO because newer pandas versions deprecate passing a
# literal JSON string to read_json.
pandas.read_json(
    io.StringIO(json.dumps(pageviews_mobile_app['items'])),
    orient='records',
    convert_dates=False,
)

Unnamed: 0,project,access,agent,granularity,timestamp,views
0,en.wikipedia,mobile-app,user,monthly,2015070100,109624146
1,en.wikipedia,mobile-app,user,monthly,2015080100,109669149
2,en.wikipedia,mobile-app,user,monthly,2015090100,96221684
3,en.wikipedia,mobile-app,user,monthly,2015100100,94523777
4,en.wikipedia,mobile-app,user,monthly,2015110100,94353925
...,...,...,...,...,...,...
58,en.wikipedia,mobile-app,user,monthly,2020050100,142644741
59,en.wikipedia,mobile-app,user,monthly,2020060100,21933093
60,en.wikipedia,mobile-app,user,monthly,2020070100,134547886
61,en.wikipedia,mobile-app,user,monthly,2020080100,155433481


## Extract pagecount (legacy) data for desktop ##

In [199]:
# Fetch legacy desktop pagecounts. The parameter block is named
# params_pagecounts_legacy_desktop (plural "pagecounts"); the singular
# spelling used previously is not defined and fails on a fresh kernel.
pagecount_legacy_desktop = api_call(endpoint_legacy, params_pagecounts_legacy_desktop)

In [200]:
# Save the complete legacy desktop pagecounts response to disk as JSON.
with open("pagecounts_desktop_200801-201607.json", "w") as json_out:
    json.dump(pagecount_legacy_desktop, fp=json_out)

In [201]:
# Display the legacy desktop pagecount records as a table. The JSON text is
# wrapped in StringIO because newer pandas versions deprecate passing a
# literal JSON string to read_json.
pandas.read_json(
    io.StringIO(json.dumps(pagecount_legacy_desktop['items'])),
    orient='records',
    convert_dates=False,
)

Unnamed: 0,project,access-site,granularity,timestamp,count
0,en.wikipedia,desktop-site,monthly,2008010100,4930902570
1,en.wikipedia,desktop-site,monthly,2008020100,4818393763
2,en.wikipedia,desktop-site,monthly,2008030100,4955405809
3,en.wikipedia,desktop-site,monthly,2008040100,5159162183
4,en.wikipedia,desktop-site,monthly,2008050100,5584691092
...,...,...,...,...,...
98,en.wikipedia,desktop-site,monthly,2016030100,5407676056
99,en.wikipedia,desktop-site,monthly,2016040100,5572235399
100,en.wikipedia,desktop-site,monthly,2016050100,5330532334
101,en.wikipedia,desktop-site,monthly,2016060100,4975092447


## Extract pagecount (legacy) data for mobile ##

In [202]:
# Fetch legacy mobile pagecounts. The parameter block is named
# params_pagecounts_legacy_mobile (plural "pagecounts"); the singular
# spelling used previously is not defined and fails on a fresh kernel.
pagecount_legacy_mobile = api_call(endpoint_legacy, params_pagecounts_legacy_mobile)

In [203]:
# Save the complete legacy mobile pagecounts response to disk as JSON.
with open("pagecounts_mobile_200801-201607.json", "w") as json_out:
    json.dump(pagecount_legacy_mobile, fp=json_out)

In [204]:
# Display the legacy mobile pagecount records as a table. The JSON text is
# wrapped in StringIO because newer pandas versions deprecate passing a
# literal JSON string to read_json.
pandas.read_json(
    io.StringIO(json.dumps(pagecount_legacy_mobile['items'])),
    orient='records',
    convert_dates=False,
)

Unnamed: 0,project,access-site,granularity,timestamp,count
0,en.wikipedia,mobile-site,monthly,2014100100,3091546685
1,en.wikipedia,mobile-site,monthly,2014110100,3027489668
2,en.wikipedia,mobile-site,monthly,2014120100,3278950021
3,en.wikipedia,mobile-site,monthly,2015010100,3485302091
4,en.wikipedia,mobile-site,monthly,2015020100,3091534479
5,en.wikipedia,mobile-site,monthly,2015030100,3330832588
6,en.wikipedia,mobile-site,monthly,2015040100,3222089917
7,en.wikipedia,mobile-site,monthly,2015050100,3334069483
8,en.wikipedia,mobile-site,monthly,2015060100,3038162463
9,en.wikipedia,mobile-site,monthly,2015070100,3254472695
