In [1]:
import requests
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

# Request, from the Wikimedia pageviews API, the daily pageviews from July 2015 (earliest available)
# to the end of September 2018 for the Donald_Trump article. Pageviews are for the English-language
# Wikipedia only ('en.wikipedia'), across all access methods, and exclude non-human (web crawler)
# views (agent = 'user').
r = requests.get('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/Donald_Trump/daily/2015070100/2018093000')

# Fail here with a clear HTTP error instead of passing an error payload on to the parsing cell.
# NOTE(review): Wikimedia asks API clients to send a descriptive User-Agent header; consider
# adding one if requests are rejected.
r.raise_for_status()

In [2]:
# The response body is a JSON object whose 'items' list holds one record per day; each record
# carries (among other fields) the article name, a timestamp and the view count.
items = r.json()['items']

# Use the article name reported by the API as the name of the pageview column.
article = items[0]['article']

# Build the frame in one vectorized step: keep only the timestamp and view count of every record,
# naming them 'day' and <article>. This replaces the row-by-row .loc writes, which were slow and
# wrote strings into a column that had been created with integer zeros.
data = (
    pd.DataFrame(items)[['timestamp', 'views']]
    .rename(columns={'timestamp': 'day', 'views': article})
)

display(data)


Unnamed: 0,day,Donald_Trump
0,2015070100,81749
1,2015070200,83055
2,2015070300,55174
3,2015070400,43373
4,2015070500,41914
5,2015070600,38109
6,2015070700,43763
7,2015070800,47992
8,2015070900,83148
9,2015071000,58505


In [3]:
# Same request as for the first article (same project, access, agent and date range), this time
# for the Barack_Obama article.
r = requests.get('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/Barack_Obama/daily/2015070100/2018093000')

# Fail here with a clear HTTP error instead of passing an error payload on to the parsing cell.
r.raise_for_status()

In [4]:
# Same steps as for the first article, but the result goes into a temporary dataframe that is
# then merged into the main one.
items = r.json()['items']

# Column name for this article, as reported by the API.
article = items[0]['article']

# Build the temporary frame from its own records only. The previous version looked rows up
# through data.index (the other frame), which only worked while both frames had the same length.
temp = (
    pd.DataFrame(items)[['timestamp', 'views']]
    .rename(columns={'timestamp': 'day', 'views': article})
)

# Default (inner) join on 'day': only days present in both frames are kept.
# NOTE(review): if an article has no record for some day, that day disappears for every
# column - consider how='outer' if gaps should be preserved.
data = data.merge(temp, on='day')

display(data)

Unnamed: 0,day,Donald_Trump,Barack_Obama
0,2015070100,81749,28001
1,2015070200,83055,25759
2,2015070300,55174,24962
3,2015070400,43373,25379
4,2015070500,41914,25612
5,2015070600,38109,25209
6,2015070700,43763,24133
7,2015070800,47992,22712
8,2015070900,83148,22591
9,2015071000,58505,22615


In [5]:
# The loop automates the per-article steps: given a list of article names it fetches each
# article's daily view counts and appends them to the main dataframe as a new column.
articles = ['Ronald_Reagan', 'George_H._W._Bush', 'Bill_Clinton', 'George_W._Bush', 'Nelson_Mandela', 'Justin_Trudeau', 'Angela_Merkel', 'Theresa_May', 'David_Cameron', 'Marine_Le_Pen', 'Emmanuel_Macron', 'Vladimir_Putin']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    # Stop on an HTTP error rather than trying to parse an error payload.
    r.raise_for_status()

    # Build this article's frame from its own records. The previous version indexed temp through
    # data.index, which raises IndexError as soon as temp has more rows than data.
    temp = (
        pd.DataFrame(r.json()['items'])[['timestamp', 'views']]
        .rename(columns={'timestamp': 'day', 'views': article})
    )

    # Default (inner) join on 'day': only days present in both frames are kept.
    data = data.merge(temp, on='day')

display(data)
    

Unnamed: 0,day,Donald_Trump,Barack_Obama,Ronald_Reagan,George_H._W._Bush,Bill_Clinton,George_W._Bush,Nelson_Mandela,Justin_Trudeau,Angela_Merkel,Theresa_May,David_Cameron,Marine_Le_Pen,Emmanuel_Macron,Vladimir_Putin
0,2015070100,81749,28001,10885,9294,12072,11888,9571,2107,7882,764,3978,468,125,8219
1,2015070200,83055,25759,12377,8679,9687,11292,9082,1800,6071,675,3909,540,144,7029
2,2015070300,55174,24962,12544,9014,9610,10913,8775,1530,4586,564,3514,415,172,6757
3,2015070400,43373,25379,13741,8635,9581,10689,6633,1461,4345,520,3359,345,142,6677
4,2015070500,41914,25612,13575,9452,10662,12457,8179,1552,7668,576,4196,684,349,7179
5,2015070600,38109,25209,12306,9775,9693,16562,8827,1510,17621,548,4475,1043,255,7588
6,2015070700,43763,24133,13993,9513,11327,13911,8324,1732,16018,468,4882,611,171,7601
7,2015070800,47992,22712,11712,8545,10348,11272,7900,1590,12491,2258,6042,1278,168,7990
8,2015070900,83148,22591,11356,10806,12953,13904,7698,1537,9920,766,4808,689,226,9795
9,2015071000,58505,22615,12911,10688,12746,14417,6558,1198,7272,846,4080,415,371,8994


In [6]:
# The loop automates the per-article steps: given a list of article names it fetches each
# article's daily view counts and appends them to the main dataframe as a new column.
articles = ['Muammar_Gaddafi', 'Jacob_Zuma', 'Robert_Mugabe', 'Narendra_Modi', 'Indira_Gandhi', 'Deng_Xiaoping', 'Bernie_Sanders', 'Hillary_Clinton', 'Amy_Schumer', 'Michael_Jackson', 'Kurt_Cobain', 'Jimi_Hendrix', 'Justin_Bieber', 'Eric_Clapton', 'Leonard_Cohen', 'Xi_Jinping', 'Mikhail_Gorbachev', 'Benjamin_Netanyahu', 'Saddam_Hussein', 'David_Duke', 'Richard_B._Spencer', 'Cenk_Uygur', 'Elizabeth_Warren']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    # Stop on an HTTP error rather than trying to parse an error payload.
    r.raise_for_status()

    # Build this article's frame from its own records. The previous version indexed temp through
    # data.index, which raises IndexError as soon as temp has more rows than data.
    temp = (
        pd.DataFrame(r.json()['items'])[['timestamp', 'views']]
        .rename(columns={'timestamp': 'day', 'views': article})
    )

    # Default (inner) join on 'day': only days present in both frames are kept.
    data = data.merge(temp, on='day')

display(data)

Unnamed: 0,day,Donald_Trump,Barack_Obama,Ronald_Reagan,George_H._W._Bush,Bill_Clinton,George_W._Bush,Nelson_Mandela,Justin_Trudeau,Angela_Merkel,...,Eric_Clapton,Leonard_Cohen,Xi_Jinping,Mikhail_Gorbachev,Benjamin_Netanyahu,Saddam_Hussein,David_Duke,Richard_B._Spencer,Cenk_Uygur,Elizabeth_Warren
0,2015070100,81749,28001,10885,9294,12072,11888,9571,2107,7882,...,4390,3863,2079,2806,2286,5403,1773,79,1332,2751
1,2015070200,83055,25759,12377,8679,9687,11292,9082,1800,6071,...,4698,3365,2107,2605,2330,5205,1430,51,1756,2377
2,2015070300,55174,24962,12544,9014,9610,10913,8775,1530,4586,...,4993,3077,1785,2539,2221,5292,1220,33,1360,2276
3,2015070400,43373,25379,13741,8635,9581,10689,6633,1461,4345,...,7664,3357,1573,2565,2368,5748,1258,47,1099,1759
4,2015070500,41914,25612,13575,9452,10662,12457,8179,1552,7668,...,6552,3567,1870,3080,2403,6241,1254,41,1111,1652
5,2015070600,38109,25209,12306,9775,9693,16562,8827,1510,17621,...,4967,5751,2249,3080,2484,5791,1396,618,1158,2068
6,2015070700,43763,24133,13993,9513,11327,13911,8324,1732,16018,...,8971,4426,2364,2868,2634,5924,1379,111,1227,2323
7,2015070800,47992,22712,11712,8545,10348,11272,7900,1590,12491,...,6369,3705,2866,2712,2524,5935,2001,64,1265,2104
8,2015070900,83148,22591,11356,10806,12953,13904,7698,1537,9920,...,5276,3987,2817,2824,2584,5250,1326,70,1106,2956
9,2015071000,58505,22615,12911,10688,12746,14417,6558,1198,7272,...,5081,3359,2535,2734,2408,4840,1973,54,1185,2564


In [7]:
# Third batch of articles: fetch each article's daily view counts and append them to the main
# dataframe as a new column named after the article.
articles = ['Nancy_Pelosi', 'Chuck_Schumer', 'Ted_Cruz', 'Paul_Ryan', 'Mitch_McConnell', 'Stephen_Harper', 'Pierre_Trudeau', 'James_Brown', 'John_Lennon', 'Paul_McCartney', 'Britney_Spears', 'Mariah_Carey', 'Rihanna', 'Jennifer_Lopez', 'Celine_Dion', 'Ella_Fitzgerald', 'Bob_Marley', 'Ben_Shapiro', 'Ann_Coulter', 'Whoopi_Goldberg', 'Chelsea_Handler', 'Anderson_Cooper', 'Tucker_Carlson', 'Tupac_Shakur', 'The_Notorious_B.I.G.', 'Snoop_Dogg', 'Robert_De_Niro', 'Al_Pacino', 'Marlon_Brando', 'Leonardo_DiCaprio', 'Harvey_Weinstein', 'Ben_Affleck', 'Mark_Wahlberg', 'Idris_Elba', 'Angelina_Jolie', 'Marilyn_Monroe', 'Natalie_Portman']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    # Stop on an HTTP error rather than trying to parse an error payload.
    r.raise_for_status()

    # Build this article's frame from its own records. The previous version indexed temp through
    # data.index, which raises IndexError as soon as temp has more rows than data.
    temp = (
        pd.DataFrame(r.json()['items'])[['timestamp', 'views']]
        .rename(columns={'timestamp': 'day', 'views': article})
    )

    # Default (inner) join on 'day': only days present in both frames are kept.
    data = data.merge(temp, on='day')

display(data)

Unnamed: 0,day,Donald_Trump,Barack_Obama,Ronald_Reagan,George_H._W._Bush,Bill_Clinton,George_W._Bush,Nelson_Mandela,Justin_Trudeau,Angela_Merkel,...,Al_Pacino,Marlon_Brando,Leonardo_DiCaprio,Harvey_Weinstein,Ben_Affleck,Mark_Wahlberg,Idris_Elba,Angelina_Jolie,Marilyn_Monroe,Natalie_Portman
0,2015070100,81749,28001,10885,9294,12072,11888,9571,2107,7882,...,6871,8122,13432,920,82695,19874,9137,15906,13237,8550
1,2015070200,83055,25759,12377,8679,9687,11292,9082,1800,6071,...,7488,7519,12798,915,38884,19226,7794,16353,13411,8329
2,2015070300,55174,24962,12544,9014,9610,10913,8775,1530,4586,...,9982,6833,13791,900,28725,20110,8075,15724,13785,8395
3,2015070400,43373,25379,13741,8635,9581,10689,6633,1461,4345,...,10147,7081,14088,789,22523,19357,9410,18889,14481,9481
4,2015070500,41914,25612,13575,9452,10662,12457,8179,1552,7668,...,9595,8307,14968,902,19921,30938,10002,18948,22903,10219
5,2015070600,38109,25209,12306,9775,9693,16562,8827,1510,17621,...,7934,8385,14819,1217,22217,18875,11914,20304,20896,10056
6,2015070700,43763,24133,13993,9513,11327,13911,8324,1732,16018,...,6919,7020,13466,1356,18954,15618,10101,18415,18931,9915
7,2015070800,47992,22712,11712,8545,10348,11272,7900,1590,12491,...,6924,6681,13020,1100,19187,16632,9424,17919,18210,8772
8,2015070900,83148,22591,11356,10806,12953,13904,7698,1537,9920,...,6674,6525,14395,1037,19166,16362,8148,15461,15001,8865
9,2015071000,58505,22615,12911,10688,12746,14417,6558,1198,7272,...,6761,7123,14225,948,24395,20259,8474,14886,14703,10520


In [8]:
# Fourth batch of articles: fetch each article's daily view counts and append them to the main
# dataframe as a new column named after the article.
articles = ['Muhammad_Ali', 'Joe_Frazier', 'Mike_Tyson', 'Floyd_Mayweather_Jr.', 'Manny_Pacquiao', 'Conor_McGregor', 'Ronda_Rousey', 'Tom_Brady', 'Michael_Jordan', 'LeBron_James']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    # Stop on an HTTP error rather than trying to parse an error payload.
    r.raise_for_status()

    # Build this article's frame from its own records. The previous version indexed temp through
    # data.index, which raises IndexError as soon as temp has more rows than data.
    temp = (
        pd.DataFrame(r.json()['items'])[['timestamp', 'views']]
        .rename(columns={'timestamp': 'day', 'views': article})
    )

    # Default (inner) join on 'day': only days present in both frames are kept.
    data = data.merge(temp, on='day')

display(data)

Unnamed: 0,day,Donald_Trump,Barack_Obama,Ronald_Reagan,George_H._W._Bush,Bill_Clinton,George_W._Bush,Nelson_Mandela,Justin_Trudeau,Angela_Merkel,...,Muhammad_Ali,Joe_Frazier,Mike_Tyson,Floyd_Mayweather_Jr.,Manny_Pacquiao,Conor_McGregor,Ronda_Rousey,Tom_Brady,Michael_Jordan,LeBron_James
0,2015070100,81749,28001,10885,9294,12072,11888,9571,2107,7882,...,11700,1709,11877,213,4777,36444,12458,5639,24863,18390
1,2015070200,83055,25759,12377,8679,9687,11292,9082,1800,6071,...,10466,1524,9480,191,5116,24610,22642,5759,20208,17245
2,2015070300,55174,24962,12544,9014,9610,10913,8775,1530,4586,...,13583,1868,9280,180,4146,33507,17479,5701,18120,15000
3,2015070400,43373,25379,13741,8635,9581,10689,6633,1461,4345,...,19454,2793,14947,165,4193,25802,13252,5914,18961,14276
4,2015070500,41914,25612,13575,9452,10662,12457,8179,1552,7668,...,31404,3499,10866,214,4287,26495,10975,5667,17781,13627
5,2015070600,38109,25209,12306,9775,9693,16562,8827,1510,17621,...,15545,2097,10015,195,4446,24139,11563,5721,17297,14626
6,2015070700,43763,24133,13993,9513,11327,13911,8324,1732,16018,...,12991,2857,9996,467,8293,25096,11462,5598,19014,15653
7,2015070800,47992,22712,11712,8545,10348,11272,7900,1590,12491,...,13545,3253,10223,394,5622,29705,11563,6045,16955,14140
8,2015070900,83148,22591,11356,10806,12953,13904,7698,1537,9920,...,12822,1907,9319,277,5433,35036,11296,7685,18882,17249
9,2015071000,58505,22615,12911,10688,12746,14417,6558,1198,7272,...,11942,1718,8996,326,5803,46931,13188,7634,16692,14495


In [9]:
# Fifth batch of articles: fetch each article's daily view counts and append them to the main
# dataframe as a new column named after the article.
articles = ['Wayne_Gretzky', 'Sidney_Crosby', 'Joe_Montana', 'Jimmy_Kimmel', 'Stephen_Colbert', 'Chris_Rock', 'Louis_C.K.', 'Mark_Zuckerberg', 'Steve_Jobs', 'Warren_Buffett', 'George_Soros', 'Bill_Gates', 'Jeff_Bezos', 'Bruce_Lee', 'Jackie_Chan', 'Jet_Li']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    # Stop on an HTTP error rather than trying to parse an error payload.
    r.raise_for_status()

    # Build this article's frame from its own records. The previous version indexed temp through
    # data.index, which raises IndexError as soon as temp has more rows than data.
    temp = (
        pd.DataFrame(r.json()['items'])[['timestamp', 'views']]
        .rename(columns={'timestamp': 'day', 'views': article})
    )

    # Default (inner) join on 'day': only days present in both frames are kept.
    data = data.merge(temp, on='day')

display(data)

Unnamed: 0,day,Donald_Trump,Barack_Obama,Ronald_Reagan,George_H._W._Bush,Bill_Clinton,George_W._Bush,Nelson_Mandela,Justin_Trudeau,Angela_Merkel,...,Louis_C.K.,Mark_Zuckerberg,Steve_Jobs,Warren_Buffett,George_Soros,Bill_Gates,Jeff_Bezos,Bruce_Lee,Jackie_Chan,Jet_Li
0,2015070100,81749,28001,10885,9294,12072,11888,9571,2107,7882,...,5157,15140,27143,10361,2856,17070,2343,12908,10959,3735
1,2015070200,83055,25759,12377,8679,9687,11292,9082,1800,6071,...,5025,12915,41366,11750,3224,21325,2262,12801,10512,4006
2,2015070300,55174,24962,12544,9014,9610,10913,8775,1530,4586,...,5193,10134,27632,10653,3257,16184,1968,13646,11552,5298
3,2015070400,43373,25379,13741,8635,9581,10689,6633,1461,4345,...,5143,9151,22133,8595,3039,14825,1900,13708,11405,4190
4,2015070500,41914,25612,13575,9452,10662,12457,8179,1552,7668,...,5410,9668,21299,8512,2482,14646,1811,13872,12227,4395
5,2015070600,38109,25209,12306,9775,9693,16562,8827,1510,17621,...,5324,10137,21966,10468,3252,15657,2455,15369,10809,4156
6,2015070700,43763,24133,13993,9513,11327,13911,8324,1732,16018,...,5373,10902,22085,13136,3299,16383,2678,17032,9557,4141
7,2015070800,47992,22712,11712,8545,10348,11272,7900,1590,12491,...,5718,10559,20016,10751,3715,16989,2655,16085,10820,4371
8,2015070900,83148,22591,11356,10806,12953,13904,7698,1537,9920,...,5472,17411,19608,10451,3415,16191,2625,15395,9579,4965
9,2015071000,58505,22615,12911,10688,12746,14417,6558,1198,7272,...,5351,10376,17475,9123,2948,14512,2276,15483,10251,4323


In [10]:

# Keep only the first eight characters of each 'day' value, dropping whatever follows them
# (the request range uses ten-character stamps such as 2015070100).
day_text = data['day'].astype(str)
data['day'] = day_text.str[:8]

In [11]:
# Write the combined pageview table to a CSV file; the relative path resolves against the
# current working directory.
output_csv = 'HistoryViewsModerns.csv'
data.to_csv(output_csv)