In [1]:
import requests
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

#Request, from the wikipedia pageviews API, the daily pageviews from July 2015 (earliest available) to the end of September 2018
#for the article about London. Pageviews are for the english language wikipedia only and excludes non-human (web crawler) views.
r = requests.get('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/London/daily/2015070100/2018093000')

In [2]:
#Create a dataframe from the response's dictionary that it outputted.
data = pd.DataFrame().from_dict(r.json())

#Create an article variable for the purpose of changing the pageview column's name to the name of the article.
article = data.loc[data.index[0], 'items'].get('article')

data.insert(0, 'day', 0)

data.columns = ['day', article]

#Populate the 'day' column with the timestamp of the daily viewcount entry.
for index, row in data.iterrows():
    data.loc[data.index[index], 'day'] = data.loc[data.index[index], article].get('timestamp')

#Change the Fascism column values to only the numerical viewcount portion of the entry.
for index, row in data.iterrows():
    data.loc[data.index[index], article] = data.loc[data.index[index], article].get('views')
    
display(data)


Unnamed: 0,day,London
0,2015070100,14117
1,2015070200,13871
2,2015070300,12061
3,2015070400,11927
4,2015070500,12632
5,2015070600,13897
6,2015070700,16339
7,2015070800,14193
8,2015070900,13622
9,2015071000,16482


In [3]:
#Same as above except request the same information for Paris.
r = requests.get('https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/Paris/daily/2015070100/2018093000')

In [4]:
#Same steps as above, but this time, we make a temporary dataframe and at the end, merge it with the original dataframe.
temp = pd.DataFrame().from_dict(r.json())

article = temp.loc[data.index[0], 'items'].get('article')

temp.insert(0, 'day', 0)

temp.columns = ['day', article]

for index, row in temp.iterrows():
    temp.loc[temp.index[index], 'day'] = temp.loc[data.index[index], article].get('timestamp')

for index, row in temp.iterrows():
    temp.loc[data.index[index], article] = temp.loc[data.index[index], article].get('views')
    
data = data.merge(temp, on='day')
    
display(data)

Unnamed: 0,day,London,Paris
0,2015070100,14117,8974
1,2015070200,13871,9329
2,2015070300,12061,8274
3,2015070400,11927,7351
4,2015070500,12632,8195
5,2015070600,13897,9832
6,2015070700,16339,9325
7,2015070800,14193,9468
8,2015070900,13622,8439
9,2015071000,16482,8524


In [5]:
#The loop automates the process above, it just needs a list of article names and it will tack their daily
#viewcounts on to the original dataframe.
articles = ['Rome', 'Florence', 'Milan', 'Venice', 'Edinburgh', 'Glasgow', 'New_York_City', 'Chicago', 'Los_Angeles', 'Toronto', 'Vancouver', 'Beijing', 'Hong_Kong', 'Singapore', 'Seoul', 'Taipei', 'Tokyo', 'Kyoto', 'Osaka', 'Shanghai', 'San_Francisco', 'Boston', 'Philadelphia', 'Seattle', 'Richmond,_Virginia', 'Washington,_D.C.', 'Istanbul', 'Karakorum', 'Vienna', 'Budapest', 'Prague', 'Berlin', 'Munich', 'Nuremberg', 'Frankfurt', 'Moscow', 'Saint_Petersburg']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    
    temp = pd.DataFrame().from_dict(r.json())

    temp.insert(0, 'day', 0)

    temp.columns = ['day', article]

    for index, row in temp.iterrows():
        temp.loc[temp.index[index], 'day'] = temp.loc[data.index[index], article].get('timestamp')

    for index, row in temp.iterrows():
        temp.loc[data.index[index], article] = temp.loc[data.index[index], article].get('views')
    
    data = data.merge(temp, on='day')
    

display(data)
    

Unnamed: 0,day,London,Paris,Rome,Florence,Milan,Venice,Edinburgh,Glasgow,New_York_City,...,Karakorum,Vienna,Budapest,Prague,Berlin,Munich,Nuremberg,Frankfurt,Moscow,Saint_Petersburg
0,2015070100,14117,8974,4832,3285,3069,5028,3009,2793,14290,...,341,4283,5665,5159,4564,4699,1319,3238,4832,3195
1,2015070200,13871,9329,4824,3120,3077,4574,2990,2780,14127,...,318,4014,5793,5076,4673,4340,1524,3447,4839,3077
2,2015070300,12061,8274,4453,2970,2872,4745,2750,2461,13520,...,280,3772,5507,4732,4079,2961,1329,3074,4427,3281
3,2015070400,11927,7351,4347,3064,2821,4417,2596,2453,13651,...,299,3736,5389,4452,3970,2716,1195,2936,4269,2960
4,2015070500,12632,8195,4698,3110,2972,5046,2927,2633,14636,...,443,4056,5815,4892,4394,3269,1366,3061,4141,3219
5,2015070600,13897,9832,5200,3380,3690,5186,3345,2988,15917,...,443,4462,6237,5376,4850,4059,1539,3694,4481,3606
6,2015070700,16339,9325,5415,3379,3312,5109,3295,3092,18684,...,393,4649,6249,5152,4747,3677,1468,4048,4649,3541
7,2015070800,14193,9468,5140,3567,3359,5185,3168,3054,16391,...,408,4421,7030,5036,4546,3422,1295,3551,4434,3304
8,2015070900,13622,8439,4990,3484,3428,5637,3677,2858,15043,...,405,4454,6184,5014,4501,3533,1333,3637,4323,3927
9,2015071000,16482,8524,5258,3228,3564,5156,3074,3046,14525,...,331,4123,5666,4965,4514,3355,1545,3383,4427,3423


In [6]:
display(data['Richmond,_Virginia'])

0       1798
1       1781
2       1659
3       1695
4       1710
5       1943
6       1937
7       1920
8       1921
9       2051
10      1768
11      1767
12      1911
13      1856
14      1822
15      2000
16      1707
17      1543
18      1718
19      1892
20      1832
21      1875
22      2091
23      1764
24      1518
25      1761
26      1937
27      1967
28      1832
29      1910
        ... 
1158    1464
1159    1584
1160    1727
1161    1748
1162    1597
1163    1557
1164    1464
1165    1398
1166    1690
1167    2080
1168    2504
1169    2287
1170    1960
1171    1805
1172    1483
1173    1496
1174    2106
1175    2029
1176    1679
1177    1603
1178    1592
1179    1478
1180    1661
1181    1675
1182    1654
1183    1603
1184    1566
1185    1432
1186    1210
1187    1530
Name: Richmond,_Virginia, Length: 1188, dtype: object

In [7]:
articles = ['Brussels', 'Bruges', 'Antwerp', 'Amsterdam', 'York', 'Rotterdam', 'Tbilisi', 'Warsaw', 'Ankara', 'Jerusalem', 'Tel_Aviv', 'Cairo', 'Alexandria', 'Aleppo', 'Damascus', 'Baghdad', 'Tehran', 'Baku', 'Samarkand', 'Bukhara', 'Athens', 'Corinth', 'Thessaloniki', 'Belgrade', 'Zagreb', 'Sarajevo', 'Bucharest', 'Belfast', 'Dublin', 'Stockholm', 'Copenhagen', 'Oslo', 'Helsinki', 'Mexico_City', 'Rio_de_Janeiro', 'Buenos_Aires', 'Dallas', 'Mecca', 'Riyadh', 'Dubai', 'Hanoi']

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    
    temp = pd.DataFrame().from_dict(r.json())

    temp.insert(0, 'day', 0)

    temp.columns = ['day', article]

    for index, row in temp.iterrows():
        temp.loc[temp.index[index], 'day'] = temp.loc[data.index[index], article].get('timestamp')

    for index, row in temp.iterrows():
        temp.loc[data.index[index], article] = temp.loc[data.index[index], article].get('views')
    
    data = data.merge(temp, on='day')
    

display(data)

Unnamed: 0,day,London,Paris,Rome,Florence,Milan,Venice,Edinburgh,Glasgow,New_York_City,...,Oslo,Helsinki,Mexico_City,Rio_de_Janeiro,Buenos_Aires,Dallas,Mecca,Riyadh,Dubai,Hanoi
0,2015070100,14117,8974,4832,3285,3069,5028,3009,2793,14290,...,2056,1949,3927,3705,2799,3289,4840,1818,10810,1356
1,2015070200,13871,9329,4824,3120,3077,4574,2990,2780,14127,...,2815,2005,3654,3668,2573,3281,4760,1920,11736,1393
2,2015070300,12061,8274,4453,2970,2872,4745,2750,2461,13520,...,1899,1842,3503,3587,2484,3038,4403,1486,9863,1360
3,2015070400,11927,7351,4347,3064,2821,4417,2596,2453,13651,...,2080,2080,3347,3327,2744,2917,4284,1471,9493,1216
4,2015070500,12632,8195,4698,3110,2972,5046,2927,2633,14636,...,1978,1962,3381,3565,3353,3455,4269,1453,10000,1276
5,2015070600,13897,9832,5200,3380,3690,5186,3345,2988,15917,...,2233,1946,4317,3828,3050,3493,4601,1675,11162,1577
6,2015070700,16339,9325,5415,3379,3312,5109,3295,3092,18684,...,2322,2120,4224,3855,2839,3466,4375,1815,10730,1495
7,2015070800,14193,9468,5140,3567,3359,5185,3168,3054,16391,...,2379,2097,3929,3570,2692,3339,4912,1611,11592,1472
8,2015070900,13622,8439,4990,3484,3428,5637,3677,2858,15043,...,2206,1993,3763,3376,2621,3449,4756,1698,10336,1466
9,2015071000,16482,8524,5258,3228,3564,5156,3074,3046,14525,...,2053,1996,3533,3396,2461,3508,4518,1650,9720,1274


In [8]:
articles = ['Bangkok', 'New_Delhi', 'Mumbai', 'Chennai', 'Manchester', 'Cornwall', 'Madrid', 'Barcelona', 'Lisbon', 'Johannesburg', 'Kiev', 'Riga', 'Shiraz', 'Kabul', 'Avignon', 'Marseille', 'Havana', 'Jakarta', 'Sydney', 'Melbourne', 'Detroit'] 

for article in articles:
    r = requests.get(f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia/all-access/user/{article}/daily/2015070100/2018093000')
    
    temp = pd.DataFrame().from_dict(r.json())

    temp.insert(0, 'day', 0)

    temp.columns = ['day', article]

    for index, row in temp.iterrows():
        temp.loc[temp.index[index], 'day'] = temp.loc[data.index[index], article].get('timestamp')

    for index, row in temp.iterrows():
        temp.loc[data.index[index], article] = temp.loc[data.index[index], article].get('views')
    
    data = data.merge(temp, on='day')
    

display(data)

Unnamed: 0,day,London,Paris,Rome,Florence,Milan,Venice,Edinburgh,Glasgow,New_York_City,...,Riga,Shiraz,Kabul,Avignon,Marseille,Havana,Jakarta,Sydney,Melbourne,Detroit
0,2015070100,14117,8974,4832,3285,3069,5028,3009,2793,14290,...,1387,580,1000,861,1862,2087,3059,3653,3670,5160
1,2015070200,13871,9329,4824,3120,3077,4574,2990,2780,14127,...,1380,599,1079,887,1926,1661,3291,3552,3467,5481
2,2015070300,12061,8274,4453,2970,2872,4745,2750,2461,13520,...,1378,646,973,816,1679,1383,2573,3276,3303,5093
3,2015070400,11927,7351,4347,3064,2821,4417,2596,2453,13651,...,1201,558,1183,1086,1657,1290,2633,3015,3158,5063
4,2015070500,12632,8195,4698,3110,2972,5046,2927,2633,14636,...,1219,594,1163,987,2011,1318,2540,3330,3406,5279
5,2015070600,13897,9832,5200,3380,3690,5186,3345,2988,15917,...,1407,634,1149,960,2133,1375,2981,3919,3911,5953
6,2015070700,16339,9325,5415,3379,3312,5109,3295,3092,18684,...,1478,631,1141,1030,2178,1779,3003,3864,3775,5562
7,2015070800,14193,9468,5140,3567,3359,5185,3168,3054,16391,...,1565,691,1182,916,2122,1534,2979,3569,3850,5458
8,2015070900,13622,8439,4990,3484,3428,5637,3677,2858,15043,...,2516,596,1066,880,1991,1275,2952,3644,3748,5355
9,2015071000,16482,8524,5258,3228,3564,5156,3074,3046,14525,...,1438,736,985,869,1991,1285,2853,3452,3581,5391


In [9]:

data['day'] = data['day'].astype(str).str.slice(0,8)

In [10]:
#These articles have too few daily views.
data = data.drop(columns = ['Karakorum'])
display(data)

Unnamed: 0,day,London,Paris,Rome,Florence,Milan,Venice,Edinburgh,Glasgow,New_York_City,...,Riga,Shiraz,Kabul,Avignon,Marseille,Havana,Jakarta,Sydney,Melbourne,Detroit
0,20150701,14117,8974,4832,3285,3069,5028,3009,2793,14290,...,1387,580,1000,861,1862,2087,3059,3653,3670,5160
1,20150702,13871,9329,4824,3120,3077,4574,2990,2780,14127,...,1380,599,1079,887,1926,1661,3291,3552,3467,5481
2,20150703,12061,8274,4453,2970,2872,4745,2750,2461,13520,...,1378,646,973,816,1679,1383,2573,3276,3303,5093
3,20150704,11927,7351,4347,3064,2821,4417,2596,2453,13651,...,1201,558,1183,1086,1657,1290,2633,3015,3158,5063
4,20150705,12632,8195,4698,3110,2972,5046,2927,2633,14636,...,1219,594,1163,987,2011,1318,2540,3330,3406,5279
5,20150706,13897,9832,5200,3380,3690,5186,3345,2988,15917,...,1407,634,1149,960,2133,1375,2981,3919,3911,5953
6,20150707,16339,9325,5415,3379,3312,5109,3295,3092,18684,...,1478,631,1141,1030,2178,1779,3003,3864,3775,5562
7,20150708,14193,9468,5140,3567,3359,5185,3168,3054,16391,...,1565,691,1182,916,2122,1534,2979,3569,3850,5458
8,20150709,13622,8439,4990,3484,3428,5637,3677,2858,15043,...,2516,596,1066,880,1991,1275,2952,3644,3748,5355
9,20150710,16482,8524,5258,3228,3564,5156,3074,3046,14525,...,1438,736,985,869,1991,1285,2853,3452,3581,5391


In [11]:
data.to_csv('HistoryViewsCities.csv')