In [1]:
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl
import sqlite3
from collections import deque

In [2]:
# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

In [3]:
seed_page = "https://en.wikipedia.org"
start_page = "/wiki/Mathematics"

In [4]:
def create_cursor():
    conn = sqlite3.connect('../data/wiki_crawler.sqlite')
    cur = conn.cursor()    
    cur.execute('''
                CREATE TABLE IF NOT EXISTS WikiPages
                (id INTEGER PRIMARY KEY, title TEXT, URL TEXT UNIQUE, retrived BOOLEAN, citedCount INTEGER)
                ''')
    cur.execute("""
                CREATE TABLE IF NOT EXISTS Cites
                (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))
                """)
    return conn, cur

In [5]:
conn, cur = create_cursor()

In [7]:
def crawl_with_given_page(conn, cur, start_page = "/wiki/Mathematics", time = 100, seed_page = "https://en.wikipedia.org"):
    cur.execute('SELECT retrived from WikiPages WHERE URL = ? LIMIT 1', (start_page, ))
    try:
        retrived = cur.fetchone()[0]
        if retrived == 1:
            print("The given start_page has been crawled already.")
            return
        print("Start crawling")
    except:
        print("Start crawling")
        
    queue = deque([start_page])
    count = 0
    total_count = 0
    while queue and count<time:
        print(count)
        url = queue.popleft()

        cur.execute('SELECT retrived from WikiPages WHERE URL = ? LIMIT 1', (url, ))
        try:
            retrived = cur.fetchone()[0]
            if retrived == 1:
                print("The given website has been crawled already.")
                continue
            cur.execute('UPDATE WikiPages SET retrived = ? WHERE URL = ?', (1, url))
        except:
            total_count += 1
            cur.execute('INSERT INTO WikiPages (URL, retrived, citedCount) VALUES (?, 1, 0)', (url, ))

        page_count = crawl_one_page(url, cur, adding = True, queue = queue)
        
        if page_count>-1:    
            count += 1
            total_count += page_count
            if count%100==0:
                conn.commit()
    
    conn.commit()
    print("Total new pages crawled: ", count)
    print("Total new pages found: ", total_count)
    return

In [8]:
def crawl_fetch_30_candidates(queue, cur):
    cur.execute('SELECT URL from WikiPages WHERE retrived = ? ORDER BY citedCount DESC LIMIT 30', (0, ))
    for row in cur:
        queue.append(row[0])    
        
def crawl_one_page(url, cur, adding = False, queue = None, seed_page = "https://en.wikipedia.org"):
    cur.execute("SELECT id FROM WikiPages WHERE url = ?", (url, ))
    from_id = cur.fetchone()[0]
    try:
        html = urllib.request.urlopen(seed_page+url, context=ctx).read()
    except:
        # print(seed_page+url, " can't be retrived.")
        return -1   
    count = 0
    soup = BeautifulSoup(html, 'html.parser')
    title = soup.title.string
    print(title)
    cur.execute('UPDATE WikiPages SET title = ? WHERE URL = ?', (title, url))
    # Retrieve all of the anchor tags
    tags = soup('a')
    cited = set()
    for tag in tags:
        to_url = tag.get('href', None)
        if not to_url or not to_url.startswith('/') or to_url.startswith('//') or to_url in cited:
            continue
        cited.add(to_url)
        cur.execute('SELECT citedCount, id from WikiPages WHERE URL = ? LIMIT 1', (to_url, ))
        if adding:
            queue.append(to_url)
        try:
            num = cur.fetchone()[0]
            cur.execute('UPDATE WikiPages SET citedCount = ? WHERE URL = ?', (num+1, to_url))
        except:
            count += 1
            cur.execute('INSERT INTO WikiPages (URL, retrived, citedCount) VALUES (?, 0, 1)', (to_url, ))
        cur.execute('SELECT id FROM WikiPages WHERE url = ?', (to_url, ))
        to_id = cur.fetchone()[0]
        if from_id!=to_id:
            cur.execute('INSERT OR IGNORE INTO Cites (from_id, to_id) VALUES (?, ?)', (from_id, to_id))
    return count

def crawl_with_existing_page(conn, cur, time = 100, seed_page = "https://en.wikipedia.org"):
    queue = deque()
    crawl_fetch_30_candidates(queue, cur)
    if len(queue)==0:
        print('No avaiable link in the database to crawl')
        return
    else:
        print('Start crawling')
        
    count = 0
    total_count = 0
    while count<time:
        print(count)
        if len(queue)==0:
            crawl_fetch_30_candidates(queue, cur)
            if len(queue)==0:
                print('No available links')
                break
                       
        url = queue.popleft()
        cur.execute('UPDATE WikiPages SET retrived = ? WHERE URL = ?', (1, url))
        
        page_count = crawl_one_page(url, cur)
 
        if page_count>-1:    
            count += 1
            total_count += page_count
            if count%100==0:
                conn.commit()
    
    conn.commit()
    print("Total new pages crawled: ", count)
    print("Total new pages found: ", total_count)
    return

In [8]:
crawl_with_given_page(conn, cur, time = 2000)

Start crawling
0
Mathematics - Wikipedia
1
Wikipedia:Protection policy - Wikipedia
2
Mathematics (disambiguation) - Wikipedia
3
Math (disambiguation) - Wikipedia
4
File:Euclid.jpg - Wikipedia
5
Euclid - Wikipedia
6
Calipers - Wikipedia
7
Raphael - Wikipedia
8
The School of Athens - Wikipedia
9
Ancient Greek - Wikipedia
10
Quantity - Wikipedia
11
Number theory - Wikipedia
12
Mathematical structure - Wikipedia
13
Algebra - Wikipedia
14
Space - Wikipedia
15
Geometry - Wikipedia
16
Calculus - Wikipedia
17
Mathematical analysis - Wikipedia
18
Definition - Wikipedia
19
Pattern - Wikipedia
20
Conjecture - Wikipedia
21
Mathematical proof - Wikipedia
22
Abstraction (mathematics) - Wikipedia
23
Logic - Wikipedia
24
Counting - Wikipedia
25
Calculation - Wikipedia
26
Measurement - Wikipedia
27
Shape - Wikipedia
28
Motion - Wikipedia
29
Physical object - Wikipedia
30
History of mathematics - Wikipedia
31
Research - Wikipedia
32
Greek mathematics - Wikipedia
33
Euclid's Elements - Wikipedia
34
Giuse

Continuous function - Wikipedia
244
Complex number - Wikipedia
245
Quaternion - Wikipedia
246
Octonion - Wikipedia
247
Transfinite number - Wikipedia
248
Infinity - Wikipedia
249
Fundamental theorem of algebra - Wikipedia
250
Cardinal number - Wikipedia
251
Aleph number - Wikipedia
252
Function (mathematics) - Wikipedia
253
Operation (mathematics) - Wikipedia
254
Binary relation - Wikipedia
255
Abstraction - Wikipedia
256
Group (mathematics) - Wikipedia
257
Ring (mathematics) - Wikipedia
258
Abstract algebra - Wikipedia
259
Straightedge and compass construction - Wikipedia
260
Galois theory - Wikipedia
261
Linear algebra - Wikipedia
262
Vector space - Wikipedia
263
Euclidean vector - Wikipedia
264
Combinatorics - Wikipedia
265
File:Elliptic curve simple.svg - Wikipedia
266
File:Rubik's cube.svg - Wikipedia
267
File:Group diagdram D6.svg - Wikipedia
268
File:Lattice of the divisibility of 60.svg - Wikipedia
269
File:Braid-modular-group-cover.svg - Wikipedia
270
Graph theory - Wikipedia


Book sources - Wikipedia
471
Template:Areas of mathematics - Wikipedia
472
Template talk:Areas of mathematics - Wikipedia
473
Elementary algebra - Wikipedia
474
Multilinear algebra - Wikipedia
475
Discrete mathematics - Wikipedia
476
Finite geometry - Wikipedia
477
Algebraic number theory - Wikipedia
478
Analytic number theory - Wikipedia
479
Diophantine geometry - Wikipedia
480
Geometric topology - Wikipedia
481
Category:Fields of mathematics - Wikipedia
482
Wikipedia:WikiProject Mathematics - Wikipedia
483
Help:Authority control - Wikipedia
484
Integrated Authority File - Wikipedia
485
Historical Dictionary of Switzerland - Wikipedia
486
Library of Congress Control Number - Wikipedia
487
National Diet Library - Wikipedia
488
National Library of Israel - Wikipedia
489
Help:Category - Wikipedia
490
Category:Mathematics - Wikipedia
491
Category:Formal sciences - Wikipedia
492
Category:Mathematical terminology - Wikipedia
493
Category:Main topic articles - Wikipedia
494
Category:Webarchi

Template:Tl - Wikipedia
644
Template:Ambox - Wikipedia
645
Denial-of-service attack - Wikipedia
646
Help:Template - Wikipedia
647
Wikipedia:Lua - Wikipedia
648
Wikipedia:Protection policy - Wikipedia
649
Wikipedia:Template editor - Wikipedia
650
Template:Edit template-protected - Wikipedia
651
Wikipedia:Rough guide to semi-protection - Wikipedia
652
Wikipedia:Vandalism - Wikipedia
653
Sockpuppet (Internet) - Wikipedia
654
Wikipedia:Pending changes - Wikipedia
655
Template:Edit semi-protected - Wikipedia
656
Wikipedia:Requests for page protection - Wikipedia
657
Wikipedia:Requests for permissions/Confirmed - Wikipedia
658
Category:Wikipedia content policies - Wikipedia
659
Wikipedia:Biographies of living persons - Wikipedia
660
Wikipedia:Neutral point of view - Wikipedia
661
Wikipedia:Wikipedia in the media - Wikipedia
662
Wikipedia:Edit warring - Wikipedia
663
Wikipedia:Vandalism - Wikipedia
664
IP address - Wikipedia
665
IP address - Wikipedia
666
IP address spoofing - Wikipedia
667
W

Wikipedia:Proposed deletion (books) - Wikipedia
817
Wikipedia:Criteria for speedy deletion - Wikipedia
818
Wikipedia:Attack page - Wikipedia
819
Wikipedia:Oversight - Wikipedia
820
Wikipedia:Article size - Wikipedia
821
Wikipedia:Be bold - Wikipedia
822
Wikipedia:Disambiguation - Wikipedia
823
Wikipedia:Hatnote - Wikipedia
824
Wikipedia:Talk page guidelines - Wikipedia
825
Wikipedia:Signatures - Wikipedia
826
Wikipedia:Broad-concept article - Wikipedia
827
Wikipedia:Manual of Style - Wikipedia
828
Wikipedia:Manual of Style/Contents - Wikipedia
829
Wikipedia:Manual of Style/Accessibility - Wikipedia
830
Wikipedia:Make technical articles understandable - Wikipedia
831
Wikipedia:Manual of Style/Dates and numbers - Wikipedia
832
Wikipedia:Manual of Style/Images - Wikipedia
833
Wikipedia:Manual of Style/Layout - Wikipedia
834
Wikipedia:Manual of Style/Lead section - Wikipedia
835
Wikipedia:Manual of Style/Linking - Wikipedia
836
Wikipedia:Manual of Style/Lists - Wikipedia
837
Wikipedia:Cate

User:Abubakarjan518/sandbox - Wikipedia
936
User:Bootblack - Wikipedia
937
User:Chickstarr404/Gather lists/8153 – Ascent of Man: "The Music of the Spheres" - Wikipedia
938
User:DeusImperator/Math - Wikipedia
939
User:Dpm12/Userboxes - Wikipedia
940
User:Fdizile/All Knowladge - Wikipedia
941
User:HellNaraku - Wikipedia
942
User:HellNaraku/sandbox - Wikipedia
943
User:Jenova20/Barnstars and awards - Wikipedia
944
User:Mubarak Hossain Chowdhury/sandbox - Wikipedia
945
User:Sebastien Palcoux - Wikipedia
946
User:Sj/olpc dictionary images/500 - Wikipedia
947
User talk:Jenova20/Archive 5 - Wikipedia
948
Portal:Mathematics/Intro/2 - Wikipedia
949
Portal:Mathematics/Intro/Image - Wikipedia
950
Global usage for "File:Euclid.jpg" - Wikipedia
951
The given website has been crawled already.
951
The given website has been crawled already.
951
Create account - Wikipedia
952
Log in - Wikipedia
953
The given website has been crawled already.
953
File talk:Euclid.jpg - Wikipedia
954
The given website h

Sporus of Nicaea - Wikipedia
1120
Thales of Miletus - Wikipedia
1121
Theano (philosopher) - Wikipedia
1122
Theodorus of Cyrene - Wikipedia
1123
Theodosius of Bithynia - Wikipedia
1124
Theon of Smyrna - Wikipedia
1125
Thymaridas - Wikipedia
1126
Xenocrates - Wikipedia
1127
Zeno of Elea - Wikipedia
1128
Zeno of Sidon - Wikipedia
1129
Zenodorus (mathematician) - Wikipedia
1130
Almagest - Wikipedia
1131
Archimedes Palimpsest - Wikipedia
1132
Arithmetica - Wikipedia
1133
Apollonius of Perga - Wikipedia
1134
On the Sizes and Distances (Aristarchus) - Wikipedia
1135
On Sizes and Distances - Wikipedia
1136
The Sand Reckoner - Wikipedia
1137
Problem of Apollonius - Wikipedia
1138
Squaring the circle - Wikipedia
1139
Doubling the cube - Wikipedia
1140
Angle trisection - Wikipedia
1141
Cyrene, Libya - Wikipedia
1142
Platonic Academy - Wikipedia
1143
Template:Ancient Greece topics - Wikipedia
1144
Template talk:Ancient Greece topics - Wikipedia
1145
Ancient Greece - Wikipedia
1146
Timeline of anci

Olympia, Greece - Wikipedia
1366
Athenian Treasury - Wikipedia
1367
Lion Gate - Wikipedia
1368
Long Walls - Wikipedia
1369
Philippeion - Wikipedia
1370
Theatre of Dionysus - Wikipedia
1371
Tunnel of Eupalinos - Wikipedia
1372
Temple of Aphaea - Wikipedia
1373
Temple of Artemis - Wikipedia
1374
Temple of Athena Nike - Wikipedia
1375
Erechtheion - Wikipedia
1376
Temple of Hephaestus - Wikipedia
1377
Temple of Hera, Olympia - Wikipedia
1378
Parthenon - Wikipedia
1379
Samothrace temple complex - Wikipedia
1380
Temple of Zeus, Olympia - Wikipedia
1381
The given website has been crawled already.
1381
Proto-Greek language - Wikipedia
1382
Mycenaean Greek - Wikipedia
1383
Homeric Greek - Wikipedia
1384
Ancient Greek dialects - Wikipedia
1385
Aeolic Greek - Wikipedia
1386
Arcadocypriot Greek - Wikipedia
1387
Attic Greek - Wikipedia
1388
Doric Greek - Wikipedia
1389
Ionic Greek - Wikipedia
1390
Locrian Greek - Wikipedia
1391
Ancient Macedonian language - Wikipedia
1392
Pamphylian Greek - Wikiped

View source for Euclid - Wikipedia
1586
Euclid: Revision history - Wikipedia
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
The given website has been crawled already.
1587
Pages that link to "Euclid" - Wikipedia
1588
Related changes - Wikipedia
1589
The given website has been crawled already.
1589
The given website has been crawled already.
1589
Euclid - Wikipedia
1590
Information for "Euclid" - Wikipedia
1591
Cite This Page - Wikipedia
1592
Book creator - Wikipedia
1593
Download as PDF - Wikipedia
1594
Euclid - Wikipedia
1595
The given website has been crawled already.
1595
Caliper (disambiguation) - Wikipedia
1596
File:Caliper detail view.

1804
Lesbian rule - Wikipedia
1805
Measuring rod - Wikipedia
1806
Meterstick - Wikipedia
1807
Plumb bob - Wikipedia
1808
Protractor - Wikipedia
1809
Set square - Wikipedia
1810
Skirret (tool) - Wikipedia
1811
Sliding T bevel - Wikipedia
1812
Speed square - Wikipedia
1813
Spirit level - Wikipedia
1814
Steel square - Wikipedia
1815
Stencil - Wikipedia
1816
T-square - Wikipedia
1817
Theodolite - Wikipedia
1818
Try square - Wikipedia
1819
Weighing scale - Wikipedia
1820
Winding stick - Wikipedia
1821
Template:Types of tools - Wikipedia
1822
Template:Cleaning tools - Wikipedia
1823
Template:Cutting and abrasive tools - Wikipedia
1824
Template:Forestry tools - Wikipedia
1825
Template:Garden tools - Wikipedia
1826
Template:Hand tools - Wikipedia
1827
Template:Machine and metalworking tools - Wikipedia
1828
Template:Power tools - Wikipedia
1829
Template talk:Forestry tools - Wikipedia
1830
Tool - Wikipedia
1831
Heavy equipment - Wikipedia
1832
Tree planting - Wikipedia
1833
Afforestation - Wik

In [9]:
crawl_with_given_page(conn, cur, start_page = "/wiki/Mathematics", time = 400)

The given start_page has been crawled already.


In [10]:
crawl_with_given_page(conn, cur, start_page = "/wiki/Toeplitz_operator", time = 500)

Start crawling
0
Toeplitz operator - Wikipedia
1
The given website has been crawled already.
1
The given website has been crawled already.
1
Wikipedia:Citing sources - Wikipedia
2
Wikipedia:WikiProject Reliability - Wikipedia
3
Wikipedia:When to cite - Wikipedia
4
Help:Maintenance template removal - Wikipedia
5
Operator theory - Wikipedia
6
Dilation (operator theory) - Wikipedia
7
Multiplication operator - Wikipedia
8
Hardy space - Wikipedia
9
Editing Toeplitz operator (section) - Wikipedia
10
Basis (linear algebra) - Wikipedia
11
Editing Toeplitz operator (section) - Wikipedia
12
The given website has been crawled already.
12
Fredholm - Wikipedia
13
Mark Krein - Wikipedia
14
Harold Widom - Wikipedia
15
Atiyah–Singer index theorem - Wikipedia
16
Sheldon Axler - Wikipedia
17
Sun-Yung Alice Chang - Wikipedia
18
Donald Sarason - Wikipedia
19
Compact operator - Wikipedia
20
Editing Toeplitz operator (section) - Wikipedia
21
Category:CS1 maint: Multiple names: authors list - Wikipedia
22
Bi

Template:Cite AV media notes - Wikipedia
125
Template:Cite comic - Wikipedia
126
Template:Comic strip reference - Wikipedia
127
Template:Cite conference - Wikipedia
128
Template:Cite court - Wikipedia
129
Template:Cite encyclopedia - Wikipedia
130
Template:Cite episode - Wikipedia
131
Template:Cite mailing list - Wikipedia
132
Template:Cite map - Wikipedia
133
Template:Cite newsgroup - Wikipedia
134
Template:Cite patent - Wikipedia
135
Template:Cite press release - Wikipedia
136
Template:Cite thesis - Wikipedia
137
Template:Cite video game - Wikipedia
138
Help:References and page numbers - Wikipedia
139
E-book - Wikipedia
140
Bekker numbering - Wikipedia
141
Wikipedia:RefToolbar/2.0 - Wikipedia
142
Wikipedia:RefToolbar/2.0 - Wikipedia
143
International Standard Book Number - Wikipedia
144
The given website has been crawled already.
144
PubMed - Wikipedia
145
Wikipedia:Verifiability - Wikipedia
146
The given website has been crawled already.
146
The given website has been crawled alread

Wikipedia:WikiProject Citation cleanup - Wikipedia
321
Wikipedia:Citation overkill - Wikipedia
322
Wikipedia:You don't need to cite that the sky is blue - Wikipedia
323
Wikipedia:You do need to cite that the sky is blue - Wikipedia
324
Wikipedia:Video links - Wikipedia
325
Wikipedia:Reference database - Wikipedia
326
Wikipedia:Citing sources - Wikipedia
327
Help:Footnotes - Wikipedia
328
Wikipedia:Glossary - Wikipedia
329
Wikipedia talk:Citing sources/Archive 18 - Wikipedia
330
Book sources - Wikipedia
331
Book sources - Wikipedia
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been crawled already.
332
The given website has been 

Category:Pages with missing references list - Wikipedia
424
Category:Pages with broken reference names - Wikipedia
425
Category:Accuracy disputes - Wikipedia
426
Category:NPOV disputes - Wikipedia
427
Category:Wikipedia articles in need of updating - Wikipedia
428
Category:Articles that may contain original research - Wikipedia
429
Category:Self-contradictory articles - Wikipedia
430
Category:Talk pages requiring geodata verification - Wikipedia
431
The given website has been crawled already.
431
Wikipedia:Verifiability - Wikipedia
432
Creating User talk:1Shaggy1 - Wikipedia
433
User contributions for 1Shaggy1 - Wikipedia
434
User talk:A Quest For Knowledge - Wikipedia
435
User contributions for A Quest For Knowledge - Wikipedia
436
User talk:A garbage person - Wikipedia
437
User contributions for A garbage person - Wikipedia
438
User talk:A.Ou - Wikipedia
439
User contributions for A.Ou - Wikipedia
440
User talk:Adamsan - Wikipedia
441
User contributions for Adamsan - Wikipedia
442
Us

In [11]:
crawl_with_existing_page(conn, cur, 5000)

Start crawling
0
Greek language - Wikipedia
1
International Standard Serial Number - Wikipedia
2
File:Question book-new.svg - Wikipedia
3
Category:All articles needing additional references - Wikipedia
4
Geographic coordinate system - Wikipedia
5
Athens - Wikipedia
6
Category:Coordinates on Wikidata - Wikipedia
7
Wikipedia - Wikipedia
8
Wikipedia:Maintenance - Wikipedia
9
Wikipedia:Reference desk - Wikipedia
10
Strabo - Wikipedia
11
National Library of Australia - Wikipedia
12
Greece - Wikipedia
13
Public domain - Wikipedia
14
Roman Empire - Wikipedia
15
Wikipedia:Why create an account? - Wikipedia
16
Help:Cheatsheet - Wikipedia
17
Category:CS1: long volume value - Wikipedia
18
Latin - Wikipedia
19
Byzantine Empire - Wikipedia
20
Wikipedia:What is an article? - Wikipedia
21
Wikipedia:Help desk - Wikipedia
22
Wikipedia:Noticeboards - Wikipedia
23
Wikipedia:Template messages - Wikipedia
24
Wikipedia:Consensus - Wikipedia
25
Category:Commons category link from Wikidata - Wikipedia
26
Wiki

Wikipedia:How to improve image quality - Wikipedia
208
Help:Labeled section transclusion - Wikipedia
209
Wikipedia:Catalogue of CSS classes - Wikipedia
210
Wikipedia:Customizing watchlists - Wikipedia
211
Wikipedia:User scripts/Guide - Wikipedia
212
Wikipedia:User scripts/Techniques - Wikipedia
213
Help:Creating a bot - Wikipedia
214
Template talk:Wikipedia technical help - Wikipedia
215
Help:Directory - Wikipedia
216
Help:Entering special characters - Wikipedia
217
Help:Printing - Wikipedia
218
Wikipedia:Software notices - Wikipedia
219
Help:Notifications - Wikipedia
220
Help:Notifications/FAQ - Wikipedia
221
Wikipedia:Page Curation/Help - Wikipedia
222
Help:Switch parser function - Wikipedia
223
Help:Time function - Wikipedia
224
Help:Basic table markup - Wikipedia
225
Help:Conditional tables - Wikipedia
226
Wikipedia:Moving files to Commons - Wikipedia
227
Wikipedia:Graphics Lab/Resources - Wikipedia
228
Help:Family trees - Wikipedia
229
Wikipedia:Graphs and charts - Wikipedia
230
W

Wikipedia:Requests for permissions/Account creator - Wikipedia
393
Wikipedia:Requests for permissions/Event coordinator - Wikipedia
394
Wikipedia:Requests for permissions/File mover - Wikipedia
395
Wikipedia:Requests for permissions/New page reviewer - Wikipedia
396
Wikipedia:Requests for permissions/Rollback - Wikipedia
397
Wikipedia:Functionaries - Wikipedia
398
Create account - Wikipedia
399
Wikipedia:IP addresses are not people - Wikipedia
400
Wikipedia:IP hopper - Wikipedia
401
Wikipedia:New account - Wikipedia
402
Wikipedia:Sock puppetry - Wikipedia
403
Help:Two-factor authentication - Wikipedia
404
Wikipedia:Compromised accounts - Wikipedia
405
Wikipedia:Unblock Ticket Request System - Wikipedia
406
Wikipedia:Sleeper account - Wikipedia
407
Wikipedia:WikiProject User scripts/Scripts/WikiBreak Enforcer - Wikipedia
408
Wikipedia:Retiring - Wikipedia
409
Wikipedia:Event coordinator - Wikipedia
410
Wikipedia:Sockpuppet investigations/SPI/Clerks - Wikipedia
411
Template talk:Wikipedi

Wikipedia:Welcoming committee/Welcome to Wikipedia - Wikipedia
575
Wikipedia:Counter-Vandalism Unit - Wikipedia
576
Wikipedia:Community portal/Opentask - Wikipedia
577
Wikipedia:Mediation Committee - Wikipedia
578
Category:Wikipedians - Wikipedia
579
Talk:Main Page - Wikipedia
580
Wikipedia:Statistics - Wikipedia
581
Wikipedia:Requested articles/Images - Wikipedia
582
Wikipedia:Awards - Wikipedia
583
Wikipedia:Deletion process - Wikipedia
584
Wikipedia:XfD today - Wikipedia
585
Wikipedia:Featured list candidates - Wikipedia
586
Wikipedia:Kindness Campaign - Wikipedia
587
Help:Notifications/Thanks - Wikipedia
588
Wikipedia:WikiProject Democracy - Wikipedia
589
Wikipedia:A nice cup of tea and a sit down - Wikipedia
590
Wikipedia:Database reports/Forgotten articles - Wikipedia
591
Wikipedia:Adopt-a-user - Wikipedia
592
Wikipedia:News - Wikipedia
593
Wikipedia:Goings-on - Wikipedia
594
Wikipedia:Content assessment - Wikipedia
595
Wikipedia:Articles for creation/Redirects and categories - W

Wikipedia:Image use policy - Wikipedia
760
Wikipedia:Manual of Style/Video games - Wikipedia
761
Wikipedia:Manual of Style/Military history - Wikipedia
762
Wikipedia:Manual of Style/China and Chinese-related articles - Wikipedia
763
Wikipedia:Manual of Style/France and French-related articles - Wikipedia
764
Wikipedia:Manual of Style/Lists - Wikipedia
765
Wikipedia:Stand-alone lists - Wikipedia
766
Template:Style - Wikipedia
767
Wikipedia:Manual of Style/Capital letters - Wikipedia
768
Template:Manual of Style - Wikipedia
769
Wikipedia:Stand-alone lists - Wikipedia
770
Wikipedia:Manual of Style/Philosophy - Wikipedia
771
Wikipedia:Manual of Style/Stringed instrument tunings - Wikipedia
772
Wikipedia:WikiProject Computer science/Manual of style - Wikipedia
773
Wikipedia:Manual of Style/Computing - Wikipedia
774
Wikipedia:Manual of Style/Anime- and manga-related articles - Wikipedia
775
Template:Taxobox/doc - Wikipedia
776
Category:Wikipedia Manual of Style - Wikipedia
777
eBay - Wikiped

Thomas Hobbes - Wikipedia
972
Mind–body dualism - Wikipedia
973
John Stuart Mill - Wikipedia
974
Philosophy of chemistry - Wikipedia
975
Philosophy of technology - Wikipedia
976
Philosophy of computer science - Wikipedia
977
Philosophy of psychiatry - Wikipedia
978
Philosophy of geography - Wikipedia
979
Philosophy of engineering - Wikipedia
980
Individualism - Wikipedia
981
Post-structuralism - Wikipedia
982
Utilitarianism - Wikipedia
983
Virtue ethics - Wikipedia
984
Aristotelianism - Wikipedia
985
Ancient philosophy - Wikipedia
986
Social contract - Wikipedia
987
Nominalism - Wikipedia
988
Subjectivism - Wikipedia
989
Social philosophy - Wikipedia
990
Action theory (philosophy) - Wikipedia
991
Libertarianism (metaphysics) - Wikipedia
992
Cosmology (philosophy) - Wikipedia
993
Philosophy of history - Wikipedia
994
Collectivism - Wikipedia
995
Philosophy and economics - Wikipedia
996
Logical positivism - Wikipedia
997
Social constructionism - Wikipedia
998
Legalism (Chinese philosophy

Philosophy and literature - Wikipedia
1206
Philosophy of sport - Wikipedia
1207
Philosophy of war - Wikipedia
1208
Philosophy - Wikipedia
1209
Mazdak - Wikipedia
1210
Reformed epistemology - Wikipedia
1211
Utilitarianism - Wikipedia
1212
Theory of art - Wikipedia
1213
Epistemological particularism - Wikipedia
1214
Reasonism - Wikipedia
1215
Philosophical skepticism - Wikipedia
1216
Indigenous American philosophy - Wikipedia
1217
Aztec philosophy - Wikipedia
1218
Indonesian philosophy - Wikipedia
1219
Vietnamese philosophy - Wikipedia
1220
Australian philosophy - Wikipedia
1221
Czech philosophy - Wikipedia
1222
Danish philosophy - Wikipedia
1223
List of Slovene philosophers - Wikipedia
1224
Spanish philosophy - Wikipedia
1225
Turkish philosophy - Wikipedia
1226
Book:Philosophy - Wikipedia
1227
Martin Heidegger - Wikipedia
1228
Karl Marx - Wikipedia
1229
Søren Kierkegaard - Wikipedia
1230
Daniel Dennett - Wikipedia
1231
Arthur Schopenhauer - Wikipedia
1232
Confucius - Wikipedia
1233
Edwa

Tertullian - Wikipedia
1446
José Ortega y Gasset - Wikipedia
1447
Hypostatic abstraction - Wikipedia
1448
Experience - Wikipedia
1449
Richard Rorty - Wikipedia
1450
Voltaire - Wikipedia
1451
Philosophy of self - Wikipedia
1452
Naïve realism - Wikipedia
1453
Capitalism - Wikipedia
1454
Society - Wikipedia
1455
Friedrich Engels - Wikipedia
1456
Peter Singer - Wikipedia
1457
Quality (philosophy) - Wikipedia
1458
Object (philosophy) - Wikipedia
1459
List of epistemologists - Wikipedia
1460
Al-Ghazali - Wikipedia
1461
Thomas Jefferson - Wikipedia
1462
Bernard Williams - Wikipedia
1463
Thought - Wikipedia
1464
Category:Infobox person using alma mater - Wikipedia
1465
Nicholas Wolterstorff - Wikipedia
1466
Michel de Montaigne - Wikipedia
1467
R. M. Hare - Wikipedia
1468
Reinhold Niebuhr - Wikipedia
1469
Johann Gottlieb Fichte - Wikipedia
1470
Ayn Rand - Wikipedia
1471
Libertarianism - Wikipedia
1472
Leo Strauss - Wikipedia
1473
Consciousness - Wikipedia
1474
Dante Alighieri - Wikipedia
1475
S

Christian humanism - Wikipedia
1691
Theism - Wikipedia
1692
Pantheism - Wikipedia
1693
Brahman - Wikipedia
1694
Embodied cognition - Wikipedia
1695
Feminist metaphysics - Wikipedia
1696
Social science - Wikipedia
1697
Wilhelm Dilthey - Wikipedia
1698
Unobservable - Wikipedia
1699
Rule of law - Wikipedia
1700
Freedom of religion - Wikipedia
1701
Faith - Wikipedia
1702
Portal:Metaphysics - Wikipedia
1703
List of ethicists - Wikipedia
1704
Anselm of Canterbury - Wikipedia
1705
Virtue - Wikipedia
1706
London - Wikipedia
1707
Anima mundi - Wikipedia
1708
Choice - Wikipedia
1709
Interpretations of quantum mechanics - Wikipedia
1710
Gnosticism - Wikipedia
1711
Polytheism - Wikipedia
1712
Fundamentalism - Wikipedia
1713
Monotheism - Wikipedia
1714
Walter Kaufmann (philosopher) - Wikipedia
1715
Meaning (existential) - Wikipedia
1716
Freedom of speech - Wikipedia
1717
David Malet Armstrong - Wikipedia
1718
Index of metaphysics articles - Wikipedia
1719
Abstract object theory - Wikipedia
1720
Mat

Eschatology - Wikipedia
1927
Absolute (philosophy) - Wikipedia
1928
Sigmund Freud - Wikipedia
1929
History of Christian theology - Wikipedia
1930
Tian - Wikipedia
1931
Theory of forms - Wikipedia
1932
World view - Wikipedia
1933
Anthropology - Wikipedia
1934
Wikipedia:WikiProject Philosophy - Wikipedia
1935
Symbol - Wikipedia
1936
Category:Philosophy stubs - Wikipedia
1937
Gaston Bachelard - Wikipedia
1938
Toleration - Wikipedia
1939
Freedom of the press - Wikipedia
1940
Eastern Orthodox Church - Wikipedia
1941
Tawhid - Wikipedia
1942
Biblical canon - Wikipedia
1943
Autonomy - Wikipedia
1944
New York City - Wikipedia
1945
Paul Ricœur - Wikipedia
1946
Paganism - Wikipedia
1947
Nature - Wikipedia
1948
Critical thinking - Wikipedia
1949
Germanic paganism - Wikipedia
1950
Sin - Wikipedia
1951
New Testament - Wikipedia
1952
Unitarianism - Wikipedia
1953
Divinity - Wikipedia
1954
Classics - Wikipedia
1955
Supernatural - Wikipedia
1956
Evidence - Wikipedia
1957
Category:Epistemology - Wikiped

File:P christianity.svg - Wikipedia
2173
Wicca - Wikipedia
2174
Waldensians - Wikipedia
2175
Universalism - Wikipedia
2176
Conservative Judaism - Wikipedia
2177
Humanistic Judaism - Wikipedia
2178
Christian atheism - Wikipedia
2179
Ancient Egyptian religion - Wikipedia
2180
Orphism (religion) - Wikipedia
2181
Constantine the Great - Wikipedia
2182
Justin Martyr - Wikipedia
2183
Religious pluralism - Wikipedia
2184
Greco-Roman mysteries - Wikipedia
2185
Restoration Movement - Wikipedia
2186
Nath - Wikipedia
2187
Unitarian Universalism - Wikipedia
2188
History of Christianity - Wikipedia
2189
Constitutionalism - Wikipedia
2190
Charismatic movement - Wikipedia
2191
Nicene Creed - Wikipedia
2192
Hellenism (religion) - Wikipedia
2193
Sacrifice - Wikipedia
2194
Manichaeism - Wikipedia
2195
Religious persecution - Wikipedia
2196
Goddess movement - Wikipedia
2197
Theosophy (Blavatskian) - Wikipedia
2198
Haredi Judaism - Wikipedia
2199
Monasticism - Wikipedia
2200
Comparative religion - Wikiped

Ramanandi Sampradaya - Wikipedia
2412
Etruscan religion - Wikipedia
2413
Sant Mat - Wikipedia
2414
Tibetic languages - Wikipedia
2415
Religion in Korea - Wikipedia
2416
Religion in Japan - Wikipedia
2417
Adi Dharm - Wikipedia
2418
Brahmoism - Wikipedia
2419
Gallo-Roman religion - Wikipedia
2420
Swahili people - Wikipedia
2421
Religious behaviour - Wikipedia
2422
National church - Wikipedia
2423
Chinese ritual mastery traditions - Wikipedia
2424
Serer religion - Wikipedia
2425
Umbanda - Wikipedia
2426
Muisca mythology - Wikipedia
2427
Eckankar - Wikipedia
2428
Subud - Wikipedia
2429
Women and religion - Wikipedia
2430
Odinani - Wikipedia
2431
Ifá - Wikipedia
2432
Candomblé Bantu - Wikipedia
2433
Ryukyuan religion - Wikipedia
2434
Akan religion - Wikipedia
2435
Candomblé Jejé - Wikipedia
2436
Candomblé Ketu - Wikipedia
2437
Comfa - Wikipedia
2438
Convince - Wikipedia
2439
Kumina - Wikipedia
2440
Obeah - Wikipedia
2441
Palo (religion) - Wikipedia
2442
Quimbanda - Wikipedia
2443
Tambor de 

Holy Spirit in Christianity - Wikipedia
2646
Great Church - Wikipedia
2647
Apostolic Fathers - Wikipedia
2648
Chaldean Catholic Church - Wikipedia
2649
Baroque - Wikipedia
2650
Council of Trent - Wikipedia
2651
Liberation theology - Wikipedia
2652
Sermon on the Mount - Wikipedia
2653
Christianity in the modern era - Wikipedia
2654
Gospel - Wikipedia
2655
Ukrainian Greek Catholic Church - Wikipedia
2656
Bonaventure - Wikipedia
2657
Ecumenism - Wikipedia
2658
Book of Revelation - Wikipedia
2659
Medieval university - Wikipedia
2660
Byzantine Iconoclasm - Wikipedia
2661
Armenian Apostolic Church - Wikipedia
2662
Bishop - Wikipedia
2663
Russian Empire - Wikipedia
2664
Bernard of Clairvaux - Wikipedia
2665
Pope Benedict XVI - Wikipedia
2666
Arminianism - Wikipedia
2667
Pietism - Wikipedia
2668
Second Vatican Council - Wikipedia
2669
Catholic ecumenical councils - Wikipedia
2670
First seven ecumenical councils - Wikipedia
2671
Vulgate - Wikipedia
2672
Church of the East - Wikipedia
2673
John 

Consecrated life - Wikipedia
2851
Society of apostolic life - Wikipedia
2852
Carmelites - Wikipedia
2853
Marriage in the Catholic Church - Wikipedia
2854
Prior - Wikipedia
2855
Annuario Pontificio - Wikipedia
2856
Benedictines - Wikipedia
2857
Sacrament of Penance - Wikipedia
2858
Synod of Bishops in the Catholic Church - Wikipedia
2859
Properties of the Holy See - Wikipedia
2860
Antiochene Rite - Wikipedia
2861
L'Osservatore Romano - Wikipedia
2862
Vatican Radio - Wikipedia
2863
Primacy of Peter - Wikipedia
2864
List of popes - Wikipedia
2865
Cardinal (Catholic Church) - Wikipedia
2866
Swiss Guard - Wikipedia
2867
Abbess - Wikipedia
2868
Anglican Use - Wikipedia
2869
Catholic ecclesiology - Wikipedia
2870
Bishop in the Catholic Church - Wikipedia
2871
Ambrosian Rite - Wikipedia
2872
Legal history of the Catholic Church - Wikipedia
2873
Catholic Church and ecumenism - Wikipedia
2874
Catholic Church and science - Wikipedia
2875
Index of Vatican City-related articles - Wikipedia
2876
Out

Pope Alexander VI - Wikipedia
3063
Knights Templar - Wikipedia
3064
Bede - Wikipedia
3065
Basil of Caesarea - Wikipedia
3066
Seventy disciples - Wikipedia
3067
Gregory of Nazianzus - Wikipedia
3068
Fourth Council of the Lateran - Wikipedia
3069
Monophysitism - Wikipedia
3070
Dissolution of the Monasteries - Wikipedia
3071
Gregorian chant - Wikipedia
3072
Great Awakening - Wikipedia
3073
Abraham - Wikipedia
3074
Maximus the Confessor - Wikipedia
3075
Pope Benedict XIV - Wikipedia
3076
Episcopal see - Wikipedia
3077
Christianity in the 1st century - Wikipedia
3078
Pope Boniface VIII - Wikipedia
3079
Cyril of Jerusalem - Wikipedia
3080
Second Council of Lyon - Wikipedia
3081
History of the Eastern Orthodox Church - Wikipedia
3082
Brazil - Wikipedia
3083
Predestination - Wikipedia
3084
Conciliarism - Wikipedia
3085
International law - Wikipedia
3086
English Civil War - Wikipedia
3087
Laity - Wikipedia
3088
King James Version - Wikipedia
3089
Pope Urban II - Wikipedia
3090
2018 Moscow–Const

Criticism of Christianity - Wikipedia
3271
Christian liturgy - Wikipedia
3272
Karl Rahner - Wikipedia
3273
Hungary - Wikipedia
3274
Thomas the Apostle - Wikipedia
3275
David - Wikipedia
3276
Peter Canisius - Wikipedia
3277
Lawrence of Brindisi - Wikipedia
3278
Icon - Wikipedia
3279
Jesus - Wikipedia
3280
Peter Damian - Wikipedia
3281
Prosperity theology - Wikipedia
3282
John Climacus - Wikipedia
3283
Septuagint - Wikipedia
3284
Mariology - Wikipedia
3285
John of Ávila - Wikipedia
3286
Catholic Church - Wikipedia
3287
Modernism in the Catholic Church - Wikipedia
3288
Belgium - Wikipedia
3289
Sweden - Wikipedia
3290
Filioque - Wikipedia
3291
File:Christian cross.svg - Wikipedia
3292
Saint Thomas Christians - Wikipedia
3293
Catholic Church - Wikipedia
3294
Peter Lombard - Wikipedia
3295
Episcopal polity - Wikipedia
3296
Solomon - Wikipedia
3297
Adam - Wikipedia
3298
Pastor - Wikipedia
3299
John Scotus Eriugena - Wikipedia
3300
Peter Chrysologus - Wikipedia
3301
Pope Gregory VII - Wikipedi

Lübeck martyrs - Wikipedia
3521
Martyrs of Albania - Wikipedia
3522
Martyrs of Laos - Wikipedia
3523
Three Martyrs of Chimbote - Wikipedia
3524
Agnes of Rome - Wikipedia
3525
Euphemia - Wikipedia
3526
Genevieve - Wikipedia
3527
Fourteen Holy Helpers - Wikipedia
3528
Template:Catholic saints - Wikipedia
3529
Template talk:Catholic saints - Wikipedia
3530
Chariton the Confessor - Wikipedia
3531
Michael of Synnada - Wikipedia
3532
Salonius - Wikipedia
3533
Four Crowned Martyrs - Wikipedia
3534
Great martyr - Wikipedia
3535
Irish Catholic Martyrs - Wikipedia
3536
Martyr Saints of China - Wikipedia
3537
Martyrs of Natal - Wikipedia
3538
Passion of Saint Perpetua, Saint Felicitas, and their Companions - Wikipedia
3539
Patriarchs (Bible) - Wikipedia
3540
Dalua of Tibradden - Wikipedia
3541
Woman with seven sons - Wikipedia
3542
Eulalia of Mérida - Wikipedia
3543
Narcisa de Jesús - Wikipedia
3544
Patron saints of the military - Wikipedia
3545
Athleta Christi - Wikipedia
3546
Antipope - Wikiped

File:PD-icon.svg - Wikipedia
3756
Excommunication - Wikipedia
3757
Beatification - Wikipedia
3758
Category:All articles covered by WikiProject Wikify - Wikipedia
3759
Style (manner of address) - Wikipedia
3760
Holy Roman Emperor - Wikipedia
3761
Category:Wikipedia articles with NSK identifiers - Wikipedia
3762
Meister Eckhart - Wikipedia
3763
Alfred Delp - Wikipedia
3764
Patron saint - Wikipedia
3765
Mexico - Wikipedia
3766
Ramon Llull - Wikipedia
3767
Diocese - Wikipedia
3768
Photios I of Constantinople - Wikipedia
3769
Venice - Wikipedia
3770
Purgatory - Wikipedia
3771
Orosius - Wikipedia
3772
Creed - Wikipedia
3773
Martyr - Wikipedia
3774
Tommaso Campanella - Wikipedia
3775
Iraq - Wikipedia
3776
Indulgence - Wikipedia
3777
Sacred tradition - Wikipedia
3778
Hagia Sophia - Wikipedia
3779
Julian of Norwich - Wikipedia
3780
Louis de Montfort - Wikipedia
3781
Anselm of Laon - Wikipedia
3782
Epistle of Barnabas - Wikipedia
3783
Devotio Moderna - Wikipedia
3784
Penance - Wikipedia
3785
Bri

Africa - Wikipedia
3987
Sharia - Wikipedia
3988
Wikipedia:Manual of Style/Lead section - Wikipedia
3989
International Church of the Foursquare Gospel - Wikipedia
3990
United Pentecostal Church International - Wikipedia
3991
List of the largest Protestant denominations - Wikipedia
3992
Christian literature - Wikipedia
3993
Christianity and science - Wikipedia
3994
Afghanistan - Wikipedia
3995
Indonesia - Wikipedia
3996
Old Believers - Wikipedia
3997
Outline of Christianity - Wikipedia
3998
Religion in Nazi Germany - Wikipedia
3999
International Pentecostal Holiness Church - Wikipedia
4000
Free Methodist Church - Wikipedia
4001
Apostolic Church (denomination) - Wikipedia
4002
National Baptist Convention, USA, Inc. - Wikipedia
4003
Category:CS1 maint: BOT: original-url status unknown - Wikipedia
4004
Lists of cathedrals - Wikipedia
4005
New Zealand - Wikipedia
4006
Gospel of Luke - Wikipedia
4007
List of current Christian leaders - Wikipedia
4008
Estonian Orthodox Church of the Moscow Pat

Classical tradition - Wikipedia
4212
20th-century Western painting - Wikipedia
4213
Western dress codes - Wikipedia
4214
Western religions - Wikipedia
4215
Eurosphere - Wikipedia
4216
Chrismation - Wikipedia
4217
Synod of Jassy - Wikipedia
4218
Western law - Wikipedia
4219
Republic of Ireland - Wikipedia
4220
Evangelical Orthodox Church - Wikipedia
4221
Template:Western world - Wikipedia
4222
Template talk:Western world - Wikipedia
4223
Modern history - Wikipedia
4224
File:Parthenon-Restoration-Nov-2005-a.jpg - Wikipedia
4225
File:Arco de Septimio Severo Roma 02.jpg - Wikipedia
4226
File:Church of the Holy Seplica - 1216564651.jpg - Wikipedia
4227
Western media - Wikipedia
4228
Western culture - Wikipedia
4229
Europeanisation - Wikipedia
4230
Pan-European identity - Wikipedia
4231
Free World - Wikipedia
4232
Judeo-Christian ethics - Wikipedia
4233
Balkans - Wikipedia
4234
Time (magazine) - Wikipedia
4235
Luxembourg - Wikipedia
4236
Great Lent - Wikipedia
4237
Polish Orthodox Church - W

Bioethics - Wikipedia
4429
Cambodia - Wikipedia
4430
North Macedonia - Wikipedia
4431
Ethnic group - Wikipedia
4432
Halakha - Wikipedia
4433
Devil in Christianity - Wikipedia
4434
Köppen climate classification - Wikipedia
4435
Pew Research Center - Wikipedia
4436
Augustus - Wikipedia
4437
Category:Articles with Italian-language external links - Wikipedia
4438
Angel - Wikipedia
4439
Tajikistan - Wikipedia
4440
State of Palestine - Wikipedia
4441
Sudan - Wikipedia
4442
Northern Cyprus - Wikipedia
4443
Muslim world - Wikipedia
4444
Byzantine Empire - Wikipedia
4445
Kosovo - Wikipedia
4446
Turkmenistan - Wikipedia
4447
Theodosius I - Wikipedia
4448
Mongolia - Wikipedia
4449
Akrotiri and Dhekelia - Wikipedia
4450
Divine Comedy - Wikipedia
4451
Rationality - Wikipedia
4452
Phoenicia - Wikipedia
4453
First Bulgarian Empire - Wikipedia
4454
Iron Age - Wikipedia
4455
Help:Media - Wikipedia
4456
Smyrna - Wikipedia
4457
World Trade Organization - Wikipedia
4458
Ātman (Hinduism) - Wikipedia
4459
L

Antigua and Barbuda - Wikipedia
4676
Christian meditation - Wikipedia
4677
California - Wikipedia
4678
Palestine (region) - Wikipedia
4679
Niue - Wikipedia
4680
Saint Helena, Ascension and Tristan da Cunha - Wikipedia
4681
Pitcairn Islands - Wikipedia
4682
Congregation for Institutes of Consecrated Life and Societies of Apostolic Life - Wikipedia
4683
Tokelau - Wikipedia
4684
Bermuda - Wikipedia
4685
Gregorian Reform - Wikipedia
4686
Mysterii Paschalis - Wikipedia
4687
Code of Canons of the Eastern Churches - Wikipedia
4688
Code of Rubrics - Wikipedia
4689
Guatemala - Wikipedia
4690
Category:Articles containing Spanish-language text - Wikipedia
4691
Puerto Rico - Wikipedia
4692
Apostolic Constitutions - Wikipedia
4693
General Instruction of the Roman Missal - Wikipedia
4694
Altar in the Catholic Church - Wikipedia
4695
Benefice - Wikipedia
4696
Rector (ecclesiastical) - Wikipedia
4697
Tra le sollecitudini - Wikipedia
4698
Abstemius - Wikipedia
4699
Al-Andalus - Wikipedia
4700
Danube - 

Canon penitentiary - Wikipedia
4892
Sacramentum Poenitentiae - Wikipedia
4893
Mass stipend - Wikipedia
4894
Sacramental - Wikipedia
4895
Indulgentiarum Doctrina - Wikipedia
4896
Oratory (worship) - Wikipedia
4897
Banns of marriage - Wikipedia
4898
Dignitas connubii - Wikipedia
4899
Vetitum - Wikipedia
4900
Bigamy (canon law) - Wikipedia
4901
Clandestinity (canon law) - Wikipedia
4902
Impediment (canon law) - Wikipedia
4903
Disparity of cult - Wikipedia
4904
Ligamen - Wikipedia
4905
Public propriety - Wikipedia
4906
Dispensation (canon law) - Wikipedia
4907
Validation of marriage - Wikipedia
4908
Diocese - Wikipedia
4909
Aeque principaliter - Wikipedia
4910
Cathedraticum - Wikipedia
4911
In persona episcopi - Wikipedia
4912
Diocesan chancery - Wikipedia
4913
Person (canon law) - Wikipedia
4914
Trusteeism - Wikipedia
4915
Computation of time - Wikipedia
4916
Taxa Innocentiana - Wikipedia
4917
Canonical faculties - Wikipedia
4918
Interpretation (canon law) - Wikipedia
4919
Obreption and s

In [13]:
cur.execute('SELECT title, url, citedCount FROM WikiPages ORDER BY citedCOunt DESC LIMIT 30')

<sqlite3.Cursor at 0x10b37a260>

In [14]:
for row in cur:
    print(row)

('User talk:2601:641:500:5CC0:9026:D008:E47C:BA3B - Wikipedia', '/wiki/Special:MyTalk', 7500)
('User contributions for 2601:641:500:5CC0:9026:D008:E47C:BA3B - Wikipedia', '/wiki/Special:MyContributions', 7500)
('Wikipedia, the free encyclopedia', '/wiki/Main_Page', 7500)
('Portal:Contents - Wikipedia', '/wiki/Portal:Contents', 7500)
('Portal:Featured content - Wikipedia', '/wiki/Portal:Featured_content', 7500)
('Portal:Current events - Wikipedia', '/wiki/Portal:Current_events', 7500)
('Speed limits in Romania - Wikipedia', '/wiki/Special:Random', 7500)
('Help:Contents - Wikipedia', '/wiki/Help:Contents', 7500)
('Wikipedia:About - Wikipedia', '/wiki/Wikipedia:About', 7500)
('Wikipedia:Community portal - Wikipedia', '/wiki/Wikipedia:Community_portal', 7500)
('Recent changes - Wikipedia', '/wiki/Special:RecentChanges', 7500)
('Wikipedia:File Upload Wizard - Wikipedia', '/wiki/Wikipedia:File_Upload_Wizard', 7500)
('Special pages - Wikipedia', '/wiki/Special:SpecialPages', 7500)
('Wikipedia

In [15]:
# Get all cited urls from the given source
def get_cited_urls_from_source(cur, from_url):
    cur.execute("SELECT id FROM WikiPages WHERE URL = ? LIMIT 1", (from_url,))
    try:
        source = cur.fetchone()[0]
    except:
        print("The given url is not found!")
        return []
    res = []
    cur.execute("""
                SELECT URL, id from WikiPages JOIN Cites ON WikiPages.id = Cites.to_id
                WHERE Cites.from_id = ?
                """, (source, ))
    for row in cur:
        res.append(row)
    print("The given page cites %d different internal pages." % (len(res)))
    return res

# Get all urls that cite the given destination
def get_citing_urls_to_destination(cur, to_url):
    cur.execute("SELECT id FROM WikiPages WHERE URL = ? LIMIT 1", (to_url, ))
    try:
        dest = cur.fetchone()[0]
    except:
        print("The given url is not found!")
        return []
    res = []
    cur.execute("""
                SELECT URL, id from WikiPages JOIN Cites ON WikiPages.id = Cites.from_id
                WHERE Cites.to_id = ?
                """, (dest, ))
    for row in cur:
        res.append(row)
    print("There are %d pages found that cites the given link" % (len(res)))
    return res

In [16]:
urls = get_cited_urls_from_source(cur, "/wiki/Toeplitz_operator")
for i in range(10):
    print(urls[i])

The given page cites 69 different internal pages.
('/wiki/Mathematical_analysis', 18)
('/wiki/Continuous_function', 244)
('/wiki/International_Standard_Book_Number', 401)
('/wiki/Help:Category', 490)
('/wiki/Special:MyTalk', 517)
('/wiki/Special:MyContributions', 518)
('/wiki/Main_Page', 524)
('/wiki/Portal:Contents', 525)
('/wiki/Portal:Featured_content', 526)
('/wiki/Portal:Current_events', 527)


In [17]:
urls = get_citing_urls_to_destination(cur, "/wiki/Mathematics")
for i in range(10):
    print(urls[i])

There are 499 pages found that cites the given link
('/wiki/Mathematics_(disambiguation)', 3)
('/wiki/Math_(disambiguation)', 4)
('/wiki/File:Euclid.jpg', 5)
('/wiki/Euclid', 6)
('/wiki/Mathematical_structure', 13)
('/wiki/Algebra', 14)
('/wiki/Geometry', 16)
('/wiki/Calculus', 17)
('/wiki/Mathematical_analysis', 18)
('/wiki/Definition', 19)


In [18]:
def get_most_cites(cur, num = 20):
    cur.execute('SELECT URL, title, citedCount FROM WikiPages ORDER BY citedCount DESC LIMIT ?', (num, ))
    res = []
    try:
        for row in cur:
            res.append(row)
    except:
        print('No available links in WikiPages')
    return res
    

In [19]:
urls = get_most_cites(cur, 30)
for row in urls:
    print(row)

('/wiki/Special:MyTalk', 'User talk:2601:641:500:5CC0:9026:D008:E47C:BA3B - Wikipedia', 7500)
('/wiki/Special:MyContributions', 'User contributions for 2601:641:500:5CC0:9026:D008:E47C:BA3B - Wikipedia', 7500)
('/wiki/Main_Page', 'Wikipedia, the free encyclopedia', 7500)
('/wiki/Portal:Contents', 'Portal:Contents - Wikipedia', 7500)
('/wiki/Portal:Featured_content', 'Portal:Featured content - Wikipedia', 7500)
('/wiki/Portal:Current_events', 'Portal:Current events - Wikipedia', 7500)
('/wiki/Special:Random', 'Speed limits in Romania - Wikipedia', 7500)
('/wiki/Help:Contents', 'Help:Contents - Wikipedia', 7500)
('/wiki/Wikipedia:About', 'Wikipedia:About - Wikipedia', 7500)
('/wiki/Wikipedia:Community_portal', 'Wikipedia:Community portal - Wikipedia', 7500)
('/wiki/Special:RecentChanges', 'Recent changes - Wikipedia', 7500)
('/wiki/Wikipedia:File_Upload_Wizard', 'Wikipedia:File Upload Wizard - Wikipedia', 7500)
('/wiki/Special:SpecialPages', 'Special pages - Wikipedia', 7500)
('/wiki/Wik

In [11]:
def get_title_with_keyword(keyword, cur):
    cur.execute("SELECT URL, title FROM WikiPages WHERE title LIKE ?", ('%'+keyword+'%', ))
    res = []
    try:
        for row in cur:
            res.append(row)
        print("There are %d titles with given keyword." % (len(res)))
    except:
        print("No such title")
    return res

In [21]:
urls = get_title_with_keyword("Operator", cur)
for row in urls:
    print(row)

There are 16 titles with given keyword.
('/wiki/Operator_theory', 'Operator theory - Wikipedia')
('/wiki/Compact_operator', 'Compact operator - Wikipedia')
('/wiki/Multiplication_operator', 'Multiplication operator - Wikipedia')
('/wiki/Toeplitz_operator', 'Toeplitz operator - Wikipedia')
('/wiki/Dilation_(operator_theory)', 'Dilation (operator theory) - Wikipedia')
('/w/index.php?title=Toeplitz_operator&action=edit&section=1', 'Editing Toeplitz operator (section) - Wikipedia')
('/w/index.php?title=Toeplitz_operator&action=edit&section=2', 'Editing Toeplitz operator (section) - Wikipedia')
('/w/index.php?title=Toeplitz_operator&action=edit&section=3', 'Editing Toeplitz operator (section) - Wikipedia')
('/wiki/Category:Operator_theory', 'Category:Operator theory - Wikipedia')
('/wiki/Talk:Toeplitz_operator', 'Talk:Toeplitz operator - Wikipedia')
('/w/index.php?title=Toeplitz_operator&action=edit', 'Editing Toeplitz operator - Wikipedia')
('/w/index.php?title=Toeplitz_operator&action=his

In [14]:
def retrived_page_ratio(cur):
    cur.execute('SELECT COUNT(1), retrived FROM WikiPages GROUP BY retrived')
    uncrawled, crawled = cur
    print("Total number of pages in the dataset: ", uncrawled[0]+crawled[0])
    # print("Total number of pages crawled: ", crawled[0])
    print("Crawled ratio: %5.3f" % (crawled[0]/(uncrawled[0]+crawled[0])))

In [15]:
retrived_page_ratio(cur)

Total number of pages in the dataset:  896481
Crawled ratio: 0.008
