In [28]:
import requests
import re
import pandas as pd
from urllib.parse import quote

In [29]:
# Define a user agent string
user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'

# Specify the headers with the user agent
headers = {
    'User-Agent': user_agent,
    'accept': 'application/json'
}


In [30]:
#SUMMARY OF THE PAGE 

# URL for the Wikimedia REST API to get information about a page
url = f'https://en.wikipedia.org/api/rest_v1/page/summary/absurdism'

# Making a GET request
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Print the response content
    data = response.json()
    print(f"Title: {data['title']}")
    print(f"Description: {data['description']}")
    print(f"Extract: {data['extract']}")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")
    print(response.text)

Title: Absurdism
Description: Theory that life in general is meaningless
Extract: Absurdism is the philosophical theory that the universe is irrational and meaningless. It states that trying to find meaning leads people into a conflict with the world. This conflict can be between rational man and an irrational universe, between intention and outcome, or between subjective assessment and objective worth, but the precise definition of the term is disputed. Absurdism claims that the world as a whole is absurd. It differs in this regard from the less global thesis that some particular situations, persons, or phases in life are absurd.


In [31]:
#PAGEVIEW TEST FOR ONE
page_title= 'Absolute (philosophy)'

url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{page_title}/daily/2020031500/2020041500'


# Making a GET request
response_views = requests.get(url,headers=headers)

# Check if the request was successful (status code 200)
if response_views.status_code == 200:
    # Print the response content
    data = response_views.json()
else: 
    # Print an error message if the request was unsuccessful
    print(f"Error: {response_views.status_code}")
    print(page_title)

# Extract the 'items' list from the data
items_list = data['items']
# Create a DataFrame
df = pd.DataFrame(items_list)
display(df)

Unnamed: 0,project,article,granularity,timestamp,access,agent,views
0,en.wikipedia,Absolute_(philosophy),daily,2020031500,all-access,all-agents,244
1,en.wikipedia,Absolute_(philosophy),daily,2020031600,all-access,all-agents,260
2,en.wikipedia,Absolute_(philosophy),daily,2020031700,all-access,all-agents,262
3,en.wikipedia,Absolute_(philosophy),daily,2020031800,all-access,all-agents,261
4,en.wikipedia,Absolute_(philosophy),daily,2020031900,all-access,all-agents,235
5,en.wikipedia,Absolute_(philosophy),daily,2020032000,all-access,all-agents,228
6,en.wikipedia,Absolute_(philosophy),daily,2020032100,all-access,all-agents,249
7,en.wikipedia,Absolute_(philosophy),daily,2020032200,all-access,all-agents,280
8,en.wikipedia,Absolute_(philosophy),daily,2020032300,all-access,all-agents,258
9,en.wikipedia,Absolute_(philosophy),daily,2020032400,all-access,all-agents,268


In [32]:
#GET THE LIST OF PHILOSOPHIES PAGES from page list of philo (but only 5 languages)

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'List_of_philosophies',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# Make the API request
response = requests.get(api_url, params=params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Extract the content of the first revision
    revisions = data['query']['pages'][0]['revisions']
    
    if revisions:
        content = revisions[0]['slots']['main']['content']
        
        # Use a regular expression to find section titles
        section_titles = re.findall(r'\[\[([^|\]]+)(?:\|[^]]+)?\]\]', content)
        
        while section_titles[0][0]!='A' or section_titles[-1][0]!='Z':
            if section_titles[0][0]!='A':
                section_titles.pop(0)
            if section_titles[-1][0]!='Z':
                section_titles.pop(-1)

        # Display the list of section titles
        #print("List of Section Titles:")
        print(section_titles)
    else:
        print("No revision content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")
    #print(response.text)


['Absurdism', 'Philosophy of action', 'Actual idealism', 'Actualism', 'Advaita Vedanta', 'Aesthetic Realism', 'Aesthetics', 'African philosophy', 'Afrocentrism', 'Agential realism', 'Agnosticism', 'Agnostic theism', 'American philosophy', 'Anarchy', 'Animism', 'Antinatalism', 'Antinomianism', 'Anti-psychiatry', 'Anti-realism', 'Antireductionism', 'Analytic philosophy', 'Anarchism', 'Ancient philosophy', 'Anthropocentrism', 'Anomalous monism', 'Applied ethics', 'Philosophy of archaeology', 'Aristotelianism', 'Philosophy of Arithmetic', 'Philosophy of art', 'Philosophy of artificial intelligence', 'Asceticism', 'Atheism', 'Authoritarianism', 'Averroism', 'Avicennism', 'Axiology', 'Baptism', 'Baptists', 'Behaviorism', 'Bayesianism', 'Bioconservatism', 'Philosophy of biology', 'Biosophy', 'Buddhist philosophy', 'Philosophy of business', 'Cartesianism', 'Categorical imperative', 'Charvaka', 'School of Naturalists', 'Neoplatonism and Christianity', 'Capitalism', 'The Philosophy of Chance', '

In [33]:
#GET THE LIST OF PHILOSOPHIES PAGES With GLOSSARY  pages because in more languages 

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'Glossary_of_philosophy',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# Make the API request
response = requests.get(api_url, params=params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Extract the content of the first revision
    revisions = data['query']['pages'][0]['revisions']
    
    if revisions:
        content = revisions[0]['slots']['main']['content']
    else:
        print("No revision content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")
    

#Avoid problem of double description of terms (part before | = page title ; after | = philosophy name)
term_matches = re.findall(r'{{term\|\[\[([^|\]]+)(?:\|[^]]+)?\]\]\}}', content)
# Display the list of extracted terms
print("List of Terms:")
print(term_matches)

List of Terms:
['Absolute (philosophy)', 'absurdism', 'Tychism', 'acosmism', 'aestheticism', 'agnostic atheism', 'agnostic theism', 'agnosticism', 'altruism', 'anarchism', 'anarcho-capitalism', 'anarcho-primitivism', 'anarcho-syndicalism', 'animism', 'anthropocentrism', 'anthropomorphism', 'antinatalism', 'Antinomianism', 'anti-realism', 'Aristotelianism', 'Arminianism', 'asceticism', 'ascriptivism', 'associationalism', 'atheism', 'atomism', 'authoritarianism', 'Surrealist automatism', 'behavioralism', 'behaviorism', 'biologism', 'Buddhism', 'capitalism', 'careerism', 'Cartesianism', 'Christian existential humanism', 'Christian existentialism', 'Christian humanism', 'Christian materialism', 'Christianity', 'classical theism', 'Classicism', 'Cognitivism (ethics)', 'coherentism', 'Colbertism', 'Collectivism and individualism', 'Communal (disambiguation)', 'communism', 'communitarianism', 'Compatibilism and incompatibilism', 'Comtism', 'conceptualism', 'Consciousness', 'Confucianism', 'co

In [34]:
capitalized_list = [term.capitalize() for term in term_matches]

# Display the list of capitalized terms
print("Capitalized Terms:")
print(capitalized_list)


Capitalized Terms:
['Absolute (philosophy)', 'Absurdism', 'Tychism', 'Acosmism', 'Aestheticism', 'Agnostic atheism', 'Agnostic theism', 'Agnosticism', 'Altruism', 'Anarchism', 'Anarcho-capitalism', 'Anarcho-primitivism', 'Anarcho-syndicalism', 'Animism', 'Anthropocentrism', 'Anthropomorphism', 'Antinatalism', 'Antinomianism', 'Anti-realism', 'Aristotelianism', 'Arminianism', 'Asceticism', 'Ascriptivism', 'Associationalism', 'Atheism', 'Atomism', 'Authoritarianism', 'Surrealist automatism', 'Behavioralism', 'Behaviorism', 'Biologism', 'Buddhism', 'Capitalism', 'Careerism', 'Cartesianism', 'Christian existential humanism', 'Christian existentialism', 'Christian humanism', 'Christian materialism', 'Christianity', 'Classical theism', 'Classicism', 'Cognitivism (ethics)', 'Coherentism', 'Colbertism', 'Collectivism and individualism', 'Communal (disambiguation)', 'Communism', 'Communitarianism', 'Compatibilism and incompatibilism', 'Comtism', 'Conceptualism', 'Consciousness', 'Confucianism',

In [35]:
philo_views_en=pd.DataFrame()

#PAGEVIEWS 
for page_title in capitalized_list: 
    # URL for the Wikimedia Pagecounts API to get the number of views for a page
    url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{page_title}/daily/2020031529/2020041515'

    # Define a user agent to have acces to the API 
    user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'

    # Specify the headers with the user agent
    headers = {
        'User-Agent': user_agent,
        'accept': 'application/json'
    }

    # Making a GET request
    response_views = requests.get(url,headers=headers)

    # Check if the request was successful (status code 200)
    if response_views.status_code == 200:
        # Print the response content
        data = response_views.json()
    else:
        # Print an error message if the request was unsuccessful
        print(f"Error: {response_views.status_code}")
        print(page_title)


    # Extract the 'items' list from the data
    items_list = data['items']

    # Create a DataFrame
    df = pd.DataFrame(items_list)
    philo_views_en = pd.concat([philo_views_en,df])

Error: 404
Collectivism and individualism
Error: 404
Communal (disambiguation)
Error: 404
Wikt:egoism
Error: 404
Global estate
Error: 404
Philosophy of søren kierkegaard
Error: 404
Legalism (western philosophy)
Error: 404
New confucianism
Error: 404
Objectivism (ayn rand)
Error: 404
Orthocracy


In [37]:
display(philo_views_en)

Unnamed: 0,project,article,granularity,timestamp,access,agent,views
0,en.wikipedia,Absolute_(philosophy),daily,2020031500,all-access,all-agents,244
1,en.wikipedia,Absolute_(philosophy),daily,2020031600,all-access,all-agents,260
2,en.wikipedia,Absolute_(philosophy),daily,2020031700,all-access,all-agents,262
3,en.wikipedia,Absolute_(philosophy),daily,2020031800,all-access,all-agents,261
4,en.wikipedia,Absolute_(philosophy),daily,2020031900,all-access,all-agents,235
...,...,...,...,...,...,...,...
27,en.wikipedia,Zoroastrianism,daily,2020041100,all-access,all-agents,8265
28,en.wikipedia,Zoroastrianism,daily,2020041200,all-access,all-agents,8640
29,en.wikipedia,Zoroastrianism,daily,2020041300,all-access,all-agents,7657
30,en.wikipedia,Zoroastrianism,daily,2020041400,all-access,all-agents,7300
