In [2]:
import re
from datetime import datetime
from urllib.parse import quote, urlsplit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
pd.set_option('display.max_rows', 500)

In [3]:
# Contact string sent with API requests to identify this client
user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

# Assemble the request headers entry by entry: the user agent first, then the
# accepted response format
headers = {}
headers['User-Agent'] = user_agent
headers['accept'] = 'application/json'


## RETRIEVE LIST OF PHILOSOPHIES IN **ENGLISH**

In [9]:
# Get the philosophies from the page "List of philosophies": download the page
# wikitext and keep the titles of the articles it links to.

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'List_of_philosophies',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# Start from an empty list so that `section_titles` is always defined for the
# following cells, even when the request fails or returns nothing usable.
section_titles = []

# API request, sent with the identifying headers defined in the previous cell
response = requests.get(api_url, params=params, headers=headers)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()

    # A missing page carries no 'revisions' key; fall back to an empty list so
    # the "No content found." branch is reached instead of a KeyError.
    revisions = data['query']['pages'][0].get('revisions', [])

    if revisions:
        content = revisions[0]['slots']['main']['content']

        # Capture the target of every [[target]] or [[target|label]] wiki link
        linked_titles = re.findall(r'\[\[([^|\]]+)(?:\|[^]]+)?\]\]', content)

        # Keep only the span running from the first title starting with 'A' to
        # the last title starting with 'Z'; links outside that span are dropped.
        # Searching for the two boundaries (rather than popping items one by
        # one) cannot run off the end of the list when a boundary is missing.
        first_a = next(
            (i for i, title in enumerate(linked_titles) if title[0] == 'A'),
            None,
        )
        last_z = next(
            (i for i in range(len(linked_titles) - 1, -1, -1)
             if linked_titles[i][0] == 'Z'),
            None,
        )

        if first_a is None or last_z is None or first_a > last_z:
            print("No A-Z range of links found.")
        else:
            section_titles = linked_titles[first_a:last_z + 1]
    else:
        print("No content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")


In [10]:
# Write the collected article titles to disk as a NumPy array file
np.save(file='data/section_titles.npy', arr=section_titles)

### DATAFRAME OF PHILOSOPHIES IN ENGLISH AND VIEWS (1 COLUMN ARTICLE)

In [12]:
# PAGEVIEWS: download the monthly English-Wikipedia page views of every
# article in `section_titles` and stack them into one DataFrame.

# Define a user agent to have access to the API (the same for every request,
# so it is built once, outside the loop)
user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

# Specify the headers with the user agent
headers = {
    'User-Agent': user_agent,
    'accept': 'application/json'
}

# One DataFrame per successfully retrieved article; concatenated once at the
# end instead of growing a DataFrame inside the loop.
monthly_frames = []

for page_title in section_titles:
    # Percent-encode the title so characters such as '/' or '?' stay inside
    # the article segment of the URL path.
    encoded_title = quote(page_title, safe='')

    # URL for the Wikimedia pageviews API to get the number of views for a page
    url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{encoded_title}/monthly/2018010100/2023010100'

    # Making a GET request
    response_views = requests.get(url, headers=headers)

    # Check if the request was successful (status code 200)
    if response_views.status_code != 200:
        # Report the failure and skip this article; without the `continue`
        # the rows of the previous article would be appended a second time.
        print(f"Error: {response_views.status_code}")
        print(page_title)
        continue

    # Extract the 'items' list from the response and turn it into a DataFrame
    monthly_frames.append(pd.DataFrame(response_views.json()['items']))

# Empty DataFrame when nothing could be retrieved, as in the initial state
philo_views_en = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

Error: 404
Nonduality (spirituality)


In [13]:
# Remove the columns describing the request settings, then export the views.
columns_to_remove = ['granularity', 'access', 'agent']

# Reassign instead of dropping in place and ignore columns that are already
# gone, so that re-running this cell does not raise a KeyError.
philo_views_en = philo_views_en.drop(columns=columns_to_remove, errors='ignore')
philo_views_en.to_csv('data/Philo_en.csv', index=False)

# Get the views for the different languages

In [None]:
# Wikipedia language codes for which the page views are retrieved
target_languages = [
    'fr', 'ja', 'de', 'it',
    'da', 'nl', 'no', 'sr',
    'sv', 'ko', 'fi',
]

In [None]:
def get_url_list(languages_list, page_titles):
    """Collect the URLs of the other-language versions of English articles.

    For every title in ``page_titles`` and every language code in
    ``languages_list``, the English Wikipedia API is asked for the matching
    language link, and the URL of each link found is collected.

    Parameters
    ----------
    languages_list : list of str
        Wikipedia language codes to look up (e.g. ``['fr', 'de']``).
    page_titles : list of str
        Titles of the English Wikipedia articles.

    Returns
    -------
    list of str
        Flat list of article URLs, ordered by page and then by language.
        A page without a version in some language contributes no entry for
        that language, so pages may contribute fewer URLs than there are
        languages.
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    language_links = []

    for page in page_titles:
        # Make separate requests for each language. Iterate over the argument
        # rather than a notebook-level list so the function honours what the
        # caller asks for.
        for lang in languages_list:
            params = {
                'action': 'query',
                'titles': page,
                'prop': 'langlinks',
                'llprop': 'url',
                'format': 'json',
                'lllang': lang,
            }

            # Make the API request
            response = requests.get(api_url, params=params)

            # Check the status before decoding: an error response is not
            # guaranteed to contain JSON.
            if response.status_code != 200:
                print(f"Error for language {lang}: {response.status_code}")
                continue

            # Extract language links from the API response; the result holds
            # a single page entry keyed by its page id.
            pages = response.json()['query']['pages']
            page_id = next(iter(pages))
            langlinks = pages[page_id].get('langlinks', [])
            language_links.extend(link['url'] for link in langlinks)
    return language_links
        
        

In [None]:
def find_country_code_and_title(link):
    """Split a Wikipedia article URL into its language code and article title.

    Used in get_page_views_by_languages(links).

    Parameters
    ----------
    link : str
        Article URL of the form ``https://<code>.wikipedia.org/wiki/<title>``.

    Returns
    -------
    tuple of (str, str)
        ``(code, title)``: the first label of the host name and everything in
        the path after the first path segment. The title is returned exactly
        as it appears in the URL (no decoding), including any ``/`` it
        contains. Parts that are missing from ``link`` come back as ``""``.
    """
    parts = urlsplit(link)

    # "fr.wikipedia.org" -> "fr"
    code = parts.netloc.split(".", 1)[0]

    # "/wiki/AC/DC" -> ["", "wiki", "AC/DC"]; limiting the split to two cuts
    # keeps titles containing "/" in one piece instead of truncating them.
    path_segments = parts.path.split("/", 2)
    title = path_segments[2] if len(path_segments) > 2 else ""
    return code, title

In [None]:
def get_page_views_by_languages(links, page_titles, links_per_subject=11):
    """Download the monthly page views behind each language-edition link.

    Parameters
    ----------
    links : list of str
        Wikipedia article URLs, grouped by subject.
    page_titles : list of str
        Subject names; the link at position ``k`` is labelled with
        ``page_titles[k // links_per_subject]``.
    links_per_subject : int, default 11
        Number of consecutive links belonging to the same subject.

    Returns
    -------
    pandas.DataFrame
        The 'items' rows returned by the pageviews API for every link that
        could be retrieved, with two extra columns: 'subject' and 'code'
        (the language code taken from the link). Empty when nothing could be
        retrieved.
    """
    # NOTE(review): the subject labelling is only right when every subject
    # has exactly `links_per_subject` links. A list of links that skips
    # missing language versions will shift later subjects - confirm against
    # how `links` is built.

    # Define a user agent to have access to the API (identical for every
    # request, so it is built once)
    user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'
    # Specify the headers with the user agent
    headers = {
        'User-Agent': user_agent,
        'accept': 'application/json'
    }

    frames = []
    skipped = 0
    for position, link in enumerate(links):
        # Find country code and article title
        code, title = find_country_code_and_title(link)
        subject = page_titles[position // links_per_subject]

        # A '/' inside the title must be percent-encoded so it is not read as
        # a path separator; the rest of the title is used as found in the link.
        encoded_title = title.replace('/', '%2F')
        url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/{code}.wikipedia.org/all-access/all-agents/{encoded_title}/monthly/2018010100/2023010100'

        # Making a GET request
        response_views = requests.get(url, headers=headers)

        # Skip links without data. Falling through here would re-append the
        # rows of the previous link under this link's code and subject.
        if response_views.status_code != 200:
            skipped += 1
            continue

        # Extract the 'items' list from the data and label its rows
        df = pd.DataFrame(response_views.json()['items'])
        df["subject"] = subject
        df["code"] = code
        frames.append(df)

    if skipped:
        print(f"Skipped {skipped} link(s) for which no page views could be retrieved")

    # Concatenate once at the end instead of growing a DataFrame in the loop
    return pd.concat(frames) if frames else pd.DataFrame()

# DETERMINE BROADER TOPICS 

In [None]:
# More accurate way to classify philosophies... to be completed

# Philosophies to classify (only the first entries are listed so far)
philosophies = [
    'Absolute (philosophy)',
    'Absurdism',
    'Tychism',
    'Acosmism',
    'Aestheticism',
    'Agnostic atheism',
    'Agnostic theism',
    'Agnosticism',
    'Altruism',
    'Anarchism',
    # ... (remaining philosophies)
]

# Two-level hierarchy: class -> subclass -> list of philosophy names
classifications = {
    'Ontological and Metaphysical Orientations': {
        'Metaphysical Theories': [
            'Absolute (philosophy)',
            'Acosmism',
            'Idealism',
            'Materialism',
            'Monism and Dualism',
        ],
        'Ontological Views': [
            'Pantheism',
            'Realism and Nominalism',
            'Substance Dualism',
        ],
    },
    'Epistemological Frameworks': {
        'Approaches to Knowledge': [
            'Empiricism',
            'Rationalism',
            'Skepticism',
        ],
        'Epistemological Theories': [
            'Constructivist Epistemology',
            'Pragmatism',
            'Reliabilism',
        ],
    },
    'Ethical and Moral Philosophies': {
        'Ethical Theories': [
            'Absolutism and Relativism',
            'Deontological Ethics',
            'Utilitarianism',
        ],
        'Moral Views': [
            'Altruism',
            'Egoism',
            'Moral Realism',
        ],
    },
}

# Look up the class/subclass of a philosophy in `classifications`
def find_classification(philosophy):
    """Return the (class, subclass) pair under which `philosophy` is listed.

    The hierarchy is scanned in definition order and the first subclass whose
    list contains `philosophy` wins. When no list contains it, the pair
    ("Not Classified", "Not Classified") is returned.
    """
    matches = (
        (class_name, subclass_name)
        for class_name, subclass_map in classifications.items()
        for subclass_name, members in subclass_map.items()
        if philosophy in members
    )
    return next(matches, ("Not Classified", "Not Classified"))

# Loop through all philosophies and print their classifications
#for philosophy in philosophies:
#    classification, subclass = find_classification(philosophy)
#    print(f"{philosophy} -> Class: {classification}, Subclass: {subclass}")


In [None]:
# Construct broader philosophy topics: one list of article titles per topic

metaphysics_and_ontology = [
    'Absolute (philosophy)', 'Acosmism', 'Animism', 'Atomism',
    'Dualism (Mind-body dualism, Substance dualism)',
    'Emergent materialism', 'Immaterialism', 'Monism', 'Pantheism',
]

# I am thinking of splitting this one into two topics
existentialism_and_absurdism = [
    'Absurdism', 'Existentialism',
]

epistemology = [
    'Agnosticism', 'Empiricism', 'Rationalism',
]

ethics_and_moral_philosophy = [
    'Altruism', 'Antinatalism', 'Consequentialism', 'Ethical egoism',
    'Hedonism', 'Moral absolutism', 'Moral realism', 'Moral relativism',
    'Moral universalism', 'Utilitarianism',
]

political_and_social_philosophy = [
    'Anarchism', 'Authoritarianism', 'Capitalism', 'Communism',
    'Libertarianism', 'Socialism', 'Totalitarianism',
]

philosophy_of_mind = [
    'Behaviorism',
    'Dualism (Mind-body dualism, Substance dualism)',
    'Functionalism (philosophy of mind)',
    'Idealism', 'Materialism', 'Phenomenalism', 'Physicalism', 'Solipsism',
]

religious_and_theological_philosophy = [
    'Deism', 'Monotheism', 'Polytheism', 'Religious humanism', 'Theism',
]

philosophy_of_time = [
    'Eternalism (philosophy of time)',
]

philosophy_of_science = [
    'Empiricism', 'Positivism', 'Scientism',
]

cultural_and_social_philosophy = [
    'Aestheticism', 'Anthropocentrism', 'Anthropomorphism',
    'Cultural relativism', 'Environmentalism', 'Feminism', 'Humanism',
    'Secular humanism',
]


### DATAFRAME IN **ENGLISH** WITH BROADER TOPICS

In [None]:
# Aggregate the English page views per broader topic: one row per timestamp,
# one column per topic.

# Map each topic name to the list of article titles it groups
topics = {
    'metaphysics_and_ontology': metaphysics_and_ontology,
    'existentialism_and_absurdism': existentialism_and_absurdism,
    'epistemology': epistemology,
    'ethics_and_moral_philosophy': ethics_and_moral_philosophy,
    'political_and_social_philosophy': political_and_social_philosophy,
    'philosophy_of_mind': philosophy_of_mind,
    'religious_and_theological_philosophy': religious_and_theological_philosophy,
    'philosophy_of_time': philosophy_of_time,
    'philosophy_of_science': philosophy_of_science,
    'cultural_and_social_philosophy': cultural_and_social_philosophy
}

# The topic lists spell titles with spaces. Compare against the article names
# with underscores turned into spaces, so the match works whichever of the two
# spellings the 'article' column uses.
article_names = philo_views_en['article'].str.replace('_', ' ', regex=False)

# Create the new DataFrame with one row per timestamp
broader_philosophy_topics = pd.DataFrame(index=philo_views_en['timestamp'].unique())

# Filter and aggregate views for each philosophy topic
for topic, articles in topics.items():
    # Positional boolean mask: the frame's index may contain repeated labels
    in_topic = article_names.isin(articles).to_numpy()
    topic_views = philo_views_en[in_topic].groupby('timestamp')['views'].sum()
    broader_philosophy_topics[topic] = topic_views

# Now, broader_philosophy_topics contains aggregated views for each philosophy
# topic in one DataFrame

# Drop last month of data because not complete. Done once, after the loop,
# and by label, so it does not depend on the row order.
is_partial_month = broader_philosophy_topics.index.astype(str) == '2023010100'
broader_philosophy_topics = broader_philosophy_topics[~is_partial_month]

In [None]:
# Export the per-topic views, keeping the timestamp index as the first column
broader_philosophy_topics.to_csv(path_or_buf='data/BroaderTopics_en.csv', index=True)