In [18]:
import requests
import re
import pandas as pd
from urllib.parse import unquote, urlsplit 
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
pd.set_option('display.max_rows', 500)

In [19]:
# Define a user agent string
user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

# Specify the headers with the user agent
headers = {
    'User-Agent': user_agent,
    'accept': 'application/json'
}


## RETRIEVE LIST OF PHILOSOPHIES IN **ENGLISH**

In [20]:
#GET THE PHILOSOPHIES FROM THE PAGE : List of philosophies

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'List_of_philosophies',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# API request
response = requests.get(api_url, params=params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()

    # Extract the content of the first revision
    revisions = data['query']['pages'][0]['revisions']
    
    if revisions:
        content = revisions[0]['slots']['main']['content']
        
        # Use a regular expression to find section titles
        section_titles = re.findall(r'\[\[([^|\]]+)(?:\|[^]]+)?\]\]', content)
        
        while section_titles[0][0]!='A' or section_titles[-1][0]!='Z':
            if section_titles[0][0]!='A':
                section_titles.pop(0)
            if section_titles[-1][0]!='Z':
                section_titles.pop(-1)

        #print(section_titles)
    else:
        print("No content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")


In [21]:
np.save('data/section_titles.npy', section_titles)

### DATAFRAME OF PHILOSOPHIES IN ENGLISH AND VIEWS (1 COLUMN ARTICLE)

In [7]:
philo_views_en=pd.DataFrame()

#PAGEVIEWS 
for page_title in section_titles: 
    # URL for the Wikimedia Pagecounts API to get the number of views for a page
    url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{page_title}/daily/2019060100/2021010100'

    # Define a user agent to have acces to the API 
    user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

    # Specify the headers with the user agent
    headers = {
        'User-Agent': user_agent,
        'accept': 'application/json'
    }

    # Making a GET request
    response_views = requests.get(url,headers=headers)

    # Check if the request was successful (status code 200)
    if response_views.status_code == 200:
        # Print the response content
        data = response_views.json()
    else:
        # Print an error message if the request was unsuccessful
        print(f"Error: {response_views.status_code}")
        print(page_title)


    # Extract the 'items' list from the data
    items_list = data['items']

    # Create a DataFrame
    df = pd.DataFrame(items_list)
    philo_views_en = pd.concat([philo_views_en,df])

Error: 404
Nonduality (spirituality)
Error: 404
Xenofeminism


In [8]:
#display(philo_views_en)
columns_to_remove = ['granularity','access','agent']
philo_views_en.drop(columns=columns_to_remove,axis=1,inplace=True)
philo_views_en.to_csv('data/Philo_en.csv',index=False)

# Get the views for the diffrent languages

In [9]:
target_languages = ['fr', 'ja', 'de', 'it','da','nl','no','sr','sv','ko','fi'] 

In [70]:
def get_url_list(languages_list, page_titles):
    api_url = "https://en.wikipedia.org/w/api.php"
    language_links = []

    for page in page_titles : 

    # Make separate requests for each language
        for lang in target_languages:
            params = {
                'action': 'query',
                'titles': page,
                'prop': 'langlinks',
                'llprop': 'url',
                'format': 'json',
                'lllang': lang,
            }

            # Make the API request
            response = requests.get(api_url, params=params)
            data = response.json()

            # Check if the request was successful (status code 200)
            if response.status_code == 200:
                # Extract language links from the API response
                pages = data['query']['pages']
                page_id = next(iter(pages))
                langlinks = pages[page_id].get('langlinks', [])
                for link in langlinks:
                    language_links.append( [link['url'],page])
            else:
                print(f"Error for language {lang}: {response.status_code}")
    return language_links
        
        

In [72]:
def get_page_views_by_languages(links,page_titles):
    philo_views=pd.DataFrame()
    #i=0
    #index_subject=0
    for link in links:
        # Define a user agent to have acces to the API 
        user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'
        # Specify the headers with the user agent
        headers = {
            'User-Agent': user_agent,
            'accept': 'application/json'
        
        }
        # Find country code:
        code = urlsplit(link[0]).hostname.split('.')[0]

        path = unquote(urlsplit(link[0]).path)
        # Use a regular expression to find the title part
        match = re.search(r'/wiki/(.+)', path)
        title = match.group(1)
        subject=link[1]

        #print(link,title,code,subject)
        url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/{code}.wikipedia.org/all-access/all-agents/{title}/daily/2019060100/2021010100'
        # Making a GET request
        response_views = requests.get(url,headers=headers)

        # Check if the request was successful (status code 200)
        if response_views.status_code == 200:
            # Print the response content
            data = response_views.json()
            # Extract the 'items' list from the data
            items_list = data['items']
            #print(items_list)
            # Create a DataFrame
            df = pd.DataFrame(items_list)
            df["subject"]=subject
            df["code"] =code
            #print(df)
            philo_views = pd.concat([philo_views,df])
        else:
            #Print an error message if the request was unsuccessful
            print(f"Error: {response_views.status_code}")
            print(title)


        #i+=1
        #if i==11 :
        #    index_subject+=1
        #    i=0
    return philo_views

In [71]:
language_links = get_url_list(target_languages,section_titles)

In [65]:
np.save('data/languages_links.npy', language_links)

In [73]:
Views_all_lang=get_page_views_by_languages(language_links,section_titles)

Error: 404
Philosophie_des_Absurden
Error: 404
Idéalisme_actuel
Error: 404
実在論的観念論
Error: 404
美的現実主義
Error: 404
アフリカーナ哲学
Error: 404
Afrocentrism
Error: 404
Amerikanische_Philosophie
Error: 404
Antinatalisme
Error: 404
반정신의학
Error: 404
Monisme_anomal
Error: 404
Averoizam
Error: 404
Avicennismo
Error: 404
아비센나주의
Error: 404
Biosophie
Error: 404
Čarvaka
Error: 404
新プラトン主義とキリスト教
Error: 404
Philosophie_des_Zufalls
Error: 404
キリスト教実存主義
Error: 404
기독교적_실존주의
Error: 404
認知主義
Error: 404
Communautarisme_(concept_politique)
Error: 404
Holisme_de_confirmation
Error: 404
Kosmisk_skräck
Error: 404
Kosminen_kauhu
Error: 404
Dekonstruksjon_(filosofi)
Error: 404
Negazionismo_scientifico
Error: 404
의무론
Error: 404
Écocentrisme
Error: 404
제거적_유물론
Error: 404
情緒主義
Error: 404
Vestlig_esoterik
Error: 404
Њемачки_идеализам
Error: 404
Tysk_filosofi
Error: 404
Istoricizam
Error: 404
Hongaku
Error: 404
Umanesimo_(filosofia)
Error: 404
Illuminazionismo
Error: 404
Logica_informale
Error: 404
直観主義_(数学の哲学)
Error: 404
P

In [74]:
print(Views_all_lang)

          project       article granularity   timestamp      access  \
0    fr.wikipedia       Absurde       daily  2019060100  all-access   
1    fr.wikipedia       Absurde       daily  2019060200  all-access   
2    fr.wikipedia       Absurde       daily  2019060300  all-access   
3    fr.wikipedia       Absurde       daily  2019060400  all-access   
4    fr.wikipedia       Absurde       daily  2019060500  all-access   
..            ...           ...         ...         ...         ...   
472  fi.wikipedia  Zurvalaisuus       daily  2020122800  all-access   
473  fi.wikipedia  Zurvalaisuus       daily  2020122900  all-access   
474  fi.wikipedia  Zurvalaisuus       daily  2020123000  all-access   
475  fi.wikipedia  Zurvalaisuus       daily  2020123100  all-access   
476  fi.wikipedia  Zurvalaisuus       daily  2021010100  all-access   

          agent  views    subject code  
0    all-agents    269  Absurdism   fr  
1    all-agents    311  Absurdism   fr  
2    all-agents    385  

In [76]:
#finding total visits over 5 years:
#Total_per_lang=Views_all_lang.groupby(['article','code','subject']).sum('views').sort_values(['subject','views'])
#Total_per_lang.head(500)
columns_to_remove = ['granularity','access','agent']
Views_all_lang.drop(columns=columns_to_remove,inplace=True)
Views_all_lang.to_csv('data/Philos_lang.csv',index=False)

# DETERMINE BROADER TOPICS 

In [29]:
#More accurate way to classifiy philosophies...to be completed 

philosophies = [
    # List all the philosophies you provided
    'Absolute (philosophy)', 'Absurdism', 'Tychism', 'Acosmism', 'Aestheticism', 'Agnostic atheism', 'Agnostic theism', 'Agnosticism', 'Altruism', 'Anarchism', # ... (remaining philosophies)
]

# Define the class hierarchy using dictionaries
classifications = {
    'Ontological and Metaphysical Orientations': {
        'Metaphysical Theories': [
            'Absolute (philosophy)', 'Acosmism', 'Idealism', 'Materialism', 'Monism and Dualism'
        ],
        'Ontological Views': [
            'Pantheism', 'Realism and Nominalism', 'Substance Dualism'
        ]
    },
    'Epistemological Frameworks': {
        'Approaches to Knowledge': [
            'Empiricism', 'Rationalism', 'Skepticism'
        ],
        'Epistemological Theories': [
            'Constructivist Epistemology', 'Pragmatism', 'Reliabilism'
        ]
    },
    'Ethical and Moral Philosophies': {
    'Ethical Theories': [
        'Absolutism and Relativism', 'Deontological Ethics', 'Utilitarianism'
    ],
    'Moral Views': [
        'Altruism', 'Egoism', 'Moral Realism'
    ]
    },
    
}

# Function to find the class/subclass for a philosophy
def find_classification(philosophy):
    for classification, subclasses in classifications.items():
        for subclass, philosophies in subclasses.items():
            if philosophy in philosophies:
                return classification, subclass
    return "Not Classified", "Not Classified"

# Loop through all philosophies and print their classifications
#for philosophy in philosophies:
#    classification, subclass = find_classification(philosophy)
#    print(f"{philosophy} -> Class: {classification}, Subclass: {subclass}")


In [30]:
#Construct broaders philosophy topics 
#Create the new lists
metaphysics_and_ontology = ['Absolute (philosophy)', 'Acosmism', 'Animism', 'Atomism', 'Dualism (Mind-body dualism, Substance dualism)',
                            'Emergent materialism', 'Immaterialism', 'Monism', 'Pantheism']

existentialism_and_absurdism = ['Absurdism', 'Existentialism']# Je pense a split en 2 

epistemology = ['Agnosticism', 'Empiricism', 'Rationalism']

ethics_and_moral_philosophy = ['Altruism', 'Antinatalism', 'Consequentialism', 'Ethical egoism', 'Hedonism', 'Moral absolutism',
                               'Moral realism', 'Moral relativism', 'Moral universalism', 'Utilitarianism']

political_and_social_philosophy = ['Anarchism', 'Authoritarianism', 'Capitalism', 'Communism', 'Libertarianism', 'Socialism', 'Totalitarianism']

philosophy_of_mind = ['Behaviorism', 'Dualism (Mind-body dualism, Substance dualism)', 'Functionalism (philosophy of mind)',
                      'Idealism', 'Materialism', 'Phenomenalism', 'Physicalism', 'Solipsism']

religious_and_theological_philosophy = ['Deism', 'Monotheism', 'Polytheism', 'Religious humanism', 'Theism']

philosophy_of_time = ['Eternalism (philosophy of time)']

philosophy_of_science = ['Empiricism', 'Positivism', 'Scientism']

cultural_and_social_philosophy = ['Aestheticism', 'Anthropocentrism', 'Anthropomorphism', 'Cultural relativism', 'Environmentalism',
                                  'Feminism', 'Humanism', 'Secular humanism']


### DATAFRAME IN **ENGLISH** WITH BROADER TOPICS

In [31]:
# Create the new DataFrame with columns for each topic
broader_philosophy_topics = pd.DataFrame(index=philo_views_en['timestamp'].unique())

# Filter and aggregate views for each philosophy topic
topics = {
    'metaphysics_and_ontology': metaphysics_and_ontology,
    'existentialism_and_absurdism': existentialism_and_absurdism,
    'epistemology': epistemology,
    'ethics_and_moral_philosophy': ethics_and_moral_philosophy,
    'political_and_social_philosophy': political_and_social_philosophy,
    'philosophy_of_mind': philosophy_of_mind,
    'religious_and_theological_philosophy': religious_and_theological_philosophy,
    'philosophy_of_time': philosophy_of_time,
    'philosophy_of_science': philosophy_of_science,
    'cultural_and_social_philosophy': cultural_and_social_philosophy
}

for topic, articles in topics.items():
    topic_views = philo_views_en[philo_views_en['article'].isin(articles)]
    topic_views = topic_views.groupby('timestamp')['views'].sum()
    broader_philosophy_topics[topic] = topic_views

# Now, broader_philosophy_topics contains aggregated views for each philosophy topic in one DataFrame

#Drop last month of data because not complete
    if broader_philosophy_topics.index[-1] == '2023010100' :
        broader_philosophy_topics.drop(broader_philosophy_topics.index[-1], inplace=True)

In [32]:
#display(broader_philosophy_topics)
broader_philosophy_topics.to_csv('data/BroaderTopics.csv', index=True)

In [25]:
dat=pd.read_csv('data/Philos_lang.csv')
dat

Unnamed: 0,project,article,timestamp,views,subject,code
0,fr.wikipedia,Absurde,2018010100,10119,Absurdism,fr
1,fr.wikipedia,Absurde,2018020100,8507,Absurdism,fr
2,fr.wikipedia,Absurde,2018030100,9485,Absurdism,fr
3,fr.wikipedia,Absurde,2018040100,9772,Absurdism,fr
4,fr.wikipedia,Absurde,2018050100,11073,Absurdism,fr
...,...,...,...,...,...,...
154616,fi.wikipedia,Zurvalaisuus,2022080100,64,Naïve realism,fi
154617,fi.wikipedia,Zurvalaisuus,2022090100,51,Naïve realism,fi
154618,fi.wikipedia,Zurvalaisuus,2022100100,33,Naïve realism,fi
154619,fi.wikipedia,Zurvalaisuus,2022110100,36,Naïve realism,fi


In [26]:
article_counts = dat.groupby('subject')['code'].nunique()
articles_to_keep = article_counts[article_counts == dat['code'].nunique()].index.tolist()
views_lang_filtered = dat[dat['subject'].isin(articles_to_keep)]
views_lang_filtered


Unnamed: 0,project,article,timestamp,views,subject,code
0,fr.wikipedia,Absurde,2018010100,10119,Absurdism,fr
1,fr.wikipedia,Absurde,2018020100,8507,Absurdism,fr
2,fr.wikipedia,Absurde,2018030100,9485,Absurdism,fr
3,fr.wikipedia,Absurde,2018040100,9772,Absurdism,fr
4,fr.wikipedia,Absurde,2018050100,11073,Absurdism,fr
...,...,...,...,...,...,...
153947,sr.wikipedia,Зороастризам,2022090100,1204,Mysticism,sr
153948,sr.wikipedia,Зороастризам,2022100100,1278,Mysticism,sr
153949,sr.wikipedia,Зороастризам,2022110100,1361,Mysticism,sr
153950,sr.wikipedia,Зороастризам,2022120100,1455,Mysticism,sr


In [None]:
dat.groupby('project','code')['article'].count()

In [38]:
article_counts
sort = article_counts[article_counts.isin([9,10])].index.tolist()

In [39]:
len(sort)

99

In [51]:
miss = []
for s in sort :
    miss.append([c for c in target_languages if c not in dat[dat['subject']==s]['code'].unique()])

In [52]:
miss

[['ko', 'fi'],
 ['da', 'ko'],
 ['de'],
 ['no', 'fi'],
 ['no'],
 ['ja', 'it'],
 ['da'],
 ['fi'],
 ['da'],
 ['sr', 'sv'],
 ['no'],
 ['ko'],
 ['ja', 'sr'],
 ['nl', 'no'],
 ['ja', 'da'],
 ['nl'],
 ['da'],
 ['ja', 'no'],
 ['no', 'fi'],
 ['it'],
 ['sr'],
 ['da'],
 ['no', 'sv'],
 ['ja'],
 ['sr', 'ko'],
 ['it'],
 ['sv', 'fi'],
 ['da', 'nl'],
 ['fi'],
 ['ja', 'da'],
 ['fr'],
 ['fi'],
 ['no', 'sr'],
 ['fi'],
 ['da'],
 ['it'],
 ['sr'],
 ['it', 'da'],
 ['da', 'no'],
 ['it', 'da'],
 ['no'],
 ['fr'],
 ['it', 'da'],
 ['ja'],
 ['sr'],
 ['da', 'ko'],
 ['fi'],
 ['de'],
 ['fr'],
 ['da', 'ko'],
 ['ja', 'no'],
 ['da'],
 ['no', 'sr'],
 ['sr', 'sv'],
 ['da'],
 ['it', 'da'],
 ['ja', 'no'],
 ['ja', 'no'],
 ['sr'],
 ['it'],
 ['da', 'sv'],
 ['ja', 'de'],
 ['fi'],
 ['ja'],
 ['de', 'it'],
 ['da'],
 ['sv'],
 ['no'],
 ['de', 'no'],
 ['no', 'ko'],
 ['da'],
 ['nl', 'no'],
 ['ja'],
 ['ko', 'fi'],
 ['fi'],
 ['da', 'no'],
 ['fr'],
 ['da', 'sv'],
 ['da', 'sr'],
 ['sr'],
 ['sr', 'sv'],
 ['da'],
 ['no', 'sr'],
 ['it'],
 ['s

In [18]:
len(views_lang_filtered['subject'].unique())

In [22]:
article_to_get = views_lang_filtered['subject'].unique()
article_to_get

array(['Absurdism', 'African philosophy', 'Afrocentrism',
       'American philosophy', 'Animism', 'Anti-realism',
       'Antireductionism', 'Anthropocentrism', 'Anomalous monism',
       'Philosophy of Arithmetic', 'Averroism', 'Avicennism', 'Baptism',
       'Behaviorism', 'Bayesianism', 'Categorical imperative',
       'School of Naturalists', 'Neoplatonism and Christianity',
       'Capitalism', 'The Philosophy of Chance', 'Cognitivism (ethics)',
       'Communism', 'Confucianism', 'Cosmicism', 'Cosmopolitanism',
       'Critical theory', 'Philosophy of culture', 'Czech philosophy',
       'Deconstruction', 'Deism', 'Deontology', 'Philosophy of design',
       'Dialectical materialism', 'Discordianism',
       'Philosophy of education', 'Empiricism',
       'Philosophy of engineering', 'Epiphenomenalism',
       'Epistemological nihilism', 'Epistemology', 'Eudaimonism',
       'Existentialism', 'Externalism', 'Fallibilism', 'Filial piety',
       'Foundationalism', 'Philosophy of 

In [53]:
len(article_to_get)

76