In [3]:
import useful_functions as f
import requests
import re
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 500)

In [4]:
# Define a user agent string
user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

# Specify the headers with the user agent
headers = {
    'User-Agent': user_agent,
    'accept': 'application/json'
}


## RETRIEVE LIST OF PHILOSOPHIES IN **ENGLISH**

In [20]:
#GET THE PHILOSOPHIES FROM THE PAGE : List of philosophies

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'List_of_philosophies',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# API request
response = requests.get(api_url, params=params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()

    # Extract the content of the first revision
    revisions = data['query']['pages'][0]['revisions']
    
    if revisions:
        content = revisions[0]['slots']['main']['content']
        
        # Use a regular expression to find section titles
        section_titles = re.findall(r'\[\[([^|\]]+)(?:\|[^]]+)?\]\]', content)
        
        while section_titles[0][0]!='A' or section_titles[-1][0]!='Z':
            if section_titles[0][0]!='A':
                section_titles.pop(0)
            if section_titles[-1][0]!='Z':
                section_titles.pop(-1)

        #print(section_titles)
    else:
        print("No content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")


In [21]:
np.save('data/section_titles.npy', section_titles)

### DATAFRAME OF PHILOSOPHIES IN ENGLISH AND VIEWS (1 COLUMN ARTICLE)

In [32]:
philo_views_en=pd.DataFrame()

#PAGEVIEWS 
for page_title in section_titles: 
    # URL for the Wikimedia Pagecounts API to get the number of views for a page
    url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/all-agents/{page_title}/daily/2019010100/2022010100'

    # Define a user agent to have acces to the API 
    user_agent = 'MandarineCorp (louis.brun@epfl.ch)'

    # Specify the headers with the user agent
    headers = {
        'User-Agent': user_agent,
        'accept': 'application/json'
    }

    # Making a GET request
    response_views = requests.get(url,headers=headers)

    # Check if the request was successful (status code 200)
    if response_views.status_code == 200:
        # Print the response content
        data = response_views.json()
    else:
        # Print an error message if the request was unsuccessful
        print(f"Error: {response_views.status_code}")
        print(page_title)


    # Extract the 'items' list from the data
    items_list = data['items']

    # Create a DataFrame
    df = pd.DataFrame(items_list)
    philo_views_en = pd.concat([philo_views_en,df])

Error: 404
Nonduality (spirituality)


In [33]:
#display(philo_views_en)
columns_to_remove = ['granularity','access','agent']
philo_views_en.drop(columns=columns_to_remove,axis=1,inplace=True)
philo_views_en.to_csv('data/Philo_en.csv',index=False)

# Get the views for the diffrent languages

In [None]:
def get_url_list(target_languages, page_titles):
    api_url = "https://en.wikipedia.org/w/api.php"
    language_links = []

    for page in page_titles : 

    # Make separate requests for each language
        for lang in target_languages:
            params = {
                'action': 'query',
                'titles': page,
                'prop': 'langlinks',
                'llprop': 'url',
                'format': 'json',
                'lllang': lang,
            }

            # Make the API request
            response = requests.get(api_url, params=params)
            data = response.json()

            # Check if the request was successful (status code 200)
            if response.status_code == 200:
                # Extract language links from the API response
                pages = data['query']['pages']
                page_id = next(iter(pages))
                langlinks = pages[page_id].get('langlinks', [])
                for link in langlinks:
                    language_links.append( [link['url'],page])
            else:
                print(f"Error for language {lang}: {response.status_code}")
    return language_links
        

def get_page_views_by_languages(links,page_titles):
    philo_views=pd.DataFrame()

    for link in links:
        # Define a user agent to have acces to the API 
        user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'
        # Specify the headers with the user agent
        headers = {
            'User-Agent': user_agent,
            'accept': 'application/json'
        
        }
        # Find country code:
        code = urlsplit(link[0]).hostname.split('.')[0]

        path = unquote(urlsplit(link[0]).path)
        # Use a regular expression to find the title part
        match = re.search(r'/wiki/(.+)', path)
        title = match.group(1)
        subject=link[1]

        #print(link,title,code,subject)
        url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/{code}.wikipedia.org/all-access/all-agents/{title}/daily/2019010100/2022010100'
        # Making a GET request
        response_views = requests.get(url,headers=headers)

        # Check if the request was successful (status code 200)
        if response_views.status_code == 200:
            # Print the response content
            data = response_views.json()
            # Extract the 'items' list from the data
            items_list = data['items']
            #print(items_list)
            # Create a DataFrame
            df = pd.DataFrame(items_list)
            df["subject"]=subject
            df["code"] =code
            #print(df)
            philo_views = pd.concat([philo_views,df])
        else:
            #Print an error message if the request was unsuccessful
            print(f"Error: {response_views.status_code}")
            print(title)


        #i+=1
        #if i==11 :
        #    index_subject+=1
        #    i=0
    return philo_views

In [34]:
target_languages = ['fr', 'ja', 'de', 'it','da','nl','no','sr','sv','ko','fi'] 

In [15]:
language_links = get_url_list(target_languages,section_titles)

In [16]:
np.save('data/languages_links.npy', language_links)

In [17]:
section_titles=np.load('data/section_titles.npy')

In [18]:
language_links =np.load('data/languages_links.npy')

In [19]:
Views_all_lang=get_page_views_by_languages(language_links,section_titles)

Error: 404
Philosophie_des_Absurden
Error: 404
実在論的観念論
Error: 404
美的現実主義
Error: 404
アフリカーナ哲学
Error: 404
Afrocentrism
Error: 404
Antinatalisme
Error: 404
Monisme_anomal
Error: 404
Averoizam
Error: 404
Avicennismo
Error: 404
아비센나주의
Error: 404
Biosophie
Error: 404
新プラトン主義とキリスト教
Error: 404
Philosophie_des_Zufalls
Error: 404
キリスト教実存主義
Error: 404
기독교적_실존주의
Error: 404
認知主義
Error: 404
Communautarisme_(concept_politique)
Error: 404
Holisme_de_confirmation
Error: 404
Kosmisk_skräck
Error: 404
Negazionismo_scientifico
Error: 404
의무론
Error: 404
제거적_유물론
Error: 404
情緒主義
Error: 404
Vestlig_esoterik
Error: 404
Њемачки_идеализам
Error: 404
Istoricizam
Error: 404
Umanesimo_(filosofia)
Error: 404
Illuminazionismo
Error: 404
Logica_informale
Error: 404
Persisk_filosofi
Error: 404
Irrealismi
Error: 404
カント主義
Error: 404
Logica_informale
Error: 404
Philosophie_der_Logik
Error: 404
道徳的相対主義
Error: 404
Uusluddismi
Error: 404
新ピタゴラス主義
Error: 404
New_thought
Error: 404
Нова_мисао
Error: 404
非認知主義
Error: 404
열린_개인

In [42]:
columns_to_remove = ['granularity','access','agent']
Views_all_lang.drop(columns=columns_to_remove,inplace=True)
Views_all_lang.to_csv('data/Philos_lang.csv',index=False)

KeyError: "['granularity', 'access', 'agent'] not found in axis"

# DETERMINE BROADER TOPICS 

### Select only articles in all languages 

In [36]:
philo_lang=pd.read_csv('data/Philos_lang.csv')

In [37]:
article_counts = philo_lang.groupby('subject')['code'].nunique()
articles_to_keep = article_counts[article_counts == philo_lang['code'].nunique()].index.tolist()
views_lang_filtered = philo_lang[philo_lang['subject'].isin(articles_to_keep)]
views_lang_filtered.to_csv('data/Philos_lang.csv')


In [38]:
df = pd.read_csv('data/Philo_en.csv')
df_pivoted = df.pivot_table(index='timestamp'   ,columns='article', values='views')

total_views_per_date = df.groupby('timestamp')['views'].sum()
# Merge the total views per date back to the original DataFrame
df_pivoted['ViewsTotal']=total_views_per_date

df_pivoted.head(10)
df_reg_en = df_pivoted.fillna(0)
df_reg_en.columns = df_reg_en.columns.str.replace('_', ' ')
df_reg_en.rename(columns={"'Pataphysics":'Pataphysics'}, inplace=True)
final_df = df_reg_en[articles_to_keep]
final_df.to_csv('data/df_en.csv')

In [39]:
df_en = pd.read_csv('data/df_en.csv')
target_languages = ['fr', 'ja', 'de', 'it','da','nl','no','sr','sv','ko','fi'] 

for lang in target_languages:
    df_piv = philo_lang[philo_lang['code']==lang].pivot_table(index='timestamp',columns='subject', values='views')

    total_views_per_date = philo_lang[philo_lang['code']==lang].groupby('timestamp')['views'].sum()
    # Merge the total views per date back to the original DataFrame
    df_piv['ViewsTotal']=total_views_per_date
    

    # Identify missing columns in each DataFrame
    missing_cols= df_en.columns.difference(df_piv.columns)

    # Add missing columns with NaN values
    df_piv = pd.concat([df_piv, pd.DataFrame(columns=missing_cols)], axis=1)

    # Reorder columns while keeping values in the correct order
    df_piv = df_piv.loc[:, df_en.columns]
    df_piv.drop(columns='timestamp',inplace=True)


    df_reg = df_piv.fillna(0) 
    df_reg.drop(columns=df_reg.columns[0])
    df_reg.rename(columns={"'Pataphysics":'Pataphysics'},inplace=True)
    filepath = 'data/df_'+lang+'.csv'
    df_reg.to_csv(filepath,index='False')


    

(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)
(1097, 108)


In [41]:
df_en

Unnamed: 0,timestamp,Aesthetics,Agnosticism,Analytic philosophy,Anarchism,Anarchy,Animism,Asceticism,Atheism,Authoritarianism,...,Teleology,Theism,Theology,Thomism,Transhumanism,Utilitarianism,Vienna Circle,Vitalism,Zen,Zoroastrianism
0,2019010100,1736.0,3413.0,684.0,1820.0,1495.0,1385.0,1301.0,2696.0,937.0,...,697.0,636.0,951.0,343.0,942.0,1451.0,177.0,366.0,1805.0,9866.0
1,2019010200,1926.0,3801.0,677.0,2074.0,1691.0,1498.0,1508.0,3069.0,1156.0,...,855.0,767.0,1146.0,392.0,1153.0,1898.0,206.0,495.0,1894.0,9525.0
2,2019010300,1898.0,3493.0,688.0,2132.0,1583.0,1529.0,1469.0,3094.0,1408.0,...,777.0,782.0,1188.0,359.0,1797.0,2050.0,206.0,719.0,1840.0,8196.0
3,2019010400,1989.0,3655.0,711.0,1969.0,1442.0,1602.0,1368.0,2991.0,1389.0,...,817.0,800.0,1205.0,336.0,1229.0,1928.0,201.0,446.0,1886.0,8294.0
4,2019010500,1739.0,3423.0,697.0,2348.0,1148.0,1460.0,1334.0,2894.0,1259.0,...,777.0,773.0,1101.0,341.0,1209.0,1805.0,200.0,458.0,1904.0,9097.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,2021122800,1950.0,2473.0,655.0,2322.0,915.0,1579.0,1845.0,2416.0,1313.0,...,607.0,629.0,1534.0,319.0,1699.0,1902.0,201.0,408.0,1549.0,5874.0
1093,2021122900,1917.0,3152.0,635.0,2369.0,910.0,1686.0,1483.0,2472.0,1734.0,...,579.0,538.0,1352.0,314.0,1818.0,1828.0,196.0,414.0,1470.0,5841.0
1094,2021123000,1782.0,2683.0,608.0,2289.0,965.0,2353.0,1702.0,2340.0,1643.0,...,581.0,656.0,1144.0,370.0,1984.0,1714.0,205.0,339.0,1477.0,5633.0
1095,2021123100,1518.0,2474.0,671.0,2055.0,785.0,1682.0,1319.0,2306.0,1377.0,...,568.0,581.0,853.0,349.0,1732.0,1540.0,163.0,355.0,5046.0,5803.0
