In [None]:
import requests
import re
import pandas as pd
from urllib.parse import quote
from datetime import datetime
import matplotlib.pyplot as plt
# Let pandas render up to 500 rows before truncating displayed frames.
pd.options.display.max_rows = 500

In [None]:
# Request headers shared by the notebook: a descriptive User-Agent string
# plus an Accept header asking for JSON.
user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'
headers = {'User-Agent': user_agent, 'accept': 'application/json'}


In [None]:
# Get the philosophies from the English Wikipedia page "List of philosophies".
# Result: `section_titles`, the list of wiki-link targets found in the page's
# wikitext, trimmed so it starts at the first title beginning with 'A' and
# ends at the last title beginning with 'Z'.

# Specify the API endpoint URL
api_url = "https://en.wikipedia.org/w/api.php"

# Specify the parameters for the API request: the revision content of the
# page, in JSON format version 2 (where `pages` is a list).
params = {
    'action': 'query',
    'prop': 'revisions',
    'titles': 'List_of_philosophies',
    'rvslots': '*',
    'rvprop': 'content',
    'formatversion': 2,
    'format': 'json'
}

# Defined up front so later cells always find a list, even when the request
# fails or the page has no content.
section_titles = []

# API request. The notebook-level `headers` carry the descriptive User-Agent;
# the timeout keeps a stalled connection from hanging the notebook forever.
response = requests.get(api_url, params=params, headers=headers, timeout=30)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()

    # A missing page has no 'revisions' key, so read every level defensively
    # instead of indexing straight into the response.
    pages = data.get('query', {}).get('pages', [])
    revisions = pages[0].get('revisions', []) if pages else []

    if revisions:
        content = revisions[0]['slots']['main']['content']

        # Capture the target of every [[target]] or [[target|label]] wiki link
        section_titles = re.findall(r'\[\[([^|\]]+)(?:\|[^]]+)?\]\]', content)

        # Drop leading links until a title starts with 'A' ...
        while section_titles and section_titles[0][0] != 'A':
            section_titles.pop(0)
        # ... and trailing links until a title starts with 'Z'. The emptiness
        # checks stop the trimming cleanly when no such title exists.
        while section_titles and section_titles[-1][0] != 'Z':
            section_titles.pop()

        if not section_titles:
            print("No titles found between the A and Z entries.")
    else:
        print("No content found.")
else:
    # Print an error message if the request was unsuccessful
    print(f"Error: {response.status_code}")


### Get the views for the different languages

In [None]:
# Language codes of the Wikipedia editions to look up (sent as `lllang` when
# requesting langlinks). There are 11 entries, which matches the 11-links-per-
# subject grouping used by get_page_views_by_languages.
target_languages = [
    'fr', 'ja', 'de', 'it', 'da', 'nl',
    'no', 'sr', 'sv', 'ko', 'fi',
]

In [None]:
def get_url_list(languages_list, page_titles):
    """Collect the URLs of the other-language versions of English Wikipedia pages.

    One langlinks query is sent per (page, language) pair, pages in the outer
    loop and languages in the inner loop, and every URL found is appended to a
    single flat list in that order.

    Parameters
    ----------
    languages_list : iterable of str
        Language codes passed to the API as `lllang` (e.g. 'fr', 'de').
    page_titles : iterable of str
        Titles of the English Wikipedia pages to look up.

    Returns
    -------
    list of str
        The language-link URLs. A page that has no link in a given language
        contributes nothing for that pair, so the list can hold fewer than
        len(languages_list) URLs per page.
    """
    api_url = "https://en.wikipedia.org/w/api.php"
    language_links = []

    for page in page_titles:
        # Make separate requests for each requested language (the parameter,
        # not a notebook-level global).
        for lang in languages_list:
            params = {
                'action': 'query',
                'titles': page,
                'prop': 'langlinks',
                'llprop': 'url',
                'format': 'json',
                'lllang': lang,
            }

            # Make the API request; the timeout avoids hanging on a stalled connection
            response = requests.get(api_url, params=params, timeout=30)

            # Check the status before decoding: an error body may not be JSON
            if response.status_code != 200:
                print(f"Error for language {lang}: {response.status_code}")
                continue

            # Extract language links from the first page of the API response;
            # tolerate responses that carry no 'query'/'pages' section.
            pages = response.json().get('query', {}).get('pages', {})
            page_info = next(iter(pages.values()), {})
            for link in page_info.get('langlinks', []):
                language_links.append(link['url'])
    return language_links
        
        

In [None]:
def find_country_code_and_title(link):
    """Split a Wikipedia article URL into its subdomain code and page title.

    Used by get_page_views_by_languages(links, page_titles).

    For a link shaped like 'https://fr.wikipedia.org/wiki/Philosophie' the
    code is the host name up to its first dot ('fr') and the title is
    everything after the fourth '/' ('Philosophie'). Slashes inside the title
    are kept ('AC/DC'), so callers that embed the title in a URL path segment
    need to percent-encode them.

    Parameters
    ----------
    link : str
        The article URL.

    Returns
    -------
    tuple of (str, str)
        (code, title); a part that the link is too short to contain is
        returned as an empty string.
    """
    # 'https://fr.wikipedia.org/wiki/Title'
    #   -> ['https:', '', 'fr.wikipedia.org', 'wiki', 'Title']
    # maxsplit=4 keeps any further '/' inside the title part.
    parts = link.split("/", 4)
    code = parts[2].split(".", 1)[0] if len(parts) > 2 else ""
    title = parts[4] if len(parts) > 4 else ""
    return code, title

In [None]:
def get_page_views_by_languages(links, page_titles, links_per_subject=11):
    """Download monthly page views for every link and stack them in one frame.

    For each link, the subdomain code and page title are extracted with
    find_country_code_and_title and the Wikimedia pageviews REST endpoint is
    queried (all-access, all-agents, monthly, 2018010100 to 2023010100).
    The returned items become rows, tagged with a `subject` and a `code`
    column. Links whose request fails are reported and skipped.

    Subjects are assigned purely by position: the first `links_per_subject`
    links belong to page_titles[0], the next ones to page_titles[1], and so
    on. NOTE(review): get_url_list leaves out languages a page has no link
    for, so its output is only aligned when every page exists in every
    language; verify the alignment before trusting the `subject` column.

    Parameters
    ----------
    links : sequence of str
        Wikipedia article URLs, grouped by subject.
    page_titles : sequence of str
        Subject names, one per group of links.
    links_per_subject : int, default 11
        Number of consecutive links that share one subject.

    Returns
    -------
    pandas.DataFrame
        The concatenated items with the extra `subject` and `code` columns;
        an empty DataFrame when nothing was downloaded.

    Raises
    ------
    ValueError
        If links_per_subject is smaller than 1.
    IndexError
        If there are more groups of links than entries in page_titles.
    """
    if links_per_subject < 1:
        raise ValueError("links_per_subject must be at least 1")

    # Define a user agent to have access to the API (same for every request)
    user_agent = 'MandarineCorp (clementine.naim@epfl.ch)'
    # Specify the headers with the user agent
    headers = {
        'User-Agent': user_agent,
        'accept': 'application/json'
    }

    frames = []
    for position, link in enumerate(links):
        # The position decides the subject, so it advances even for links
        # that are skipped below.
        subject = page_titles[position // links_per_subject]

        # Find country code:
        code, title = find_country_code_and_title(link)

        # The title fills a single path segment: encode '/' and other
        # reserved characters, leaving existing %XX escapes untouched.
        encoded_title = quote(title, safe="%")
        url = f'https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/{code}.wikipedia.org/all-access/all-agents/{encoded_title}/monthly/2018010100/2023010100'
        # Making a GET request
        response_views = requests.get(url, headers=headers, timeout=30)

        # Check if the request was successful (status code 200)
        if response_views.status_code != 200:
            # Report and skip, so a failed link never reuses the data
            # downloaded for the previous one.
            print(f"Error: {response_views.status_code}")
            print(title)
            continue

        # Extract the 'items' list from the data
        items_list = response_views.json().get('items', [])
        # Create a DataFrame
        df = pd.DataFrame(items_list)
        df["subject"] = subject
        df["code"] = code
        frames.append(df)

    # Concatenate once at the end instead of growing the frame in the loop
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)