# Loading Necessary Libraries

In [4]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, csv
# These are standard python modules
import json, time, urllib.parse
# The 'requests' module is not a standard Python module. You will need to install this with pip/pip3 if you do not already have it
import requests

# Data Acquisition

I used the example code provided by Dr. David McDonald to set up the API pageview parameter template and the `request_pageinfo_per_article` and `request_ores_score_per_article` functions. I read in the CSV file with the politician names as these are the article titles that I will need to make the API request for the page information.

In [None]:
politicians = pd.read_csv('../politicians_by_country_AUG.2024.csv')

In [27]:
#########
#
#    CONSTANTS
#

# The basic English Wikipedia API endpoint
API_ENWIKIPEDIA_ENDPOINT = "https://en.wikipedia.org/w/api.php"
API_HEADER_AGENT = 'User-Agent'

# We'll assume that there needs to be some throttling for these requests - we should always be nice to a free data resource
API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
API_THROTTLE_WAIT = (1.0/100.0)-API_LATENCY_ASSUMED

# When making automated requests we should include something that is unique to the person making the request
# This should include an email - your UW email would be good to put in there
REQUEST_HEADERS = {
    'User-Agent': 'igokhale@uw.edu, University of Washington, MSDS DATA 512 - AUTUMN 2024'
}

# This is just a list of English Wikipedia article titles that we can use for example requests
ARTICLE_TITLES = [ 'Bison', 'Northern flicker', 'Red squirrel', 'Chinook salmon', 'Horseshoe bat' ]

# This is a string of additional page properties that can be returned see the Info documentation for
# what can be included. If you don't want any this can simply be the empty string
PAGEINFO_EXTENDED_PROPERTIES = "talkid|url|watched|watchers"
#PAGEINFO_EXTENDED_PROPERTIES = ""

# This template lists the basic parameters for making this
PAGEINFO_PARAMS_TEMPLATE = {
    "action": "query",
    "format": "json",
    "titles": "",           # to simplify this should be a single page title at a time
    "prop": "info",
    "inprop": PAGEINFO_EXTENDED_PROPERTIES
}

In [28]:
#########
#
#    PROCEDURES/FUNCTIONS
#

def request_pageinfo_per_article(article_title = None, 
                                 endpoint_url = API_ENWIKIPEDIA_ENDPOINT, 
                                 request_template = PAGEINFO_PARAMS_TEMPLATE,
                                 headers = REQUEST_HEADERS):
    
    # article title can be as a parameter to the call or in the request_template
    if article_title:
        request_template['titles'] = article_title

    if not request_template['titles']:
        raise Exception("Must supply an article title to make a pageinfo request.")

    if API_HEADER_AGENT not in headers:
        raise Exception(f"The header data should include a '{API_HEADER_AGENT}' field that contains your UW email address.")

    if 'uwnetid@uw' in headers[API_HEADER_AGENT]:
        raise Exception(f"Use your UW email address in the '{API_HEADER_AGENT}' field.")

    # make the request
    try:
        # we'll wait first, to make sure we don't exceed the limit in the situation where an exception
        # occurs during the request processing - throttling is always a good practice with a free
        # data source like Wikipedia - or any other community sources
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(endpoint_url, headers=headers, params=request_template)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

In [29]:
#########
#
#    CONSTANTS
#

#    The current LiftWing ORES API endpoint and prediction model
#
API_ORES_LIFTWING_ENDPOINT = "https://api.wikimedia.org/service/lw/inference/v1/models/{model_name}:predict"
API_ORES_EN_QUALITY_MODEL = "enwiki-articlequality"

#
#    The throttling rate is a function of the Access token that you are granted when you request the token. The constants
#    come from dissecting the token and getting the rate limits from the granted token. An example of that is below.
#
API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
API_THROTTLE_WAIT = ((60.0*60.0)/5000.0)-API_LATENCY_ASSUMED  # The key authorizes 5000 requests per hour

#    When making automated requests we should include something that is unique to the person making the request
#    This should include an email - your UW email would be good to put in there
#    
#    Because all LiftWing API requests require some form of authentication, you need to provide your access token
#    as part of the header too
#
REQUEST_HEADER_TEMPLATE = {
    'User-Agent': "<{email_address}>, University of Washington, MSDS DATA 512 - AUTUMN 2024",
    'Content-Type': 'application/json',
    'Authorization': "Bearer {access_token}"
}
#
#    This is a template for the parameters that we need to supply in the headers of an API request
#
REQUEST_HEADER_PARAMS_TEMPLATE = {
    'email_address' : "igokhale@uw.edu",         # your email address should go here
    'access_token'  : ""          # the access token you create will need to go here
}

#
#    A dictionary of English Wikipedia article titles (keys) and sample revision IDs that can be used for this ORES scoring example
#
ARTICLE_REVISIONS = { 'Bison':1085687913 , 'Northern flicker':1086582504 , 'Red squirrel':1083787665 , 'Chinook salmon':1085406228 , 'Horseshoe bat':1060601936 }

#
#    This is a template of the data required as a payload when making a scoring request of the ORES model
#
ORES_REQUEST_DATA_TEMPLATE = {
    "lang":        "en",     # required that its english - we're scoring English Wikipedia revisions
    "rev_id":      "",       # this request requires a revision id
    "features":    True
}

#
#    These are used later - defined here so they, at least, have empty values
#
USERNAME = ""
ACCESS_TOKEN = ""
#

In [32]:
#########
#
#    PROCEDURES/FUNCTIONS
#

def request_ores_score_per_article(article_revid = None, email_address=None, access_token=None,
                                   endpoint_url = API_ORES_LIFTWING_ENDPOINT, 
                                   model_name = API_ORES_EN_QUALITY_MODEL, 
                                   request_data = ORES_REQUEST_DATA_TEMPLATE, 
                                   header_format = REQUEST_HEADER_TEMPLATE, 
                                   header_params = REQUEST_HEADER_PARAMS_TEMPLATE):
    
    #    Make sure we have an article revision id, email and token
    #    This approach prioritizes the parameters passed in when making the call
    if article_revid:
        request_data['rev_id'] = article_revid
    if email_address:
        header_params['email_address'] = email_address
    if access_token:
        header_params['access_token'] = access_token
    
    #   Making a request requires a revision id - an email address - and the access token
    if not request_data['rev_id']:
        raise Exception("Must provide an article revision id (rev_id) to score articles")
    if not header_params['email_address']:
        raise Exception("Must provide an 'email_address' value")
    if not header_params['access_token']:
        raise Exception("Must provide an 'access_token' value")
    
    # Create the request URL with the specified model parameter - default is a article quality score request
    request_url = endpoint_url.format(model_name=model_name)
    
    # Create a compliant request header from the template and the supplied parameters
    headers = dict()
    for key in header_format.keys():
        headers[str(key)] = header_format[key].format(**header_params)
    
    # make the request
    try:
        # we'll wait first, to make sure we don't exceed the limit in the situation where an exception
        # occurs during the request processing - throttling is always a good practice with a free data
        # source like ORES - or other community sources
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        #response = requests.get(request_url, headers=headers)
        response = requests.post(request_url, headers=headers, data=json.dumps(request_data))
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

# Data Processing

Below is the function I used to get the article quality score from the ORES API and write the information to a CSV file. The function does not take in any inputs but outputs a CSV file with the article title and quality score prediciton. I actually ran the function below is a separate python script `get_data.py`. The function can also run in the notebook cell shown below. 

In [None]:
def get_final_csv():
    # read in poltician names from given CSV file
    politicians = pd.read_csv('Downloads/politicians_by_country_AUG.2024.csv')

    # set up empty dictionary to store article names and quality prediction 
    pred = {}

    # iterate through poltician names needed to make the API call
    for article in politicians.name.to_list():

        #make API request inputing article title (poltician name)
        info = request_pageinfo_per_article(article)

        #get page ID to access last revision ID in dictionary
        page_id = list(info["query"]["pages"].keys())[0]

        #skip articles that do not have revision ID
        if "lastrevid" not in list(info["query"]["pages"][page_id].keys()):
            print("could not find lastrevid for", article)
            continue

        # access revision ID
        rev_id = info["query"]["pages"][page_id]["lastrevid"]

        # make ORES API request using revision ID, email, and access token
        score = request_ores_score_per_article(article_revid= rev_id,
                                        email_address="igokhale@uw.edu",
                                        access_token=ACCESS_TOKEN)
        
        # skip article if score does not exist
        if score is None:
            continue

        #some dictionaries did not contain 'enwiki' key 
        if 'enwiki' not in list(score.keys()):
            print('nothing in score from this', article)
            continue

        #convert revision id from interger to string
        rev_id = str(rev_id)

        # skip article if revision ID is not located
        if rev_id not in list(score['enwiki']['scores'].keys()):
            print("could not find rev id")
            continue

        # access quality prediction score
        quality = score['enwiki']['scores'][rev_id]['articlequality']['score']['prediction']
        
        # save article title as key and quality as value in dictionary
        pred[article] = quality

    # write article and quality prediction to file
    with open('quality_pred.csv', mode='w', newline='') as file:
        print('writing csv')
        writer = csv.writer(file)
        
        # write the header
        writer.writerow(['article', 'quality_pred'])
        
        # write the data
        for key, value in pred.items():
            writer.writerow([key, value])

get_final_csv()

```
could not find lastrevid for Barbara Eibinger-Miedl  
could not find lastrevid for Mehrali Gasimov  
nothing in score from this Presidency of Eurico Gaspar Dutra  
could not find lastrevid for Kyaw Myint  
could not find lastrevid for André Ngongang Ouandji  
nothing in score from this Wang Wei (politician, born 1974)  
could not find lastrevid for Tomás Pimentel  
could not find lastrevid for Richard Sumah  
nothing in score from this Ajoy Edwards  
nothing in score from this Anup Dhotre  
nothing in score from this Viola Carofalo  
nothing in score from this Ferdinando Palasciano  
nothing in score from this Kim Ok-gyun  
nothing in score from this Pierre Ramampy  
could not find lastrevid for Segun ''Aeroland'' Adewale  
nothing in score from this Anwar Ali Hyder  
nothing in score from this Nancy Steele  
could not find lastrevid for Bashir Bililiqo  
nothing in score from this Abdul-Aziz al-Khair  
nothing in score from this Chada Thaiseth  
nothing in score from this Radhia Haddad
```

In [62]:
# calculate percent of articles I was unable to get quality prediction scores for.
21/7090

0.0029619181946403386

There wasn't revision id or score data for 21 out of 7090 articles (~0.3%). 

I forgot to save the revision id from the `get_final_csv` function written above, so I ran a separate function to extract the revision id for each article. I saved the article and last revision id in a csv file. The function does not take in any inputs or return anything, rather it just makes the page info API call and writes the article ttile and last revision id to a CSV file. However, there were 8 articles that I was unable to final revision ID for. 

In [51]:
def get_rev_id():
    # set up dictionary to store article title as key and revision ID as value
    revision = {}

    # iterate through politician name
    for article in politicians.name.to_list():

        # make pageinfo API requst using article title (politician name)
        info = request_pageinfo_per_article(article)

        # access page ID to get to revision ID
        page_id = list(info["query"]["pages"].keys())[0]

        # skip articles that do not have last revision ID
        if "lastrevid" not in list(info["query"]["pages"][page_id].keys()):
            print("could not find lastrevid for", article)
            continue

        # access revision ID
        rev_id = info["query"]["pages"][page_id]["lastrevid"]

        # save revision ID as value and article title as key in revision dictionary
        revision[article] = rev_id
    
    #write article title and revision id from dictionary to CSV file
    with open('rev_id.csv', mode='w', newline='') as file:
        print('writing csv')
        writer = csv.writer(file)
            
        # Write the header
        writer.writerow(['article', 'rev_id'])
            
        # Write the data
        for key, value in revision.items():
            writer.writerow([key, value])

get_rev_id()

could not find lastrevid for Barbara Eibinger-Miedl
could not find lastrevid for Mehrali Gasimov
could not find lastrevid for Kyaw Myint
could not find lastrevid for André Ngongang Ouandji
could not find lastrevid for Tomás Pimentel
could not find lastrevid for Richard Sumah
could not find lastrevid for Segun ''Aeroland'' Adewale
could not find lastrevid for Bashir Bililiqo


The function `get_region` below assigns each country to its approrate region based on the hierarchial structurein the Geography column in the population CSV file. The function takes in a dataframe as input and returns a dataframe with country, the region it belongs to, and the country's population as columns.

In [62]:
def get_region(df):
    # define lists to store region and country
    regions = []
    countries = []

    # store current region 
    current_region = None

    # iterate through each entry in geography column 
    for entry in df.Geography:

        # check if the value is region 
        if entry.isupper():  

            #update current region
            current_region = entry  

        # current value is a country 
        else:
            # assign current region to country
            regions.append(current_region)  

            # add country to countries list
            countries.append(entry)  

    # create new DataFrame with regions and countries list
    new_pop = pd.DataFrame({
        'Country': countries,
        'Region': regions
    })
    return new_pop

The function `write_final_df` combines the intermediary files containing the article title, revision ID, quality prediction, and country information into one data frame. Then, it writes the dataframe to CSV file. 

In [67]:
def write_final_df():
    # read in intermediary files
    rev_id_df = pd.read_csv('rev_id.csv')
    pred = pd.read_csv('../../quality_pred.csv')
    population  = pd.read_csv('../population_by_country_AUG.2024.csv')
    
    # combine article title, revision id, and quality prediction information into one dataframe
    data = pd.merge(rev_id_df, pred, on = 'article').drop_duplicates()

    # get country data from politician CSV file
    pol_df = pd.merge(politicians, data, left_on = 'name', right_on = 'article')[['country', 'article', 'rev_id' , 'quality_pred']]

    # call the get_region function to get separate region and country column
    new_pop = get_region(population)

    # get population per country
    pop_df = pd.merge(new_pop, population, left_on = 'Country', right_on = 'Geography')[['Country', 'Region', 'Population']]

    # rename column names
    final_df = pd.merge(pol_df, pop_df, left_on = 'country', 
                    right_on = 'Country')[['country', 'Region', 'Population','article', 'rev_id', 
                    'quality_pred']].rename(columns={'Region': 'region',
                                                     'Population': 'population', 
                                                     'article': 'article_title',
                                                     'rev_id': 'revision_id',
                                                     'quality_pred': 'article_quality'})
    
    # write dataframe to CSV file
    final_df.to_csv('./wp_politicians_by_country.csv')
    
write_final_df()


The function `write_no_match` below identifies the countries that have no matches. It writes the countries that do not have a match to a txt file. First, I identified the unique countries in my merged CSV: `wp_politicians_by_country.csv`, then I did the same for the population CSV: `population_by_country_AUG.2024.csv`. I made sure to exclude regions from my countries list from the population CSV. I converted both lists to sets and found the difference between the sets to identify countries that did not have a match in either CSV file. THe function writes the no match countries to a txt file: `wp_countries-no_match.txt`.

In [122]:
def write_no_match():
    # read in merged CSV
    pol_by_country = pd.read_csv('wp_politicians_by_country.csv')

    #remove rows with region
    filtered_df = population[~population['Geography'].str.isupper()]
    
    # set of countries from population csv
    population_countries = set(filtered_df['Geography'].unique())

    # set of countries from politician csv
    all_countries = set(pol_by_country['country'].dropna().unique())

    # identify no match countries 
    no_match = population_countries - all_countries

    # Save the unmatched countries to a text file
    with open('wp_countries-no_match.txt', 'w') as f:
        for country in no_match:
            f.write(f"{country}\n")

write_no_match()

# Analysis

### Articles per Capita (County)

The `total_articles_per_capita_country` function defined below calculates and displays the top 10 and bottom 10 total articles per person on a country by country basis. Since the population data is originally in the millions, I mulitplied it by 1 million to convert it back to single units. There were some countries with a population of 0, so I excluded them from the table as dividing by zero to get the total-articles-per-capita was producing nonsensical values. 

In [95]:
def total_articles_per_capita_country():
    # get the total number of articles published for each country
    num_articles = final_df.groupby('country')['article_title'].count().reset_index(name='num_articles')

    # get each countries population
    pop_data = final_df[['country', 'population']].drop_duplicates()

    #combine the population and number of article data into one dataframe
    df2 = pd.merge(num_articles, pop_data, on='country')

    #convert from million to ones
    df2['population_'] = df2['population'] * 1000000

    #calculate total-articles-per-capita
    df2['total-articles-per-capita'] = df2['num_articles'] / df2['population_']

    #display top 10 and bottom 10 countries 
    print("Top 10 Countries with Total Articles per Capita")
    display(df2.sort_values(by = 'total-articles-per-capita', ascending = False)[2:12][['country', 'total-articles-per-capita']])
    print("Bottom 10 Countries with Total Articles per Capita")
    display(df2.sort_values(by = 'total-articles-per-capita', ascending = True)[0:10][['country', 'total-articles-per-capita']])


total_articles_per_capita_country()

Top 10 Countries with Total Articles per Capita


Unnamed: 0,country,total-articles-per-capita
4,Antigua and Barbuda,0.00033
51,Federated States of Micronesia,0.00014
93,Marshall Islands,0.00013
149,Tonga,0.0001
12,Barbados,8.3e-05
98,Montenegro,6e-05
125,Seychelles,6e-05
90,Maldives,5.5e-05
17,Bhutan,5.5e-05
121,Samoa,4e-05


Bottom 10 Countries with Total Articles per Capita


Unnamed: 0,country,total-articles-per-capita
31,China,1.06285e-08
57,Ghana,8.797654e-08
66,India,1.042979e-07
122,Saudi Arabia,1.355014e-07
164,Zambia,1.485149e-07
108,Norway,1.818182e-07
70,Israel,2.040816e-07
45,Egypt,3.041825e-07
37,Cote d'Ivoire,3.236246e-07
50,Ethiopia,3.478261e-07


### Articles per Capita (Region)

The `total_articles_per_capita_region` function defined below calculates and displays the top 10 and bottom 10 articles per person on a region by region basis. I summed the population from all countires within a region to get the population for a region. 

In [104]:
def total_articles_per_capita_region():
    # get total number of articles per region
    num_articles = final_df.groupby('region')['article_title'].count().reset_index(name='num_articles')

    # get population per region
    pop_data = final_df.groupby('region').population.sum().reset_index()

    df2 = pd.merge(num_articles, pop_data, on='region').drop_duplicates()

    # convert from millions to ones
    df2['population_'] = df2['population'] * 1000000

    # calculate total-articles-per-capita_region
    df2['total-articles-per-capita_region'] = df2['num_articles'] /df2['population_']
    
    #display top 10 and bottom 10 regions
    print("Top 10 Regions with Total Articles per Capita")
    display(df2.sort_values(by = 'total-articles-per-capita_region', ascending = False)[:10][['region', 'total-articles-per-capita_region']])

    print("Bottom 10 Regions with Total Articles per Capita")
    display(df2.sort_values(by = 'total-articles-per-capita_region', ascending = True)[:10][['region', 'total-articles-per-capita_region']])
    
total_articles_per_capita_region()

Top 10 Regions with Total Articles per Capita


Unnamed: 0,region,total-articles-per-capita_region
9,OCEANIA,6.480648e-07
8,NORTHERN EUROPE,1.643576e-07
0,CARIBBEAN,1.553149e-07
1,CENTRAL AMERICA,1.325063e-07
2,CENTRAL ASIA,5.343819e-08
16,WESTERN ASIA,4.559054e-08
14,SOUTHERN EUROPE,4.456528e-08
4,EASTERN AFRICA,2.774906e-08
17,WESTERN EUROPE,2.621211e-08
7,NORTHERN AFRICA,2.474922e-08


Bottom 10 Regions with Total Articles per Capita


Unnamed: 0,region,total-articles-per-capita_region
11,SOUTH ASIA,2.55493e-09
3,EAST ASIA,4.19427e-09
15,WESTERN AFRICA,8.720044e-09
12,SOUTHEAST ASIA,8.773688e-09
10,SOUTH AMERICA,1.655489e-08
13,SOUTHERN AFRICA,2.065734e-08
6,MIDDLE AFRICA,2.421868e-08
5,EASTERN EUROPE,2.441048e-08
7,NORTHERN AFRICA,2.474922e-08
17,WESTERN EUROPE,2.621211e-08


### High Quality Articles per Capita (Country)

The function `high_quality_articles_per_capita_country` defined below calculates and displays the top 10 and bottom 10 number of high quality articles per person on a country by country basis. An article is defined as high quality if it received a rating of 'FA' (featured article) or 'GA' (good article).

In [98]:
def high_quality_articles_per_capita_country():
    # calculate number of high quality articles per country
    high_quality = final_df[(final_df['article_quality'] == 'FA') | (final_df['article_quality'] == 'GA')].groupby('country')['article_title'].count().reset_index(name='num_high_quality')

    # get population per country
    pop_data = final_df[['country', 'population']].drop_duplicates()

    quality = pd.merge(high_quality, pop_data, on='country')

    # convert from million to ones 
    quality['population_'] = quality['population'] * 1000000

    # calculate high-quality-articles-per-capita
    quality['high-quality-articles-per-capita'] = quality['num_high_quality'] /quality['population_']

    # display top 10 and bottom 10 countries
    print("Top 10 Countries with High Quality Articles per Capita")
    display(quality.sort_values(by = 'high-quality-articles-per-capita', ascending = False)[:10][['country', 'high-quality-articles-per-capita']])
    print("Bottom 10 Countries with High Quality Articles per Capita")
    display(quality.sort_values(by = 'high-quality-articles-per-capita', ascending = True)[:10][['country', 'high-quality-articles-per-capita']])
    
high_quality_articles_per_capita_country()

Top 10 Countries with High Quality Articles per Capita


Unnamed: 0,country,high-quality-articles-per-capita
63,Montenegro,5e-06
56,Luxembourg,2.857143e-06
1,Albania,2.592593e-06
50,Kosovo,2.352941e-06
58,Maldives,1.666667e-06
55,Lithuania,1.37931e-06
25,Croatia,1.315789e-06
40,Guyana,1.25e-06
70,Palestinian Territory,1.090909e-06
81,Slovenia,9.52381e-07


Bottom 10 Countries with High Quality Articles per Capita


Unnamed: 0,country,high-quality-articles-per-capita
9,Bangladesh,5.763689e-09
29,Egypt,9.505703e-09
31,Ethiopia,1.581028e-08
46,Japan,1.606426e-08
69,Pakistan,1.663202e-08
22,Colombia,1.915709e-08
23,Congo DR,1.955034e-08
100,Vietnam,2.022245e-08
95,Uganda,2.057613e-08
2,Algeria,2.136752e-08


### High Quality Articles per Capita (Region)

The `high_quality_articles_per_capita_region` function defined below calculates and displays the top 10 and bottom 10 high quality articles per person on a region by region basis. 

In [103]:
def high_quality_articles_per_capita_region():
    # calculate total number of high quality articles 
    high_quality = final_df[(final_df['article_quality'] == 'FA') | (final_df['article_quality'] == 'GA')].groupby('region')['article_title'].count().reset_index(name='num_high_quality')

    # get population per region
    pop_data = final_df.groupby('region').population.sum().reset_index()

    quality = pd.merge(high_quality, pop_data, on='region').drop_duplicates()

    # convert from millions to ones
    quality['population_'] = quality['population'] * 1000000

    # calculate high-quality-articles-per-capita for each region
    quality['high-quality-articles-per-capita_region'] = quality['num_high_quality'] / quality['population_']

    #display top 10 and bottom 10 regions
    print("Top 10 Regions with High Quality Articles per Capita")
    display(df2.sort_values(by = 'high-quality-articles-per-capita_region', ascending = False)[:10][['region', 'high-quality-articles-per-capita_region']])
    print("Bottom 10 Regions with High Quality Articles per Capita")
    display(df2.sort_values(by = 'high-quality-articles-per-capita_region', ascending = True)[:10][['region', 'high-quality-articles-per-capita_region']])
    
high_quality_articles_per_capita_region()

Top 10 Regions with High Quality Articles per Capita


Unnamed: 0,region,high-quality-articles-per-capita_region
14,SOUTHERN EUROPE,0.348684
0,CARIBBEAN,0.204545
5,EASTERN EUROPE,0.133333
13,SOUTHERN AFRICA,0.114286
17,WESTERN EUROPE,0.105528
16,WESTERN ASIA,0.090301
8,NORTHERN EUROPE,0.083333
7,NORTHERN AFRICA,0.066406
2,CENTRAL ASIA,0.0625
1,CENTRAL AMERICA,0.054945


Bottom 10 Regions with High Quality Articles per Capita


Unnamed: 0,region,high-quality-articles-per-capita_region
3,EAST ASIA,0.00182
11,SOUTH ASIA,0.01035
9,OCEANIA,0.022222
15,WESTERN AFRICA,0.029412
4,EASTERN AFRICA,0.035197
12,SOUTHEAST ASIA,0.036657
6,MIDDLE AFRICA,0.039604
10,SOUTH AMERICA,0.042254
1,CENTRAL AMERICA,0.054945
2,CENTRAL ASIA,0.0625
