In [26]:
import pandas as pd
import os
import numpy as np

## Step 4: Analysis

### total-articles-per-population analysis

In [None]:
politicians_final_df = pd.read_csv(os.path.join("..","data","wp_politicians_by_country.csv"))

In [39]:
total_articles_per_capita = (politicians_final_df.groupby('country')['article_title'].count())/(politicians_final_df.groupby('country')['population'].mean()*1000000)
total_articles_df = total_articles_per_capita.to_frame(name="total_articles").reset_index()
total_articles_df

Unnamed: 0,country,total_articles
0,Afghanistan,2.871046e-06
1,Albania,2.964286e-05
2,Algeria,7.572383e-07
3,Andorra,1.000000e-04
4,Angola,1.179775e-06
...,...,...
179,Venezuela,2.190813e-06
180,Vietnam,2.716298e-07
181,Yemen,1.810089e-06
182,Zambia,6.500000e-07


### Articles per capita by region

In [38]:
total_articles_per_capita_by_region = (politicians_final_df.groupby('region')['article_title'].count())/(politicians_final_df.groupby('region')['population'].sum()*1000000)
total_articles_region_df = total_articles_per_capita_by_region.to_frame(name="total_articles").reset_index()
total_articles_region_df

Unnamed: 0,region,total_articles
0,CARIBBEAN,1.621622e-07
1,CENTRAL AMERICA,1.110668e-07
2,CENTRAL ASIA,5.927086e-08
3,EAST ASIA,1.125728e-08
4,EASTERN AFRICA,3.408853e-08
5,EASTERN EUROPE,1.977305e-08
6,MIDDLE AFRICA,2.563455e-08
7,NORTHERN AFRICA,2.971243e-08
8,NORTHERN EUROPE,1.943044e-07
9,OCEANIA,7.811081e-07


### High Quality Articles per population by country

In [23]:
high_quality_check_df = politicians_final_df[politicians_final_df['article_quality'].isin(['GA', 'FA'])]
high_quality_article_df = (high_quality_check_df.groupby('country')['article_title'].count())/(high_quality_check_df.groupby('country')['population'].mean()*1000000)
high_quality_article_df = high_quality_article_df.to_frame(name="high_quality_articles").reset_index()
high_quality_article_df.head(10)

Unnamed: 0,country,high_quality_articles
0,Afghanistan,1.459854e-07
1,Albania,2.142857e-06
2,Andorra,2e-05
3,Armenia,3.333333e-07
4,Azerbaijan,9.803922e-08
5,Belarus,1.086957e-07
6,Belgium,8.62069e-08
7,Benin,7.462687e-08
8,Bolivia,1.639344e-07
9,Bosnia-Herzegovina,1.470588e-06


### High quality articles per population by region

In [36]:
high_quality_articles_by_region = (high_quality_check_df.groupby('region')['article_title'].count())/(high_quality_check_df.groupby('region')['population'].mean()*1000000)
high_quality_region_df = high_quality_articles_by_region.to_frame(name="high_quality_articles_by_region").reset_index()
high_quality_region_df.head(10)

Unnamed: 0,region,high_quality_articles_by_region
0,CARIBBEAN,7.142857e-07
1,CENTRAL AMERICA,9.784736e-07
2,CENTRAL ASIA,1.99115e-07
3,EAST ASIA,2.70413e-07
4,EASTERN AFRICA,3.443526e-07
5,EASTERN EUROPE,5.062759e-07
6,MIDDLE AFRICA,5.694761e-07
7,NORTHERN AFRICA,3.695324e-07
8,NORTHERN EUROPE,1.373391e-06
9,OCEANIA,4.301075e-07


# Step 5: Results

### Top 10 countries by coverage: The 10 countries with the highest total articles per capita (in descending order) .

In [28]:
total_articles_df = total_articles_df.replace([np.inf, -np.inf], np.nan)
total_articles_df = total_articles_df.dropna()
total_articles_df.sort_values(by='total_articles', ascending=False).head(10)

Unnamed: 0,country,total_articles
5,Antigua and Barbuda,0.00017
54,Federated States of Micronesia,0.00013
3,Andorra,0.0001
13,Barbados,9.3e-05
104,Marshall Islands,9e-05
110,Montenegro,6e-05
143,Seychelles,6e-05
97,Luxembourg,5.3e-05
18,Bhutan,5.1e-05
64,Grenada,5e-05


### Bottom 10 countries by coverage: The 10 countries with the lowest total articles per capita (in ascending order) .

In [29]:
total_articles_df.sort_values(by='total_articles', ascending=True).head(10)

Unnamed: 0,country,total_articles
32,China,1.392176e-09
106,Mexico,7.843137e-09
140,Saudi Arabia,8.174387e-08
134,Romania,1.052632e-07
73,India,1.255998e-07
153,Sri Lanka,1.339286e-07
48,Egypt,1.352657e-07
53,Ethiopia,2.025932e-07
161,Taiwan,2.155172e-07
180,Vietnam,2.716298e-07


### Top 10 countries by high quality: The 10 countries with the highest high quality articles per capita (in descending order) .

In [31]:
high_quality_article_df = high_quality_article_df.replace([np.inf, -np.inf], np.nan)
high_quality_article_df = high_quality_article_df.dropna()
high_quality_article_df.sort_values(by='high_quality_articles', ascending=False).head(10)

Unnamed: 0,country,high_quality_articles
2,Andorra,2e-05
53,Montenegro,5e-06
1,Albania,2.142857e-06
80,Suriname,1.666667e-06
9,Bosnia-Herzegovina,1.470588e-06
49,Lithuania,1.071429e-06
19,Croatia,1.052632e-06
74,Slovenia,9.52381e-07
61,Palestinian Territory,9.259259e-07
28,Gabon,8.333333e-07


### Bottom 10 countries by high quality: The 10 countries with the lowest high quality articles per capita (in ascending order).

In [32]:
high_quality_article_df.sort_values(by='high_quality_articles', ascending=True).head(10)

Unnamed: 0,country,high_quality_articles
35,India,4.2337e-09
84,Thailand,1.497006e-08
39,Japan,1.601281e-08
58,Nigeria,1.830664e-08
91,Vietnam,2.012072e-08
17,Colombia,2.03666e-08
87,Uganda,2.118644e-08
60,Pakistan,2.120441e-08
79,Sudan,2.132196e-08
37,Iran,2.257336e-08


### Geographic regions by total coverage: A rank ordered list of geographic regions (in descending order) by total articles per capita.

In [33]:
total_articles_region_df.sort_values(by='total_articles',ascending=False)

Unnamed: 0,region,total_articles
9,OCEANIA,7.811081e-07
8,NORTHERN EUROPE,1.943044e-07
0,CARIBBEAN,1.621622e-07
1,CENTRAL AMERICA,1.110668e-07
2,CENTRAL ASIA,5.927086e-08
14,SOUTHERN EUROPE,4.577106e-08
16,WESTERN ASIA,4.416518e-08
4,EASTERN AFRICA,3.408853e-08
7,NORTHERN AFRICA,2.971243e-08
6,MIDDLE AFRICA,2.563455e-08


### Geographic regions by high quality coverage: Rank ordered list of geographic regions (in descending order) by high quality articles per capita.

In [37]:
high_quality_region_df.sort_values(by='high_quality_articles_by_region',ascending=False)

Unnamed: 0,region,high_quality_articles_by_region
14,SOUTHERN EUROPE,2.323743e-06
16,WESTERN ASIA,1.455085e-06
8,NORTHERN EUROPE,1.373391e-06
1,CENTRAL AMERICA,9.784736e-07
0,CARIBBEAN,7.142857e-07
6,MIDDLE AFRICA,5.694761e-07
10,SOUTH AMERICA,5.590473e-07
5,EASTERN EUROPE,5.062759e-07
9,OCEANIA,4.301075e-07
17,WESTERN EUROPE,4.215661e-07
