In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pycountry_convert as pycountry
import plotly.express as px
from pywaffle import Waffle
%matplotlib inline

In [2]:
NGO_country_stats = pd.read_csv("2021_NGO_country_stats.csv", index_col = 0)

In [3]:
NGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3
0,United States of America,362,USA
1,United Kingdom of Great Britain and Northern I...,285,GBR
2,Germany,91,DEU
3,France,85,FRA
4,Canada,66,CAN


In [4]:
IGO_country_stats = pd.read_csv("2021_IGO_country_stats.csv", index_col = 0)

In [5]:
IGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3
0,France,9,FRA
1,Finland,6,FIN
2,Switzerland,4,CHE
3,United Kingdom of Great Britain and Northern I...,4,GBR
4,Kenya,3,KEN


In [6]:
IGO_country_stats.tail(20)

Unnamed: 0,Country,Count,ISO3
25,Guyana,1,GUY
26,Ethiopia,1,ETH
27,Venezuela (Bolivarian Republic of),1,VEN
28,Germany,1,DEU
29,Belgium,1,BEL
30,San Marino,1,SMR
31,Belize,1,BLZ
32,Japan,1,JPN
33,Republic of Korea,1,KOR
34,Qatar,1,QAT


In [7]:
IGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3
0,France,9,FRA
1,Finland,6,FIN
2,Switzerland,4,CHE
3,United Kingdom of Great Britain and Northern I...,4,GBR
4,Kenya,3,KEN


In [8]:
def iso3_to_continent_name(iso3):
    try:
        iso2 = pycountry.country_alpha3_to_country_alpha2(iso3)
        continent_code = pycountry.country_alpha2_to_continent_code(iso2)
        continent_name = pycountry.convert_continent_code_to_continent_name(continent_code)
        return continent_name
    except:
        return ""

In [9]:
NGO_country_stats["Continent"] = [iso3_to_continent_name(iso3) for iso3 in list(NGO_country_stats.ISO3)]

In [10]:
NGO_country_stats

Unnamed: 0,Country,Count,ISO3,Continent
0,United States of America,362,USA,North America
1,United Kingdom of Great Britain and Northern I...,285,GBR,Europe
2,Germany,91,DEU,Europe
3,France,85,FRA,Europe
4,Canada,66,CAN,North America
...,...,...,...,...
84,Côte d'Ivoire,1,CIV,Africa
85,Tunisia,1,TUN,Africa
86,Egypt,1,EGY,Africa
87,Dominican Republic,1,DOM,North America


In [11]:
NGO_country_stats[NGO_country_stats.Continent==""]

Unnamed: 0,Country,Count,ISO3,Continent
88,Holy See,1,VAT,


In [12]:
NGO_country_stats.Continent[88] = "Europe"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  NGO_country_stats.Continent[88] = "Europe"


In [13]:
NGO_country_stats[50:53]

Unnamed: 0,Country,Count,ISO3,Continent
50,Nicaragua,2,NIC,North America
51,Poland,2,POL,Europe
52,Democratic Republic of the Congo,2,COD,Africa


In [14]:
NGO_country_stats.to_csv("2021_NGO_country_stats.csv")

In [15]:
IGO_country_stats["Continent"] = [iso3_to_continent_name(iso3) for iso3 in list(IGO_country_stats.ISO3)]

In [16]:
IGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3,Continent
0,France,9,FRA,Europe
1,Finland,6,FIN,Europe
2,Switzerland,4,CHE,Europe
3,United Kingdom of Great Britain and Northern I...,4,GBR,Europe
4,Kenya,3,KEN,Africa


In [17]:
IGO_country_stats[IGO_country_stats.Continent==""]

Unnamed: 0,Country,Count,ISO3,Continent


In [18]:
IGO_country_stats.to_csv("2021_IGO_country_stats.csv")

In [19]:
NGO_country_stats.Continent.unique()

array(['North America', 'Europe', 'South America', 'Asia', 'Africa',
       'Oceania'], dtype=object)

In [20]:
unique_continents = list(NGO_country_stats.Continent.unique())

In [21]:
sum_by_continent = [NGO_country_stats.Count[NGO_country_stats.Continent == continent].sum() for continent in unique_continents]

In [22]:
sum_by_continent

[452, 730, 89, 193, 103, 30]

In [23]:
sum(sum_by_continent) == sum(NGO_country_stats.Count)

True

In [24]:
NGO_continent_stats = pd.DataFrame(list(zip(unique_continents, sum_by_continent)), 
                                  columns = ["Continent", "Count"])

In [25]:
NGO_continent_stats

Unnamed: 0,Continent,Count
0,North America,452
1,Europe,730
2,South America,89
3,Asia,193
4,Africa,103
5,Oceania,30


In [26]:
NGO_continent_stats.to_csv("2021_NGO_continent_stats.csv")

In [27]:
IGO_country_stats.Continent.unique()

array(['Europe', 'Africa', 'Asia', 'North America', 'South America',
       'Oceania'], dtype=object)

In [28]:
unique_continents = list(IGO_country_stats.Continent.unique())

In [29]:
sum_by_continent = [IGO_country_stats.Count[IGO_country_stats.Continent == continent].sum() for continent in unique_continents]

In [30]:
sum_by_continent

[34, 15, 16, 6, 3, 1]

In [31]:
sum(sum_by_continent) == sum(IGO_country_stats.Count)

True

In [32]:
IGO_continent_stats = pd.DataFrame(list(zip(unique_continents, sum_by_continent)), 
                                  columns = ["Continent", "Count"])

In [33]:
IGO_continent_stats

Unnamed: 0,Continent,Count
0,Europe,34
1,Africa,15
2,Asia,16
3,North America,6
4,South America,3
5,Oceania,1


In [34]:
IGO_continent_stats.to_csv("2021_IGO_continent_stats.csv")

The previous file, `2015_IGO_continent_stats.csv`, had to be modified so as to conform to the structure of the `2015_NGO_continent_stats.csv`(that is, the order of continents has to be the same for the pie chart to preserve the same color order!)

In [35]:
# IGO_continent_stats = pd.read_csv("2015_IGO_continent_stats.csv", index_col=0)

In [36]:
# plt.figure(figsize=(20,10))
# plt.subplot(1, 2, 1)
# plt.pie(NGO_continent_stats.Count,labels=NGO_continent_stats.Continent,
#         autopct='%1.1f%%',
#         startangle = 90,
#         explode = [0,0.03,0,0,0,0])
# plt.title('Continent Distribution of NGOs (COP2015, n = 1078)')
# plt.axis('equal')

# plt.subplot(1, 2, 2)
# plt.pie(IGO_continent_stats.Count,labels=IGO_continent_stats.Continent,
#         autopct='%1.1f%%',
#         startangle = 90,
#         explode = [0,0.03,0,0,0,0])
# plt.title('Continent Distribution of IGOs (COP2015, n = 67)')
# plt.axis('equal')
# plt.savefig('COP2015_continent_distribution_NGOs_vs_IGOs.png', facecolor = "white")

# Income Group Statistics

We are using the most recent year classification for now, but at some point we may want to adjust the income classifications so that they track changes over multiple years and are pegged to the specific CoP round we're looking at. For example, maybe Belize is classified as a low-income country in 2017 but then moves to a lower-mid income country in 2018. **We may want to backtrack at some point and reclassify countries for each year to incorporate that change!**

In [37]:
df_income = pd.read_excel("WORLD_BANK_CLASS.xlsx", index_col = 0)
df_income.head()


Unnamed: 0_level_0,Code,Region,Income group,Lending category,Other (EMU or HIPC)
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aruba,ABW,Latin America & Caribbean,High income,,
Afghanistan,AFG,South Asia,Low income,IDA,HIPC
Angola,AGO,Sub-Saharan Africa,Lower middle income,IBRD,
Albania,ALB,Europe & Central Asia,Upper middle income,IBRD,
Andorra,AND,Europe & Central Asia,High income,,


In [38]:
df_income[df_income["Income group"] == "nan"]

Unnamed: 0_level_0,Code,Region,Income group,Lending category,Other (EMU or HIPC)
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [39]:
NGO_country_stats["Income_Group"] = len(NGO_country_stats.Country) * [np.nan]
IGO_country_stats["Income_Group"] = len(IGO_country_stats.Country) * [np.nan]

In [40]:
income_iso3_list = list(df_income.Code)
for i in range(len(NGO_country_stats.ISO3)):
    current_iso3 = str(NGO_country_stats.ISO3.iloc[i])
    try:
        index = income_iso3_list.index(current_iso3)
        NGO_country_stats.Income_Group.iloc[i] = str(df_income["Income group"].iloc[index])
    except:
        pass
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [41]:
NGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3,Continent,Income_Group
0,United States of America,362,USA,North America,High income
1,United Kingdom of Great Britain and Northern I...,285,GBR,Europe,High income
2,Germany,91,DEU,Europe,High income
3,France,85,FRA,Europe,High income
4,Canada,66,CAN,North America,High income


In [42]:
NGO_country_stats.Country[NGO_country_stats.Income_Group.isna()]

88    Holy See
Name: Country, dtype: object

**NOTE: Unfortunately, the Holy See is not in our current list of countries, so the organizations based there will not be counted in this current iteration.**

In [43]:
for i in range(len(IGO_country_stats.ISO3)):
    current_iso3 = str(IGO_country_stats.ISO3.iloc[i])
    try:
        index = income_iso3_list.index(current_iso3)
        IGO_country_stats.Income_Group.iloc[i] = str(df_income["Income group"].iloc[index])
    except:
        pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [44]:
IGO_country_stats.Country[NGO_country_stats.Income_Group.isna()]

Series([], Name: Country, dtype: object)

In [45]:
IGO_country_stats.head()

Unnamed: 0,Country,Count,ISO3,Continent,Income_Group
0,France,9,FRA,Europe,High income
1,Finland,6,FIN,Europe,High income
2,Switzerland,4,CHE,Europe,High income
3,United Kingdom of Great Britain and Northern I...,4,GBR,Europe,High income
4,Kenya,3,KEN,Africa,Lower middle income


In [46]:
NGO_country_stats.to_csv("2021_NGO_country_stats.csv")
IGO_country_stats.to_csv("2021_IGO_country_stats.csv")

In [47]:
income_unique = list(NGO_country_stats.Income_Group.unique())
income_unique.pop(-1)
income_unique

['High income', 'Upper middle income', 'Lower middle income', 'Low income']

In [48]:
sum_by_income_group = [NGO_country_stats.Count[NGO_country_stats.Income_Group == income_grp].sum() for income_grp in income_unique]
sum_by_income_group

[1233, 177, 175, 11]

In [49]:
NGO_income_group_stats = pd.DataFrame(list(zip(income_unique, sum_by_income_group)), 
                                  columns = ["Income_Group", "Count"])


In [50]:
NGO_income_group_stats.head()

Unnamed: 0,Income_Group,Count
0,High income,1233
1,Upper middle income,177
2,Lower middle income,175
3,Low income,11


In [51]:
sum_by_income_group = [IGO_country_stats.Count[IGO_country_stats.Income_Group == income_grp].sum() for income_grp in income_unique]
sum_by_income_group

[44, 8, 17, 5]

In [52]:
IGO_income_group_stats = pd.DataFrame(list(zip(income_unique, sum_by_income_group)), 
                                  columns = ["Income_Group", "Count"])

In [53]:
IGO_income_group_stats.head()

Unnamed: 0,Income_Group,Count
0,High income,44
1,Upper middle income,8
2,Lower middle income,17
3,Low income,5


In [54]:
# %matplotlib inline
# plt.figure(figsize=(20,10))
# plt.subplot(1, 2, 1)
# plt.pie(NGO_income_group_stats.Count,labels=NGO_income_group_stats.Income_Group,
#         autopct='%1.1f%%',
#         startangle = 90)
# plt.title('Income Group Distribution of the Countries of Origin of NGOs (COP2015, n = 1078) \n(Note: excludes NGOs based in the Holy See)')
# plt.axis('equal')

# plt.subplot(1, 2, 2)
# plt.pie(IGO_income_group_stats.Count,labels=IGO_income_group_stats.Income_Group,
#         autopct='%1.1f%%',
#         startangle = 90)
# plt.title('Income Group Distribution of the Countries of Origin of IGOs (COP2015, n = 67)')
# plt.axis('equal')
# plt.savefig('COP2015_income_group_distribution_NGOs_vs_IGOs.png', facecolor = "white")