In [43]:
# import libraries
import pandas as pd

### About this Dataset:

The third edition of the global Women, Peace, and Security Index (WPS Index) draws on recognized data sources to measure women’s inclusion, justice, and security in 170 countries.

Main page: https://giwps.georgetown.edu/the-index/

Access the dataset at: https://giwps.georgetown.edu/wp-content/uploads/2021/10/WPS-Index-2021-Data.csv

In [44]:
# Location of the raw WPS Index export. `path` is relative to the current working
# directory (it points at the "original_datasets" subfolder); edit it if the CSV
# is stored somewhere else.
path = "./original_datasets/"
file_name = "WPS-Index-2021-Data.csv"

# Load the CSV, decoding it as Latin-1
world_index = pd.read_csv(path + file_name, encoding="latin1")

In [45]:
# Preview the first five rows to decide what needs cleaning: the real column names
# sit in row 0, there are blank columns, and the file ends with many blank rows
world_index.head(n=5)

Unnamed: 0,WPS Index 2021,Unnamed: 1,Unnamed: 2,Inclusion,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Security,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24
0,WPS Index rank,Country,WPS Index score,Education (years),,Financial inclusion (%),,Employment (%),,Cell phone use (%) ^m,...,Sex ratio at birth (male to female ratio),,Discriminatry norms (%),,Intimate partner violence (%),,Perception of community safety (%) ^m,,"Organized violence (battle deaths per 100,000 ...",
1,TOP QUINTILE,,,,,,,,,,...,,,,,,,,,,
2,1,Norway,0.922,13,,100,,58.9,,99,...,1.06,,0,,4,,89.5,,0,
3,2,Finland,0.909,12.9,,99.6,,52.7,,100,...,1.05,,1,,8,,80.9,,0,
4,3,Iceland,0.907,12.6,,92.4,f,64.2,,99.6,...,1.05,,0,,3,,72.6,,0,


In [46]:
# Preview the last five rows: the file ends with source/definition notes, not country data
world_index.tail(n=5)

Unnamed: 0,WPS Index 2021,Unnamed: 1,Unnamed: 2,Inclusion,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Security,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24
256,,Son bias: United Nations Department of Social ...,,,,,,,,,...,,,,,,,,,,
257,,"Discriminatory norms: Gallup Inc., and Interna...",,,,,,,,,...,,,,,,,,,,
258,,Intimate Partner Violence: WHO 2018. (https://...,,,,,,,,,...,,,,,,,,,,
259,,Perception of Community Safety: Gallup World P...,,,,,,,,,...,,,,,,,,,,
260,,Organised Violence: UCDP (Uppsala Conflict Dat...,,,,,,,,,...,,,,,,,,,,


In [47]:
# Every other column, starting with the 5th (position 4 when counting from 0), is a
# filler column sitting between the indicator columns. They are mostly empty; in the
# preview above one of them holds a footnote-style flag ("f"), which is discarded here.

# Positions of the filler columns: 4, 6, ..., 24
columns_to_drop = [*range(4, 25, 2)]

# Translate the positions into column labels and drop those columns
world_index = world_index.drop(columns=world_index.columns[columns_to_drop])

In [48]:
# Preview the first five rows again to check the result of the column drop
world_index.head(n=5)

Unnamed: 0,WPS Index 2021,Unnamed: 1,Unnamed: 2,Inclusion,Unnamed: 5,Unnamed: 7,Unnamed: 9,Unnamed: 11,Justice,Unnamed: 15,Unnamed: 17,Security,Unnamed: 21,Unnamed: 23
0,WPS Index rank,Country,WPS Index score,Education (years),Financial inclusion (%),Employment (%),Cell phone use (%) ^m,Parliamentary representation (%),Absensce of legal discrimination (aggregate sc...,Sex ratio at birth (male to female ratio),Discriminatry norms (%),Intimate partner violence (%),Perception of community safety (%) ^m,"Organized violence (battle deaths per 100,000 ..."
1,TOP QUINTILE,,,,,,,,,,,,,
2,1,Norway,0.922,13,100,58.9,99,45.6,96.9,1.06,0,4,89.5,0
3,2,Finland,0.909,12.9,99.6,52.7,100,46,97.5,1.05,1,8,80.9,0
4,3,Iceland,0.907,12.6,92.4,64.2,99.6,39.7,100,1.05,0,3,72.6,0


In [49]:
# The real column names are stored in the first row of the dataframe
# (the second line of the CSV), so promote that row to be the header.

# Grab the label row before slicing it away
header_labels = world_index.iloc[0]

# Keep everything after the label row and renumber the index from 0
world_index = world_index.iloc[1:].reset_index(drop=True)

# Install the captured labels as the column names
world_index.columns = header_labels
world_index.head(n=5)

Unnamed: 0,WPS Index rank,Country,WPS Index score,Education (years),Financial inclusion (%),Employment (%),Cell phone use (%) ^m,Parliamentary representation (%),Absensce of legal discrimination (aggregate score),Sex ratio at birth (male to female ratio),Discriminatry norms (%),Intimate partner violence (%),Perception of community safety (%) ^m,"Organized violence (battle deaths per 100,000 people)"
0,TOP QUINTILE,,,,,,,,,,,,,
1,1,Norway,0.922,13.0,100.0,58.9,99.0,45.6,96.9,1.06,0.0,4.0,89.5,0.0
2,2,Finland,0.909,12.9,99.6,52.7,100.0,46.0,97.5,1.05,1.0,8.0,80.9,0.0
3,3,Iceland,0.907,12.6,92.4,64.2,99.6,39.7,100.0,1.05,0.0,3.0,72.6,0.0
4,4,Denmark,0.903,13.1,100.0,54.0,100.0,39.7,100.0,1.06,2.0,3.0,79.1,0.0


In [50]:
# Discard the rows in which every single column is NaN

world_index = world_index.dropna(axis=0, how="all")
world_index.tail(n=5)

# What is still left to clean after this step:
#  - the rows that split the dataset into quintiles (see the head output above)
#  - the rows that only hold definitions (see the tail output below)
#  - rows for countries not in the index and for certain groups of countries
#    (ex. by continent), which we don't want to remove (yet)

Unnamed: 0,WPS Index rank,Country,WPS Index score,Education (years),Financial inclusion (%),Employment (%),Cell phone use (%) ^m,Parliamentary representation (%),Absensce of legal discrimination (aggregate score),Sex ratio at birth (male to female ratio),Discriminatry norms (%),Intimate partner violence (%),Perception of community safety (%) ^m,"Organized violence (battle deaths per 100,000 people)"
255,,Son bias: United Nations Department of Social ...,,,,,,,,,,,,
256,,"Discriminatory norms: Gallup Inc., and Interna...",,,,,,,,,,,,
257,,Intimate Partner Violence: WHO 2018. (https://...,,,,,,,,,,,,
258,,Perception of Community Safety: Gallup World P...,,,,,,,,,,,,
259,,Organised Violence: UCDP (Uppsala Conflict Dat...,,,,,,,,,,,,


In [51]:
# To be more targeted, drop only the rows that have a single populated cell:
# the quintile separation rows (e.g. "TOP QUINTILE") and the definition rows.
# Such a row is null in every column but one, so the null count to filter on is
# derived from the current column count (13 for the 14 columns present at this
# point) instead of being hard-coded.

single_value_null_count = world_index.shape[1] - 1
world_index = world_index[world_index.isna().sum(axis=1) != single_value_null_count]

In [52]:
# As mentioned before, there are rows for certain groups of countries
# or individual countries with missing data that do not have an index rank.
# Build the null mask once and reuse it for the count and for both splits.
is_unranked = world_index['WPS Index rank'].isna()

# Only the last expression of a cell is rendered, so print the count explicitly
# (36 rows when this notebook was last run)
print(f"Rows without a WPS Index rank: {is_unranked.sum()}")

# For this analysis, our group is focusing on individual countries with a rank, so we
# create two separate dataframes for ranked and unranked countries (the latter perhaps
# for a future analysis). Copy each split so later edits do not act on a slice of the
# combined frame.
world_index_unranked = world_index[is_unranked].copy()
world_index = world_index[~is_unranked].copy()

# Show the last unranked rows as the cell output
world_index_unranked.tail()

In [53]:
# Write the ranked countries to a CSV in the current working directory,
# leaving out the dataframe's positional index.
# NOTE(review): the output name is spelled "WIPS" while the input file is "WPS" -
# confirm nothing else reads this path before renaming it.

world_index.to_csv(path_or_buf="WIPS-Index-2021-Ranked.csv", index=False)

# Optional: uncomment the line below to also export the unranked countries & groups of countries
# world_index_unranked.to_csv("WIPS-Index-2021-Unranked.csv",index=False)