### Obtain full text peace agreements

In [1]:
### Load libraries -------
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import os
from datetime import datetime, date, timedelta
import numpy as np
import io
import requests
from PyPDF2 import PdfFileReader
from pdfminer.high_level import extract_text # TODO: check if this solves encoding issues

# Pandas parameters
#pd.set_option('display.max_colwidth', -1)
#pd.set_option('display.max_rows', 2000)
#pd.set_option('display.max_columns', 500)

In [2]:
### Define out paths ------
if not os.path.exists('/Users/hannahfrank/desktop/qca_peace_agreements_out'):
    os.makedirs('/Users/hannahfrank/desktop/qca_peace_agreements_out')

out_paths = {
    "desciptive_plots_outcome": '/Users/hannahfrank/desktop/qca_peace_agreements_out/desciptive_plots_outcome',
    "data": '/Users/hannahfrank/desktop/qca_peace_agreements_out/data'
}

for key, val in out_paths.items():
    if not os.path.exists(val):
        os.makedirs(val)
        
out_paths["data"]

'/Users/hannahfrank/desktop/qca_peace_agreements_out/data'

In [3]:
### Load data from local folder ----------
ucdp = pd.read_csv(os.path.join(out_paths["data"], 
                                "ucdp.csv"))
ucdp.head(3)

Unnamed: 0,paid,gwno,dyad_id,incompatibility,dyad_name,pa_name,year,pa_date,pa_comment,pa_sign,...,co impl,active_conflict,termdur,txt,linktofulltextagreement,inclusive,no dyad,pa type,out_iss,frame
0,1565,700,"729, 726, 727, 725, 730, 732",2,Government of Afghanistan - Mahaz-i Milli-yi I...,Peshawar Accord,1992,1992-04-24,Partial Peace Agreement \n\nThis agreement set...,"No signatures, but agreement mentions: Jamiat-...",...,0,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,6,2,2,1
1,1128,700,"726, 732",2,Government of Afghanistan - Hizb-i Islami-yi A...,Afghan Peace Accord and Annex on the Division ...,1993,1993-03-07,Partial peace agreement \n\nA number of war...,Professor Burhan-ud-Din Rabbani of Jamiat-i-Is...,...,0,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,2,2,3,2
2,1132,700,726,2,Government of Afghanistan - Hizb-i Islami-yi A...,Jalalabad accord,1993,1993-05-20,Partial peace agreement.\n\nThe Jalalabad Acco...,"Burhanuddin Rabbani, President \nGulbuddin Hek...",...,0,1,-99,0,,2,1,2,3,3


In [4]:
### Only consider peace agreements with full text access ---------
ucdp_full = ucdp.loc[ucdp["txt"]==1].copy()
ucdp_full.head(3)

Unnamed: 0,paid,gwno,dyad_id,incompatibility,dyad_name,pa_name,year,pa_date,pa_comment,pa_sign,...,co impl,active_conflict,termdur,txt,linktofulltextagreement,inclusive,no dyad,pa type,out_iss,frame
0,1565,700,"729, 726, 727, 725, 730, 732",2,Government of Afghanistan - Mahaz-i Milli-yi I...,Peshawar Accord,1992,1992-04-24,Partial Peace Agreement \n\nThis agreement set...,"No signatures, but agreement mentions: Jamiat-...",...,0,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,6,2,2,1
1,1128,700,"726, 732",2,Government of Afghanistan - Hizb-i Islami-yi A...,Afghan Peace Accord and Annex on the Division ...,1993,1993-03-07,Partial peace agreement \n\nA number of war...,Professor Burhan-ud-Din Rabbani of Jamiat-i-Is...,...,0,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,2,2,3,2
4,1488,700,726,2,Government of Afghanistan - Hizb-i Islami-yi A...,Kabul Agreement,2016,2016-09-22,Full peace agreement\n\nAfter periods of negot...,For the Government of Afghanistan: President M...,...,1,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,1,1,0,2


### Webscraping
    Get full text peace agreements

In [5]:
### Add empty column for full texts ------
ucdp_full["fulltext"] = ""
ucdp_full.head(3)

Unnamed: 0,paid,gwno,dyad_id,incompatibility,dyad_name,pa_name,year,pa_date,pa_comment,pa_sign,...,active_conflict,termdur,txt,linktofulltextagreement,inclusive,no dyad,pa type,out_iss,frame,fulltext
0,1565,700,"729, 726, 727, 725, 730, 732",2,Government of Afghanistan - Mahaz-i Milli-yi I...,Peshawar Accord,1992,1992-04-24,Partial Peace Agreement \n\nThis agreement set...,"No signatures, but agreement mentions: Jamiat-...",...,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,6,2,2,1,
1,1128,700,"726, 732",2,Government of Afghanistan - Hizb-i Islami-yi A...,Afghan Peace Accord and Annex on the Division ...,1993,1993-03-07,Partial peace agreement \n\nA number of war...,Professor Burhan-ud-Din Rabbani of Jamiat-i-Is...,...,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,2,2,3,2,
4,1488,700,726,2,Government of Afghanistan - Hizb-i Islami-yi A...,Kabul Agreement,2016,2016-09-22,Full peace agreement\n\nAfter periods of negot...,For the Government of Afghanistan: President M...,...,1,-99,1,http://ucdpged.uu.se/peaceagreements/fulltext/...,2,1,1,0,2,


In [6]:
### Scrape full texts --------
# https://stackoverflow.com/questions/45470964/python-extracting-text-from-webpage-pdf

# loop through each peace agreement
for i in range(0, len(ucdp_full)):
    
    # copy url for row
    url = ucdp_full["linktofulltextagreement"].iloc[i]
    print(url)
    
    # request webpage contet
    request = requests.get(url)
    req_content = io.BytesIO(request.content)
    
    # iniate pdf reading
    pdf = PdfFileReader(req_content)
    
    # count number of pages
    page_n = pdf.numPages
    
    # loop through each page of the peace agreement, extract and append text
    # https://stackoverflow.com/questions/60199068/how-to-retrieve-all-pages-from-pdf-as-a-single-string-in-python-3-using-pypdf2
    text = []
    for num in range(page_n):
        #print(n)
        text_page = pdf.getPage(num)
        text.append(text_page.extractText().split(" "))

    ucdp_full["fulltext"].iloc[i] = text

http://ucdpged.uu.se/peaceagreements/fulltext/Afg%2019920424_PESHAWAR%20ACCORD.pdf


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy [indexing.py:1637]


http://ucdpged.uu.se/peaceagreements/fulltext/Afg%2019930307.pdf




http://ucdpged.uu.se/peaceagreements/fulltext/Kabul%20Agreement%20between%20the%20Government%20of%20Afghanistan%20and%20Hizb.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ang%2020060801po.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ang%2019890622.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ang%2019910531.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ang%2019941120.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ang20020404.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/Ban%2019971202.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BA%2019930518_Medjugorje%20Agreement.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BoH%2019940301.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BA_940318_DeclarationConcerningConstitutonFederationBosniaHerzegovina.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BA%2019941220.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BA_950908_AgreedBasicPrinciples.pdf
http://ucdpged.uu.se/peaceagreements/fulltext/BA_950926_FurtherA

### Validate webscraping approach
    Check text document manually

In [7]:
### Check every text peace agreement manually --------
pd.options.display.max_colwidth = 1000
print(ucdp_full[["paid"]].iloc[13]) # <----- change here 
print(ucdp_full[["pa_name"]].iloc[13])
ucdp_full[["fulltext"]].iloc[13]

paid    1568
Name: 15, dtype: int64
pa_name    Agreed Basic Principles signed on 8 September 1995 at Geneva
Name: 15, dtype: object


fulltext    [[AgreedBasicPrinciplessignedon8September1995atGenevaTextofprinciplesagreedtoFridayatGenevaasthebasisfortalkson\nendingthewarinBosnia:\n---AgreedBasicPrinciples\n\n1.BosniaandHerzegovinawillcontinueitslegalexistencewithits\npresentbordersandcontinuinginternationalrecognition.\n2.BosniaandHerzegovinawillconsistoftwoentities,theFederationof\nBosniaandHerzegovinaasestablishedbytheWashingtonAgreements,andthe\nRepublicaSrpska(RS).\n2.1The51:49parameteroftheterritorialproposaloftheContactGroup\nisthebasisforasettlement.Thisterritorialproposalisopenforadjustment\n\nbymutualagreement.\n2.2Eachentitywillcontinuetoexistunderitspresentconstitution\n(amendedtoaccommodatethesebasicprinciples).\n2.3Bothentitieswillhavetherighttoestablishparallelspecial\nrelationshipswithneighbouringcountries,consistentwiththesovereigntyand\n\nterritorialintegrityofBosniaandHerzegovina.\n2.4Thetwoentitieswillenterintoreciprocalcommitments(a)tohold\ncompleteelectionsunderinternationalauspices;(b)toadoptand

In [8]:
### Remove documents which cannot be used for now --------
# TODO fix this

# Add empty column
ucdp_full["ready"] = 0

# Remove documents 
ucdp_full.loc[ucdp_full["paid"]==1565, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1128, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1488, "ready"]=1 
#ucdp_full.loc[ucdp_full["paid"]==1353, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1021, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1022, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1023, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1126, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1057, "ready"]=1 
ucdp_full.loc[ucdp_full["paid"]==1564, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1072, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1566, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1567, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1568, "ready"]=1 no spacing
#ucdp_full.loc[ucdp_full["paid"]==1569, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1073, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1570, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1533, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1003, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1065, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1287, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1285, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1489, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1099, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1532, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1335, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1336, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1490, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1491, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1386, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1113, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1571, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1333, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1358, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1355, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1373, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1493, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1454, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1469, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1494, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1497, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1486, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1498, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1420, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1421, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1049, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1499, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1501, "ready"]=1 in Spanish
ucdp_full.loc[ucdp_full["paid"]==1502, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1572, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1573, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1574, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==11575, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1079, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1503, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1505, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1329, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1578, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1122, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1328, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1120, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1447, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1579, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1506, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1507, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1580, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1581, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1508, "ready"]=1 in Spanish
ucdp_full.loc[ucdp_full["paid"]==1576, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1024, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1025, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1127, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1614, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1026, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1582, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1074, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1432, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1009, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1010, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1146, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1004, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1288, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1291, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1005, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1289, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1290, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1384, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1468, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1125, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1583, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1102, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1103, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1104, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1105, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1106, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1107, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1108, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1109, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1110, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1591, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1593, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1592, "ready"]=1 no spacing
#ucdp_full.loc[ucdp_full["paid"]==1594, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1511, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1008, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1620, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1619, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1596, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1597, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1598, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1130, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1082, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1083, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1084, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1085, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1086, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1087, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1088, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1089, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1090, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1091, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1092, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1093, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1094, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1095, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1081, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1097, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1019, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1599, "ready"]=1 no spacing
#ucdp_full.loc[ucdp_full["paid"]==1066, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1512, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1513, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1071, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1423, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1121, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1600, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1111, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1276, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1406, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1407, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1408, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1409, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1410, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1411, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1412, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1413, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1414, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1415, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1416, "ready"]=1 # encoding?
ucdp_full.loc[ucdp_full["paid"]==1417, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1133, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1134, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1135, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1515, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1136, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1137, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1138, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1140, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1354, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1139, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1292, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1283, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1273, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1608, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1359, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1360, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1361, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1362, "ready"]=1 in French
ucdp_full.loc[ucdp_full["paid"]==1376, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1301, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1075, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1100, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1014, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1062, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1300, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1015, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1016, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1299, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1017, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1018, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1141, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1101, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1098, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1039, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1040, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1609, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1517, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==30, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1403, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1610, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1070, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1615, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1616, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1617, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1618, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1123, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1391, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1308, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1302, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1303, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1306, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1307, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1020, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1613, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1485, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1611, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1518, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1438, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1437, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1436, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1337, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1520, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1521, "ready"]=1 in Spanish
ucdp_full.loc[ucdp_full["paid"]==1522, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1037, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1038, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1563, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1557, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1076, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1078, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1558, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1559, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1560, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1561, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1562, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1080, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1058, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1422, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1424, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1524, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1064, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1332, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1526, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1527, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1528, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1529, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1530, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1439, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1470, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1471, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1472, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1476, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1478, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1477, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1320, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1321, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1523, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1322, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1323, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1324, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1325, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1002, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1284, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1069, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1011, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1013, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1143, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1050, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1142, "ready"]=1 no spacing
ucdp_full.loc[ucdp_full["paid"]==1060, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1374, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1556, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1385, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1393, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1394, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1395, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1396, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1397, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1389, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1444, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1456, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1457, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1531, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==31, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1480, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1482, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1484, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==24, "ready"]=1 no spacing
#ucdp_full.loc[ucdp_full["paid"]==28, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==29, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1625, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1425, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1534, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1404, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1277, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1552, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1553, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1278, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1310, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1279, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1280, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1281, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1545, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1546, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1543, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1547, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1548, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1549, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1282, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1351, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1554, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1334, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1430, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1434, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1483, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1542, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1144, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1147, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1539, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1313, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1312, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1318, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1315, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1317, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1119, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1387, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1388, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1145, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1367, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1368, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1377, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1378, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1379, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1380, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1381, "ready"]=1 empty
#ucdp_full.loc[ucdp_full["paid"]==1536, "ready"]=1 encoding
#ucdp_full.loc[ucdp_full["paid"]==1537, "ready"]=1 encoding
ucdp_full.loc[ucdp_full["paid"]==1063, "ready"]=1
ucdp_full.loc[ucdp_full["paid"]==1535, "ready"]=1
#ucdp_full.loc[ucdp_full["paid"]==1402, "ready"]=1 empty
ucdp_full.loc[ucdp_full["paid"]==1399, "ready"]=1

In [9]:
### Make table for all peace agreements -----
ucdp_full[[#"paid", 
           "pa_date",
           "pa_name"]].to_latex(os.path.join(out_paths["desciptive_plots_outcome"],
                                             "peace_agreements_table.tex"), longtable=True,
                                                                            index=False)

In [10]:
### Check which ones are missing ----
# Subset for ones which are excluded
ucdp_missing = ucdp_full.loc[ucdp_full["ready"]==0]
ucdp_ready = ucdp_full.loc[ucdp_full["ready"]==1]

# Print number of peace agreements 
print(len(ucdp_missing))
print(len(ucdp_ready))
print(len(ucdp_full))
print(len(ucdp))

# Print proportions
print(len(ucdp_missing)/len(ucdp_full))
print(len(ucdp_ready)/len(ucdp))
print(len(ucdp_ready)/len(ucdp_full))

96
236
332
355
0.2891566265060241
0.6647887323943662
0.7108433734939759


In [11]:
### Make table for peace agreements that are missing ------
ucdp_missing[[ #"paid", 
           "pa_date",
           "pa_name"]].to_latex(os.path.join(out_paths["desciptive_plots_outcome"],
                                             "peace_agreements_table_missing.tex"), longtable=True,
                                                                            index=False)

In [12]:
### Save dataset -------
ucdp_full.to_csv(os.path.join(out_paths["data"], 
                           "ucdp_full.csv"), index=False, sep=',')