# Start of IGO Data Transformation

Task: use Pandas to transform csv files into DataFrames that match desired tables for database schema

Tables:

- IGO (done)
- IGO_MEMBERSHIP (done)

In [1]:
import pandas as pd
import numpy as np

In [2]:
!ls ../SourceData/CorrelatesOfWar/

[34mCodebooks[m[m                    MID_Narratives_2002-2010.pdf
CowWarList.csv               NMC_5_0-wsupplementary.csv
CowWarList.pdf               Non-StateWarData_v4.0.csv
[31mEntities.pdf[m[m                 Territories.csv
Extra-StateWarData_v4.0.csv  alliance_v4.1_by_member.csv
IGO_stateunit_v2.3.csv       contdir.csv
Inter-StateWarData_v4.0.csv  igounit_v2.3.csv
Intra-StateWarData_v4.1.csv  majors2016.csv
[31mMIDA_4.2.csv[m[m                 states2016.csv
[31mMIDB_4.2.csv[m[m                 system2016.csv
[31mMIDLOCA_2.0.csv[m[m              tc2014.csv
MID_Narratives_1993-2001.pdf


## Create 'IGO' table

Task: transform 'igounit_v2.3.csv' into a table with the following attributes:

- igoID
- igoAbbr
- igoShortName
- igoLongName
- StartYear
- EndYear
- EndReason
- Notes

In [3]:
dfigo = pd.read_csv('../SourceData/CorrelatesOfWar/igounit_v2.3.csv', usecols=['ioname', 'orgname', 'deaddate', 'integrated', 'replaced', 'ionum', 'longorgname', 'sdate'])
dfigo

Unnamed: 0,ioname,orgname,deaddate,integrated,replaced,ionum,longorgname,sdate
0,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
1,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
2,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
3,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
4,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
5,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
6,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
7,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
8,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
9,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0


In [4]:
dfigo.drop_duplicates(inplace=True)

In [5]:
dfigo['ioname'].value_counts()

SITTDEC     3
EIPA        2
AU          2
IVWO        2
NCRR        2
AIC         2
CELC        2
CARIFTA     1
NWHF        1
LAEO        1
ATPC        1
EEC         1
JNOLCRH     1
OCAS        1
AFPU        1
SACU        1
SELA        1
APCC        1
ICDR        1
EAPO        1
OAPEC       1
WCO         1
OSLO        1
ICAI        1
CEAO        1
IUPCT       1
ASPAC       1
CSLF        1
Africare    1
PAP         1
           ..
ACWL        1
BCSC        1
ANZUS       1
CONFEJES    1
GHSI        1
ECSC        1
IAFC        1
UDEAC       1
ASCBC       1
NATO        1
IARadiO     1
IMBSlav     1
PICS        1
IRU         1
LAFDO       1
CIFC        1
AOAD        1
OECS        1
EACM        1
MCWCASM     1
ECCB        1
CAMSF       1
CAECC       1
ARIPO       1
ICTM        1
IPGRI       1
CEC         1
AMU         1
ACU         1
CCOM        1
Name: ioname, Length: 529, dtype: int64

In [6]:
dfigo.loc[dfigo['ioname'] == 'IVWO']

Unnamed: 0,ioname,orgname,deaddate,integrated,replaced,ionum,longorgname,sdate
8686,IVWO,Intl Vine & Wine Office,2003.0,0,1,3240,International Wine Office,1924.0
8694,IVWO,Intl Vine & Wine Office,2003.0,0,1,3240,International Vine and Wine Office,1924.0


In [7]:
dfigo.drop([11730, 11733, 1820, 9734, 4379, 3102, 745, 8686], inplace=True)

In [8]:
dfigo['ioname'].value_counts()

CARIFTA     1
WCO         1
EADB        1
ICDR        1
APCC        1
SELA        1
SACU        1
AFPU        1
OCAS        1
EEC         1
EIPA        1
ATPC        1
LAEO        1
NWHF        1
EAPO        1
JNOLCRH     1
OSLO        1
UASC        1
OAPEC       1
CEAO        1
IUPCT       1
ASPAC       1
CSLF        1
Africare    1
PAP         1
OMDKR       1
FAPED       1
CIMA        1
WHO         1
IOOC        1
           ..
ACWL        1
BCSC        1
ANZUS       1
CONFEJES    1
GHSI        1
ECSC        1
IAFC        1
UDEAC       1
ASCBC       1
NATO        1
IARadiO     1
IMBSlav     1
PICS        1
IRU         1
LAFDO       1
CIFC        1
AOAD        1
OECS        1
EACM        1
MCWCASM     1
ECCB        1
CAMSF       1
CAECC       1
ARIPO       1
ICTM        1
IPGRI       1
CEC         1
AMU         1
ACU         1
CCOM        1
Name: ioname, Length: 529, dtype: int64

In [9]:
dfigo.rename(columns={'ioname':'igoAbbr', 'orgname':'igoShortName', 'deaddate':'EndYear', 'ionum':'igoID', 'longorgname':'igoLongName', 'sdate':'StartYear'}, inplace=True)
dfigo

Unnamed: 0,igoAbbr,igoShortName,EndYear,integrated,replaced,igoID,igoLongName,StartYear
0,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0
30,AAB,Alliance Against Biopiracy,,0,0,305,Alliance Against Biopiracy,2002.0
34,AACB,Assoc. of African Central Banks,,0,0,690,Association of African Central Banks (AACB),1968.0
72,AACarib,Anglo-Am Caribbean Comm,1946.0,0,1,350,Anglo-American Caribbean Commission,1942.0
73,AALCO,Asian-African Legal Consultative Org,,0,0,630,Asian-African Legal Consultative Committee,1956.0
116,AARO,Afro-Asian Rural Development Org,,0,0,230,Afro-Asian Rural Reconstruction Organization,1962.0
158,AATA,Assoc. of African Tax Administrators,,0,0,700,Association of African Tax Administrators (AATA),1987.0
177,AATPO,Assoc. of Afr. Trade Promotion Orgs.,,0,0,710,Association of African Trade Promotion Organiz...,1974.0
209,ABEDA,Arab Bank for Econ. Dev. in Africa,,0,0,380,Arab Bank for Economic Development in Africa,1974.0
241,ABEPSEAC,Assoc. B/t EEC and States of East Afr. Community,1975.0,0,1,680,Association between the European Economic Comm...,1968.0


In [10]:
dfigo['EndReason'] = ''
dfigo['EndReason'] [dfigo['integrated'] == 1] = 'integrated'
dfigo['EndReason'] [dfigo['replaced'] == 1] = 'replaced'
dfigo

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,igoAbbr,igoShortName,EndYear,integrated,replaced,igoID,igoLongName,StartYear,EndReason
0,AAAID,Arab Auth. for Ag. Invest. & Development,,0,0,370,Arab Authority for Agricultural Investment and...,1976.0,
30,AAB,Alliance Against Biopiracy,,0,0,305,Alliance Against Biopiracy,2002.0,
34,AACB,Assoc. of African Central Banks,,0,0,690,Association of African Central Banks (AACB),1968.0,
72,AACarib,Anglo-Am Caribbean Comm,1946.0,0,1,350,Anglo-American Caribbean Commission,1942.0,replaced
73,AALCO,Asian-African Legal Consultative Org,,0,0,630,Asian-African Legal Consultative Committee,1956.0,
116,AARO,Afro-Asian Rural Development Org,,0,0,230,Afro-Asian Rural Reconstruction Organization,1962.0,
158,AATA,Assoc. of African Tax Administrators,,0,0,700,Association of African Tax Administrators (AATA),1987.0,
177,AATPO,Assoc. of Afr. Trade Promotion Orgs.,,0,0,710,Association of African Trade Promotion Organiz...,1974.0,
209,ABEDA,Arab Bank for Econ. Dev. in Africa,,0,0,380,Arab Bank for Economic Development in Africa,1974.0,
241,ABEPSEAC,Assoc. B/t EEC and States of East Afr. Community,1975.0,0,1,680,Association between the European Economic Comm...,1968.0,replaced


In [11]:
dfigo = dfigo[['igoID', 'igoAbbr', 'igoShortName', 'igoLongName', 'StartYear', 'EndYear', 'EndReason']]
dfigo

Unnamed: 0,igoID,igoAbbr,igoShortName,igoLongName,StartYear,EndYear,EndReason
0,370,AAAID,Arab Auth. for Ag. Invest. & Development,Arab Authority for Agricultural Investment and...,1976.0,,
30,305,AAB,Alliance Against Biopiracy,Alliance Against Biopiracy,2002.0,,
34,690,AACB,Assoc. of African Central Banks,Association of African Central Banks (AACB),1968.0,,
72,350,AACarib,Anglo-Am Caribbean Comm,Anglo-American Caribbean Commission,1942.0,1946.0,replaced
73,630,AALCO,Asian-African Legal Consultative Org,Asian-African Legal Consultative Committee,1956.0,,
116,230,AARO,Afro-Asian Rural Development Org,Afro-Asian Rural Reconstruction Organization,1962.0,,
158,700,AATA,Assoc. of African Tax Administrators,Association of African Tax Administrators (AATA),1987.0,,
177,710,AATPO,Assoc. of Afr. Trade Promotion Orgs.,Association of African Trade Promotion Organiz...,1974.0,,
209,380,ABEDA,Arab Bank for Econ. Dev. in Africa,Arab Bank for Economic Development in Africa,1974.0,,
241,680,ABEPSEAC,Assoc. B/t EEC and States of East Afr. Community,Association between the European Economic Comm...,1968.0,1975.0,replaced


Now to take care of those notes...

In [12]:
dfigonotes = pd.read_csv('SourceData/CorrelatesOfWar/igounit_v2.3.csv', usecols=['ionum', 'Sources', 'Sources2'])
dfigonotes.rename(columns={'ionum':'igoID'}, inplace=True)
dfigonotes.dropna(subset=['Sources'], inplace=True)
dfigonotes.Sources2 = dfigonotes.Sources2.fillna('')
dfigonotes['Notes'] = dfigonotes['Sources'] + '; ' + dfigonotes['Sources2'].map(str)
dfigonotes.drop(columns=['Sources', 'Sources2'], inplace=True)
dfigonotesjoined = dfigonotes.groupby('igoID')['Notes'].apply(lambda x: '; '.join(x))
dfigonotesjoined

  interactivity=interactivity, compiler=compiler, result=result)


igoID
25      http://www.africa-union.org/; ; http://www.pft...
125     http://www.boip.int/; ; http://www.bibalex.org...
275     http://www.gene.ch/gentech/2002/Feb/msg00125.h...
305     www.rainforestcoalition.org; ; http://www.s-ci...
725     http://www.icc-cpi.int/home.html&l=en; ; http:...
795     http://portal.unesco.org/education/en/ev.php-U...
825     http://www.boip.int/; ; http://www.bibalex.org...
873     www.rainforestcoalition.org; ; http://www.s-ci...
1115                           http://www.marri-rc.org/; 
1345    http://www.nwhf.no/;   Website (accessed 10/31...
1355    http://www.unctad.org/infocomm/anglais/rubber/...
1415    http://www.acwl.ch/;   [Per org's website (acc...
1855    http://www.wmdcommission.org/; The Commission ...
1905    http://en.wikipedia.org/wiki/Bank_of_the_South...
1960    http://www.s-cica.org/page.php?page_id=7&lang=3; 
2015    http://www.earthobservations.org/; ; http://ww...
2315    http://www.marri-rc.org/; ; http://www.acdi-ci...
2492    

In [13]:
dfigonew = pd.merge(dfigo, dfigonotesjoined.to_frame(), on='igoID', how='outer')
dfigonew.Notes = dfigonew.Notes.fillna('')
dfigonew

Unnamed: 0,igoID,igoAbbr,igoShortName,igoLongName,StartYear,EndYear,EndReason,Notes
0,370,AAAID,Arab Auth. for Ag. Invest. & Development,Arab Authority for Agricultural Investment and...,1976.0,,,
1,305,AAB,Alliance Against Biopiracy,Alliance Against Biopiracy,2002.0,,,www.rainforestcoalition.org; ; http://www.s-ci...
2,690,AACB,Assoc. of African Central Banks,Association of African Central Banks (AACB),1968.0,,,
3,350,AACarib,Anglo-Am Caribbean Comm,Anglo-American Caribbean Commission,1942.0,1946.0,replaced,
4,630,AALCO,Asian-African Legal Consultative Org,Asian-African Legal Consultative Committee,1956.0,,,
5,230,AARO,Afro-Asian Rural Development Org,Afro-Asian Rural Reconstruction Organization,1962.0,,,
6,700,AATA,Assoc. of African Tax Administrators,Association of African Tax Administrators (AATA),1987.0,,,
7,710,AATPO,Assoc. of Afr. Trade Promotion Orgs.,Association of African Trade Promotion Organiz...,1974.0,,,
8,380,ABEDA,Arab Bank for Econ. Dev. in Africa,Arab Bank for Economic Development in Africa,1974.0,,,
9,680,ABEPSEAC,Assoc. B/t EEC and States of East Afr. Community,Association between the European Economic Comm...,1968.0,1975.0,replaced,


In [28]:
dfigonew['Notes'].replace(to_replace='; +;', value='; ', regex=True, inplace=True)
dfigonew

Unnamed: 0,igoID,igoAbbr,igoShortName,igoLongName,StartYear,EndYear,EndReason,Notes
0,370,AAAID,Arab Auth. for Ag. Invest. & Development,Arab Authority for Agricultural Investment and...,1976.0,,,
1,305,AAB,Alliance Against Biopiracy,Alliance Against Biopiracy,2002.0,,,www.rainforestcoalition.org; http://www.s-cic...
2,690,AACB,Assoc. of African Central Banks,Association of African Central Banks (AACB),1968.0,,,
3,350,AACarib,Anglo-Am Caribbean Comm,Anglo-American Caribbean Commission,1942.0,1946.0,replaced,
4,630,AALCO,Asian-African Legal Consultative Org,Asian-African Legal Consultative Committee,1956.0,,,
5,230,AARO,Afro-Asian Rural Development Org,Afro-Asian Rural Reconstruction Organization,1962.0,,,
6,700,AATA,Assoc. of African Tax Administrators,Association of African Tax Administrators (AATA),1987.0,,,
7,710,AATPO,Assoc. of Afr. Trade Promotion Orgs.,Association of African Trade Promotion Organiz...,1974.0,,,
8,380,ABEDA,Arab Bank for Econ. Dev. in Africa,Arab Bank for Economic Development in Africa,1974.0,,,
9,680,ABEPSEAC,Assoc. B/t EEC and States of East Afr. Community,Association between the European Economic Comm...,1968.0,1975.0,replaced,


In [29]:
NotesMaxLength = int(dfigonew['Notes'].str.encode(encoding='utf-8').str.len().max())
print(NotesMaxLength)

816


In [30]:
dfigonew.to_csv('../FinalData/igo.csv', encoding='utf-8', index=False)

## Create 'IGO_MEMBERSHIP' table

Task: transform 'IGO_stateunit_v2.3.csv' into a table with the following attributes:

- igoID
- StateID
- JoinYear
- LeaveYear

In [16]:
import csv

In [17]:
with open('../SourceData/CorrelatesOfWar/IGO_stateunit_v2.3.csv',  'r', encoding = 'utf-8') as fin:
    csvin = csv.reader(fin)
    headers = next(csvin)
    data = [r for r in csvin]

In [18]:
igos = headers[4:] # create a list of igo IDs
countries = list(set([row[0] for row in data])) # create a list of country Ids

In [19]:
def joinleaveyrs(sorted_years_list):
    
    # first, make a new list of lists, with each sequential section of years as a list element
    
    yearlist = [int(x) for x in sorted_years_list]
    partitioned_years_list = [] # the new list of lists
    
    startindex = 0
    endindex = 0
    
    for index, year in enumerate(yearlist):
        if index < len(yearlist) - 1:
            if yearlist[index+1] > year + 1:
                endindex = index
                partitioned_years_list.append(yearlist[startindex:endindex+1])
                startindex = endindex + 1
            else:
                continue
        else:
            partitioned_years_list.append(yearlist[startindex:len(yearlist)])
    
    # next, get first and last years in each sub list
    
    allyrpairs = []
    
    for yrlist in partitioned_years_list:
        startyr = yrlist[0]
        endyr = yrlist[-1]
        yrpair = [startyr, endyr]
        allyrpairs.append(yrpair)
    
    return allyrpairs

note: code from the first portion of the joinleaveyrs function comes from [this stackoverflow answer](https://stackoverflow.com/a/49314031)

In [20]:
countrymemdata = []

# assign an index position for each igo
for igo in igos: 
    igopos = headers.index(igo)

    # iterates through the set of all countries, and for each country gets the cell values for the year column and each igo column
    for country in countries:
        # countryfilter draws from the set of countries, and narrows the rows being examined down to one country per iteration
        countryfilter = [r for r in data if r[0] == country] # r[0] because the 1st column is country IDs
        # years gets all year numbers for that specific country for the iteration
        years = [r[3] for r in countryfilter] # r[3] because the 4th column is years
        # nums gets the igo index position for each igo
        nums = [r[igopos] for r in countryfilter] # nums possible values: 1, 0, -1, -9
        
        yearswithin = [] # empty list for all years country is part of igo to be accumulated in
        for year, num in list(zip(years, nums)): # zip creates an iterable tuple of the years and their igo-num
            if int(num) == 1: # 1 means membership
                yearswithin.append(int(year)) # adds years of membership to list. seperate list for each igo (per country)
        yearswithin.sort() # puts membership years in order
        #print(country, igo, yearswithin)
        
        # end of information gathering; start of information condensing
        
        if len(yearswithin) >= 1:
            membershipyears = joinleaveyrs(yearswithin)
        else:
            continue
        
        for yrset in membershipyears:
            countrymemdatarow = []
            countrymemdatarow.append(igo)
            countrymemdatarow.append(country)
            countrymemdatarow.append(yrset[0])
            countrymemdatarow.append(yrset[1])
            countrymemdata.append(countrymemdatarow)

In [21]:
outfileheaders = ['igoID', 'StateID', 'JoinYear', 'LeaveYear']

with open('../FinalData/proto_igo_membership.csv', 'w', newline = '') as outfile:
    csvout = csv.writer(outfile)
    csvout.writerow(outfileheaders)
    csvout.writerows(countrymemdata)

In [22]:
dfigomem = pd.read_csv('../FinalData/proto_igo_membership.csv')
dfigomem.rename(columns={'igoID':'igoAbbr'},inplace=True)
dfigomem

Unnamed: 0,igoAbbr,StateID,JoinYear,LeaveYear
0,AAAID,625,1985,2005
1,AAAID,698,1994,2005
2,AAAID,645,1985,2005
3,AAAID,651,1985,2005
4,AAAID,435,1985,2005
5,AAAID,670,1985,2005
6,AAAID,581,2005,2005
7,AAAID,663,1994,2005
8,AAAID,615,1985,2005
9,AAAID,679,2005,2005


In [23]:
dfigoids = dfigo[['igoID', 'igoAbbr']]
dfigoids

Unnamed: 0,igoID,igoAbbr
0,370,AAAID
30,305,AAB
34,690,AACB
72,350,AACarib
73,630,AALCO
116,230,AARO
158,700,AATA
177,710,AATPO
209,380,ABEDA
241,680,ABEPSEAC


In [24]:
dfigomemfinal = pd.merge(dfigomem, dfigoids, on='igoAbbr')
dfigomemfinal = dfigomemfinal[['igoID', 'StateID', 'JoinYear', 'LeaveYear']]
dfigomemfinal

Unnamed: 0,igoID,StateID,JoinYear,LeaveYear
0,370,625,1985,2005
1,370,698,1994,2005
2,370,645,1985,2005
3,370,651,1985,2005
4,370,435,1985,2005
5,370,670,1985,2005
6,370,581,2005,2005
7,370,663,1994,2005
8,370,615,1985,2005
9,370,679,2005,2005


In [25]:
dfigomemfinal.to_csv('../FinalData/igo_membership.csv', encoding='utf-8', index=False)