### Code for joining datasets (BMR, FH, VDEM)

### Sanittawan Tan

In [1]:
import pandas as pd
import numpy as np

### check for country name discrepancies and standardize

In [2]:
fh = pd.read_csv('./cleaned_FH.csv')
bmr = pd.read_csv('./cleaned_BMR.csv')
vdem = pd.read_csv('./cleaned_VDEM.csv')

In [3]:
# drop Federal Republic of Yugoslavia (Predecessor of Serbia and Montenegro)
fh = fh.drop(fh[fh['country'] == 'yugoslavia (serbia & montenegro)'].index)
fh = fh.drop(fh[fh['country'] == 'yugoslavia'].index)
bmr = bmr.drop(bmr[bmr['country'] == 'yugoslavia, fed. rep.'].index)

In [4]:
FH_set = set(fh['country'].unique())

In [5]:
len(FH_set)

195

In [6]:
BMR_set = set(bmr['country'].unique())

In [7]:
len(BMR_set)

196

In [8]:
VDEM_set = set(vdem['country'].unique())

In [9]:
len(VDEM_set)

179

In [10]:
FH_BMR = FH_set - BMR_set
FH_BMR

{'antigua and barbuda',
 'bosnia and herzegovina',
 'central african republic',
 'congo (brazzaville)',
 'congo (kinshasa)',
 'liechtenstein',
 'micronesia',
 'north korea',
 'samoa',
 'sao tome and principe',
 'south korea',
 'south sudan',
 'st. kitts and nevis',
 'st. vincent and the grenadines',
 'the gambia',
 'timor-leste',
 'trinidad and tobago'}

In [11]:
fh['country'].replace({'congo (brazzaville)': 'rep. of congo',
                       'congo (kinshasa)': 'dem. rep. of congo'
                       ''}, inplace=True)

In [12]:
BMR_FH = BMR_set - FH_set
BMR_FH

{'antigua',
 'bosnia',
 'central african rep.',
 'congo, dem. rep.',
 'congo, rep.',
 'east timor',
 'gambia',
 'korea, north',
 'korea, south',
 'liechstenstein',
 'micronesia, fed.',
 'samoa (western)',
 'sao tome & principe',
 'st. kitts & nevis',
 'st. vincent & gren.',
 'sudan, north',
 'sudan, south',
 'trinidad & tobago'}

In [13]:
bmr_replace = {'antigua': 'antigua and barbuda',
 'bosnia': 'bosnia and herzegovina',
 'central african rep.': 'central african republic',
 'congo, dem. rep.': 'dem. rep. of congo',
 'congo, rep.': 'rep. of congo',
 'east timor': 'timor-leste',
 'gambia': 'the gambia',
 'korea, north': 'north korea',
 'korea, south': 'south korea',
 'liechstenstein': 'liechtenstein',
 'micronesia, fed.': 'micronesia',
 'samoa (western)': 'samoa',
 'sao tome & principe': 'sao tome and principe',
 'st. kitts & nevis': 'st. kitts and nevis',
 'st. vincent & gren.': 'st. vincent and the grenadines',
 'sudan, north': 'sudan',
 'sudan, south': 'south sudan',
 'trinidad & tobago': 'trinidad and tobago'}
bmr['country'].replace(bmr_replace, inplace=True)

In [14]:
FH_set = set(fh['country'].unique())
BMR_set = set(bmr['country'].unique())
print(FH_set - BMR_set)
print(BMR_set - FH_set)

set()
set()


In [15]:
FH_VDEM = FH_set - VDEM_set
FH_VDEM

{'andorra',
 'antigua and barbuda',
 'bahamas',
 'belize',
 'brunei',
 "cote d'ivoire",
 'dem. rep. of congo',
 'dominica',
 'grenada',
 'kiribati',
 'liechtenstein',
 'marshall islands',
 'micronesia',
 'monaco',
 'myanmar',
 'nauru',
 'palau',
 'rep. of congo',
 'samoa',
 'san marino',
 'st. kitts and nevis',
 'st. lucia',
 'st. vincent and the grenadines',
 'tonga',
 'tuvalu'}

In [16]:
VDEM_FH = VDEM_set - FH_set
VDEM_FH

{'burma/myanmar',
 'democratic republic of the congo',
 'hong kong',
 'ivory coast',
 'palestine/gaza',
 'palestine/west bank',
 'republic of the congo',
 'somaliland',
 'zanzibar'}

In [17]:
vdem_replace = {'burma/myanmar': 'myanmar',
 'democratic republic of the congo': 'dem. rep. of congo',
 'republic of the congo': 'rep. of congo',
 'ivory coast': "cote d'ivoire"}
vdem['country'].replace(vdem_replace, inplace=True)

In [18]:
FH_set = set(fh['country'].unique())
VDEM_set = set(vdem['country'].unique())
print(FH_set - VDEM_set)
print(VDEM_set - FH_set)

{'san marino', 'monaco', 'belize', 'st. vincent and the grenadines', 'dominica', 'grenada', 'st. lucia', 'marshall islands', 'brunei', 'palau', 'tuvalu', 'kiribati', 'andorra', 'antigua and barbuda', 'samoa', 'nauru', 'tonga', 'micronesia', 'liechtenstein', 'bahamas', 'st. kitts and nevis'}
{'palestine/gaza', 'hong kong', 'somaliland', 'zanzibar', 'palestine/west bank'}


In [19]:
len(FH_set)

195

In [20]:
len(BMR_set)

195

In [21]:
len(VDEM_set)

179

In [22]:
fh.dtypes

country        object
year            int64
PR              int64
CL              int64
Status         object
FH_category     int64
FH_F            int64
FH_NF           int64
FH_PF           int64
dtype: object

In [23]:
bmr.dtypes

country                      object
year                          int64
BMR_democracy               float64
BMR_democracy_trans         float64
BMR_democracy_breakdowns    float64
BMR_democracy_duration      float64
dtype: object

### Join datasets

In [24]:
mid_data = fh.merge(bmr, on=['country', 'year'], how='left')

In [25]:
final_data = mid_data.merge(vdem, on=['country', 'year'], how='left')

In [26]:
final_data

Unnamed: 0,country,year,PR,CL,Status,FH_category,FH_F,FH_NF,FH_PF,BMR_democracy,...,v2eldonate_ord,v2elpubfin_ord,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim
0,afghanistan,2000,7,7,NF,0,0,1,0,0.0,...,0.0,0.0,0.0,,,,,,,
1,afghanistan,2001,7,7,NF,0,0,1,0,0.0,...,0.0,0.0,0.0,,,,,,,
2,afghanistan,2002,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,1.0,,,,,,,
3,afghanistan,2003,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,,,,,,,
4,afghanistan,2004,5,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,-0.240
5,afghanistan,2005,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,-0.240
6,afghanistan,2006,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,,,,,,,
7,afghanistan,2007,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,,,,,,,
8,afghanistan,2008,5,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,,,,,,,
9,afghanistan,2009,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,3.0,0.0,2.0,2.0,3.0,2.0,-0.421


In [27]:
# for reference
final_data_cp = final_data.copy()

### Classify countries that are free, partly free, not free as 2, 1, 0 respectively

In [28]:
final_data['FH_category'] = 0

In [29]:
final_data.loc[final_data['Status'] == 'F', 'FH_category'] = 2

In [30]:
final_data.loc[final_data['Status'] == 'PF', 'FH_category'] = 1

In [31]:
final_data.head(10)

Unnamed: 0,country,year,PR,CL,Status,FH_category,FH_F,FH_NF,FH_PF,BMR_democracy,...,v2eldonate_ord,v2elpubfin_ord,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim
0,afghanistan,2000,7,7,NF,0,0,1,0,0.0,...,0.0,0.0,0.0,,,,,,,
1,afghanistan,2001,7,7,NF,0,0,1,0,0.0,...,0.0,0.0,0.0,,,,,,,
2,afghanistan,2002,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,1.0,,,,,,,
3,afghanistan,2003,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,,,,,,,
4,afghanistan,2004,5,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,-0.24
5,afghanistan,2005,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,3.0,1.0,2.0,2.0,3.0,2.0,-0.24
6,afghanistan,2006,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,,,,,,,
7,afghanistan,2007,5,5,PF,1,0,0,1,0.0,...,2.0,0.0,2.0,,,,,,,
8,afghanistan,2008,5,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,,,,,,,
9,afghanistan,2009,6,6,NF,0,0,1,0,0.0,...,2.0,0.0,2.0,3.0,0.0,2.0,2.0,3.0,2.0,-0.421


### Classify democratic erosion cases (is_erosion) manually

* For year 2000, all countries whether a Free, Partly Free or Not Free receive a value of 0 for is_erosion
* For subsequent years, if a country is Free at t-1 and receives a PF or NF at t, is_erosion at t = 1
* if a country is PF at t-1 and PF at t, if PR or CL scores at t is less than t-1, is_erosion at t = 1
* if a country is PF at t-1 and NF at t, is_erosion = 1
* else, is_erosion = 0

In [32]:
final_data['is_erosion'] = -1

In [33]:
final_data.loc[final_data['year'] == 2000, 'is_erosion'] = 0

In [34]:
def classify_erosions(final_data_gb, final_data):
    '''
    Modify the dataframe in place.
    '''
    new_final_data = final_data.copy()
    j = 0 
    for name, group in final_data_gb:
        j += 1
        if j == 3:
            return final_data
        print("== Working on {} ==".format(name))
        print(type(group))
        for i, row in group.iterrows():
            if i == 0:
                continue
            cur_row = new_final_data.iloc[i]
            print(cur_row.loc['Status'])
            prev_row = new_final_data.iloc[i-1]
            print(prev_row.loc['Status'])
            
            if (((prev_row.loc['Status'] == 'F') & (cur_row.loc['Status'] == 'PF'))\
                | ((prev_row.loc['Status'] == 'F') & (cur_row.loc['Status'] == 'NF'))):
                print('if executed')
                print(new_final_data.iloc[i].loc['is_erosion'])
                new_final_data.iloc[i].loc['is_erosion'] = 1
                print(new_final_data.iloc[i].loc['is_erosion'])
            elif ((prev_row.loc['Status'] == 'PF') & (cur_row.loc['Status'] == 'NF')):
                print('elif executed')
                print(new_final_data.iloc[i].loc['is_erosion'])
                new_final_data.iloc[i].loc['is_erosion'] = 1
                print(new_final_data.iloc[i].loc['is_erosion'])
            #elif ((prev_row.loc['Status'] == 'PF') & (prev_row.loc['Status'] == 'PF')):
            #    print('elif 2 executed')
            #    if ((cur_row.loc['PR'] > prev_row.loc['PR']) | (cur_row.loc['CL'] > prev_row.loc['CL'])):
            #        final_data.iloc[i].loc['is_erosion'] = 1
            else:
                print('else executed')
                print(new_final_data.iloc[i].loc['is_erosion'])
                new_final_data.iloc[i].loc['is_erosion'] = 0
                print(new_final_data.iloc[i].loc['is_erosion'])
    
    return new_final_data

In [35]:
final_data_gb = final_data.groupby(['country'])

In [38]:
ftr_check = ['country', 'year', 'Status', 'PR', 'CL', 'is_erosion']

In [39]:
test2 = classify_erosions(final_data_gb, final_data)

== Working on afghanistan ==
<class 'pandas.core.frame.DataFrame'>
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
PF
NF
else executed
-1
-1
PF
PF
else executed
-1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


-1
PF
PF
else executed
-1
-1
NF
PF
elif executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
NF
NF
else executed
-1
-1
== Working on albania ==
<class 'pandas.core.frame.DataFrame'>
PF
NF
else executed
0
0
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1
PF
PF
else executed
-1
-1


In [40]:
test2[ftr_check]

Unnamed: 0,country,year,Status,PR,CL,is_erosion
0,afghanistan,2000,NF,7,7,0
1,afghanistan,2001,NF,7,7,-1
2,afghanistan,2002,NF,6,6,-1
3,afghanistan,2003,NF,6,6,-1
4,afghanistan,2004,NF,5,6,-1
5,afghanistan,2005,PF,5,5,-1
6,afghanistan,2006,PF,5,5,-1
7,afghanistan,2007,PF,5,5,-1
8,afghanistan,2008,NF,5,6,-1
9,afghanistan,2009,NF,6,6,-1


In [83]:
final_data[ftr_check]

Unnamed: 0,country,year,Status,PR,CL,is_erosion
0,afghanistan,2000,NF,7,7,0
1,afghanistan,2001,NF,7,7,0
2,afghanistan,2002,NF,6,6,0
3,afghanistan,2003,NF,6,6,0
4,afghanistan,2004,NF,5,6,0
5,afghanistan,2005,PF,5,5,0
6,afghanistan,2006,PF,5,5,0
7,afghanistan,2007,PF,5,5,0
8,afghanistan,2008,NF,5,6,0
9,afghanistan,2009,NF,6,6,0


In [175]:
for name, group in final_data_gb:
    print(name)
    print(group)

afghanistan
        country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
0   afghanistan  2000   7   7     NF            0     0      1      0   
1   afghanistan  2001   7   7     NF            0     0      1      0   
2   afghanistan  2002   6   6     NF            0     0      1      0   
3   afghanistan  2003   6   6     NF            0     0      1      0   
4   afghanistan  2004   5   6     NF            0     0      1      0   
5   afghanistan  2005   5   5     PF            1     0      0      1   
6   afghanistan  2006   5   5     PF            1     0      0      1   
7   afghanistan  2007   5   5     PF            1     0      0      1   
8   afghanistan  2008   5   6     NF            0     0      1      0   
9   afghanistan  2009   6   6     NF            0     0      1      0   
10  afghanistan  2010   6   6     NF            0     0      1      0   
11  afghanistan  2011   6   6     NF            0     0      1      0   
12  afghanistan  2012   6   6     NF   

[18 rows x 76 columns]
costa rica
        country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
720  costa rica  2000   1   2      F            2     1      0      0   
721  costa rica  2001   1   2      F            2     1      0      0   
722  costa rica  2002   1   2      F            2     1      0      0   
723  costa rica  2003   1   2      F            2     1      0      0   
724  costa rica  2004   1   1      F            2     1      0      0   
725  costa rica  2005   1   1      F            2     1      0      0   
726  costa rica  2006   1   1      F            2     1      0      0   
727  costa rica  2007   1   1      F            2     1      0      0   
728  costa rica  2008   1   1      F            2     1      0      0   
729  costa rica  2009   1   1      F            2     1      0      0   
730  costa rica  2010   1   1      F            2     1      0      0   
731  costa rica  2011   1   1      F            2     1      0      0   
732  costa rica  

[18 rows x 76 columns]
greece
     country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
1152  greece  2000   1   3      F            2     1      0      0   
1153  greece  2001   1   3      F            2     1      0      0   
1154  greece  2002   1   2      F            2     1      0      0   
1155  greece  2003   1   2      F            2     1      0      0   
1156  greece  2004   1   2      F            2     1      0      0   
1157  greece  2005   1   2      F            2     1      0      0   
1158  greece  2006   1   2      F            2     1      0      0   
1159  greece  2007   1   2      F            2     1      0      0   
1160  greece  2008   1   2      F            2     1      0      0   
1161  greece  2009   1   2      F            2     1      0      0   
1162  greece  2010   1   2      F            2     1      0      0   
1163  greece  2011   2   2      F            2     1      0      0   
1164  greece  2012   2   2      F            2     1      0 

[18 rows x 76 columns]
lebanon
      country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
1647  lebanon  2000   6   5     NF            0     0      1      0   
1648  lebanon  2001   6   5     NF            0     0      1      0   
1649  lebanon  2002   6   5     NF            0     0      1      0   
1650  lebanon  2003   6   5     NF            0     0      1      0   
1651  lebanon  2004   6   5     NF            0     0      1      0   
1652  lebanon  2005   5   4     PF            1     0      0      1   
1653  lebanon  2006   5   4     PF            1     0      0      1   
1654  lebanon  2007   5   4     PF            1     0      0      1   
1655  lebanon  2008   5   4     PF            1     0      0      1   
1656  lebanon  2009   5   3     PF            1     0      0      1   
1657  lebanon  2010   5   3     PF            1     0      0      1   
1658  lebanon  2011   5   4     PF            1     0      0      1   
1659  lebanon  2012   5   4     PF            

[18 rows x 76 columns]
nicaragua
        country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
2199  nicaragua  2000   3   3     PF            1     0      0      1   
2200  nicaragua  2001   3   3     PF            1     0      0      1   
2201  nicaragua  2002   3   3     PF            1     0      0      1   
2202  nicaragua  2003   3   3     PF            1     0      0      1   
2203  nicaragua  2004   3   3     PF            1     0      0      1   
2204  nicaragua  2005   3   3     PF            1     0      0      1   
2205  nicaragua  2006   3   3     PF            1     0      0      1   
2206  nicaragua  2007   3   3     PF            1     0      0      1   
2207  nicaragua  2008   4   3     PF            1     0      0      1   
2208  nicaragua  2009   4   4     PF            1     0      0      1   
2209  nicaragua  2010   4   4     PF            1     0      0      1   
2210  nicaragua  2011   5   4     PF            1     0      0      1   
2211  nicaragua  2

                             country  year  PR  CL Status  FH_category  FH_F  \
2884  st. vincent and the grenadines  2000   2   1      F            2     1   
2885  st. vincent and the grenadines  2001   2   1      F            2     1   
2886  st. vincent and the grenadines  2002   2   1      F            2     1   
2887  st. vincent and the grenadines  2003   2   1      F            2     1   
2888  st. vincent and the grenadines  2004   2   1      F            2     1   
2889  st. vincent and the grenadines  2005   2   1      F            2     1   
2890  st. vincent and the grenadines  2006   2   1      F            2     1   
2891  st. vincent and the grenadines  2007   2   1      F            2     1   
2892  st. vincent and the grenadines  2008   2   1      F            2     1   
2893  st. vincent and the grenadines  2009   2   1      F            2     1   
2894  st. vincent and the grenadines  2010   1   1      F            2     1   
2895  st. vincent and the grenadines  20

      country  year  PR  CL Status  FH_category  FH_F  FH_NF  FH_PF  \
3370  vanuatu  2000   1   3      F            2     1      0      0   
3371  vanuatu  2001   1   3      F            2     1      0      0   
3372  vanuatu  2002   1   2      F            2     1      0      0   
3373  vanuatu  2003   2   2      F            2     1      0      0   
3374  vanuatu  2004   2   2      F            2     1      0      0   
3375  vanuatu  2005   2   2      F            2     1      0      0   
3376  vanuatu  2006   2   2      F            2     1      0      0   
3377  vanuatu  2007   2   2      F            2     1      0      0   
3378  vanuatu  2008   2   2      F            2     1      0      0   
3379  vanuatu  2009   2   2      F            2     1      0      0   
3380  vanuatu  2010   2   2      F            2     1      0      0   
3381  vanuatu  2011   2   2      F            2     1      0      0   
3382  vanuatu  2012   2   2      F            2     1      0      0   
3383  

In [40]:
ftr_check = ['country', 'year', 'Status', 'PR', 'CL', 'is_erosion']
final_data[ftr_check]

Unnamed: 0,country,year,Status,PR,CL,is_erosion
0,afghanistan,2000,NF,7,7,0
1,afghanistan,2001,NF,7,7,-1
2,afghanistan,2002,NF,6,6,-1
3,afghanistan,2003,NF,6,6,-1
4,afghanistan,2004,NF,5,6,-1
5,afghanistan,2005,PF,5,5,-1
6,afghanistan,2006,PF,5,5,-1
7,afghanistan,2007,PF,5,5,-1
8,afghanistan,2008,NF,5,6,-1
9,afghanistan,2009,NF,6,6,-1


In [85]:
final_data[(final_data['year'] == 2005) & (final_data['PR'] == 3) |
               (final_data['PR'] == 4) | (final_data['PR'] == 5)]

Unnamed: 0,country,year,PR,CL,Status,FH_category,FH_F,FH_NF,FH_PF,BMR_democracy,...,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim,is_erosion,match
0,afghanistan,2005,5,5,PF,1,0,0,1,0.0,...,2.0,3.0,1.0,2.0,2.0,3.0,2.0,-0.240,-1,False
1,afghanistan,2006,5,5,PF,1,0,0,1,0.0,...,2.0,,,,,,,,-1,False
2,afghanistan,2007,5,5,PF,1,0,0,1,0.0,...,2.0,,,,,,,,-1,False
3,afghanistan,2008,5,6,NF,0,0,1,0,0.0,...,2.0,,,,,,,,-1,False
12,afghanistan,2017,5,6,NF,0,0,1,0,,...,1.0,,,,,,,,-1,False
13,albania,2005,3,3,PF,1,0,0,1,1.0,...,2.0,4.0,1.0,2.0,2.0,3.0,2.0,0.036,-1,False
91,armenia,2005,5,4,PF,1,0,0,1,0.0,...,0.0,,,,,,,,-1,True
92,armenia,2006,5,4,PF,1,0,0,1,0.0,...,0.0,,,,,,,,-1,False
93,armenia,2007,5,4,PF,1,0,0,1,0.0,...,0.0,4.0,0.0,2.0,1.0,1.0,2.0,-0.804,-1,False
98,armenia,2012,5,4,PF,1,0,0,1,0.0,...,2.0,4.0,1.0,2.0,2.0,2.0,2.0,-0.399,-1,False


In [86]:
final_data.loc[final_data['country'] == 'turkey']

Unnamed: 0,country,year,PR,CL,Status,FH_category,FH_F,FH_NF,FH_PF,BMR_democracy,...,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim,is_erosion,match
2315,turkey,2005,3,3,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,True
2316,turkey,2006,3,3,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2317,turkey,2007,3,3,PF,1,0,0,1,1.0,...,3.0,4.0,3.0,2.0,4.0,4.0,2.0,1.203,-1,False
2318,turkey,2008,3,3,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2319,turkey,2009,3,3,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2320,turkey,2010,3,3,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2321,turkey,2011,3,3,PF,1,0,0,1,1.0,...,3.0,4.0,3.0,2.0,4.0,4.0,2.0,0.638,-1,False
2322,turkey,2012,3,4,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2323,turkey,2013,3,4,PF,1,0,0,1,1.0,...,3.0,,,,,,,,-1,False
2324,turkey,2014,3,4,PF,1,0,0,1,1.0,...,2.0,4.0,3.0,1.0,3.0,4.0,2.0,0.159,-1,False


In [28]:
final_data['FH_category'].loc[final_data['Status'] == 'F'] = 2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [29]:
final_data['FH_category'].loc[final_data['Status'] == 'PF'] = 1

In [30]:
final_data[['country', 'year', 'Status', 'FH_category']][:10]

Unnamed: 0,country,year,Status,FH_category
0,afghanistan,2005,PF,1
1,afghanistan,2006,PF,1
2,afghanistan,2007,PF,1
3,afghanistan,2008,NF,0
4,afghanistan,2009,NF,0
5,afghanistan,2010,NF,0
6,afghanistan,2011,NF,0
7,afghanistan,2012,NF,0
8,afghanistan,2013,NF,0
9,afghanistan,2014,NF,0


In [31]:
final_data['is_erosion'] = -1

In [65]:
#df['match'] = df.col1.eq(df.col1.shift())
final_data['match'] = ((final_data['Status'] == 'PF') & (final_data['Status'].shift() == 'F'))

In [70]:
final_data[final_data['match'] == 1][['country', 'year', 'Status', 'match']]

Unnamed: 0,country,year,Status,match
91,armenia,2005,PF,True
156,bahrain,2005,PF,True
338,burkina faso,2005,PF,True
416,central african republic,2005,PF,True
611,djibouti,2005,PF,True
647,dominican republic,2015,PF,True
728,ethiopia,2005,PF,True
780,gabon,2005,PF,True
858,guatemala,2005,PF,True
983,indonesia,2013,PF,True


Unnamed: 0,country,year,PR,CL,Status,FH_category,FH_F,FH_NF,FH_PF,BMR_democracy,...,v2elembaut_ord,v2elmulpar_ord,v2elvotbuy_ord,v2elfrcamp_ord,v2elfrfair_ord,v2elaccept_ord,v2elasmoff_ord,v2elintim,is_erosion,match
1763,poland,2005,1,1,F,2,1,0,0,1.0,...,4.0,4.0,4.0,2.0,4.0,4.0,2.0,1.785,-1,False
1764,poland,2006,1,1,F,2,1,0,0,1.0,...,4.0,,,,,,,,-1,False
1765,poland,2007,1,1,F,2,1,0,0,1.0,...,4.0,4.0,4.0,2.0,4.0,4.0,2.0,1.598,-1,False
1766,poland,2008,1,1,F,2,1,0,0,1.0,...,4.0,,,,,,,,-1,False
1767,poland,2009,1,1,F,2,1,0,0,1.0,...,4.0,,,,,,,,-1,False
1768,poland,2010,1,1,F,2,1,0,0,1.0,...,4.0,4.0,4.0,2.0,4.0,4.0,2.0,1.826,-1,False
1769,poland,2011,1,1,F,2,1,0,0,1.0,...,4.0,4.0,4.0,2.0,4.0,4.0,2.0,1.868,-1,False
1770,poland,2012,1,1,F,2,1,0,0,1.0,...,4.0,,,,,,,,-1,False
1771,poland,2013,1,1,F,2,1,0,0,1.0,...,4.0,,,,,,,,-1,False
1772,poland,2014,1,1,F,2,1,0,0,1.0,...,3.0,,,,,,,,-1,False


In [33]:
final_data[['country', 'year', 'Status']].head()

Unnamed: 0,country,year,Status
0,afghanistan,2005,PF
1,afghanistan,2006,PF
2,afghanistan,2007,PF
3,afghanistan,2008,NF
4,afghanistan,2009,NF


In [43]:
final_data_gb = final_data.groupby(['country'])

In [62]:
for name, group in final_data_gb:
    for i in range(group.shape[0]):
        if i == 0:
            final_data.iloc[i]['is_erosion'] = 0
        else:
            cur_row = final_data.iloc[i]
            prev_row = final_data.iloc[i-1]
            cur_row['is_erosion'] = cur_row['is_erosion'].where((cur_row['Status'] == 'PF') &
                                                                (prev_row['Status'] == 'F'), 0)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [47]:
#for name, group in final_data_gb:

In [31]:
#final_data['is_erosion'] = final_data['is_erosion'].where(final_data['Status'] == 'PF', 0)

In [32]:
#final_data[['country', 'year', 'Status', 'is_erosion']]

Unnamed: 0,country,year,Status,is_erosion
0,afghanistan,2008,NF,0
1,afghanistan,2009,NF,0
2,afghanistan,2010,NF,0
3,afghanistan,2011,NF,0
4,afghanistan,2012,NF,0
5,afghanistan,2013,NF,0
6,afghanistan,2014,NF,0
7,afghanistan,2015,NF,0
8,afghanistan,2016,NF,0
9,afghanistan,2017,NF,0


In [33]:
final_data.columns

Index(['country', 'year', 'PR', 'CL', 'Status', 'FH_category', 'FH_F', 'FH_NF',
       'FH_PF', 'BMR_democracy', 'BMR_democracy_trans',
       'BMR_democracy_breakdowns', 'BMR_democracy_duration', 'v2lgfunds_ord',
       'v2lgamend', 'v2exhoshog', 'v2exaphogp', 'v2exaphos', 'v2ddlexci',
       'v2ddlexrf', 'v2ddlexpl', 'v2lginello', 'v2lginelup',
       'v2exl_legitlead_ord', 'v2pepwrgeo_ord', 'v2clgencl_ord',
       'v2clpolcl_ord', 'v2peapssoc_ord', 'v2peapsgen_ord', 'v2peapsecon_ord',
       'v2peapspol_ord', 'v2peapsgeo_ord', 'v2pepwrses_ord', 'v2pepwrsoc_ord',
       'v2pepwrgen_ord', 'v2peedueq_ord', 'v2pehealth_ord', 'v2mecenefm_ord',
       'v2mecenefi_ord', 'v2mecrit_ord', 'v2meslfcen_ord', 'v2mebias_ord',
       'v2mecorrpt_ord', 'v2cseeorgs_ord', 'v2csreprss_ord', 'v2cltrnslw_ord',
       'v2clrspct_ord', 'v2cldiscm_ord', 'v2cldiscw_ord', 'v2jureform_ord',
       'v2jupoatck_ord', 'v2jupack_ord', 'v2juhcind_ord', 'v2juncind_ord',
       'v2jureview_ord', 'v2lgotovst_ord', 'v

In [34]:
final_data.shape

(1946, 76)

In [35]:
final_data.to_csv('./democracy.csv', index=False)