In [1]:
import pandas as pd

# Dispute-level data and corresponding country codes into data frames
disputes = pd.read_csv('data/MIDB_4.01.csv')
country_codes = pd.read_csv('data/COW country codes.csv')
country_codes = country_codes.drop_duplicates()

# Get rid of columns we don't care about
disputes = disputes.loc[:, ['StAbb', 'ccode', 'StYear', 'EndYear', 'Orig', 'Fatality', 'FataPre', 'HiAct']]

# Only include hostile acts that involved at least a border violation
severity_filter = disputes['HiAct'].map(lambda x: x >= 12)
disputes = disputes[severity_filter]

In [12]:
# Get state membership by year
states_by_year = pd.read_csv('data/system2011.csv')

In [13]:
# Get years covered
years = states_by_year['year'].unique()

# Make a dict to track state membership by year
membership = dict()
for year in years:
    membership[year] = set()
    
# Iterate over the dataset, creating a set for each year with state membership by CCode
for row in states_by_year.iterrows():
    membership[row[1]['year']].add(row[1]['ccode'])

In [14]:
# Need to do a dataframe with all countries that existed in a year, whether attacked or not and with potential 
# explanatory variables (10 year average of CINC, international memberships, etc.).

# Create a new dataframe with the dispute data but filter out countries who were aggressor and add the variable columns
non_aggressors_filter = disputes['Orig'].map(lambda x: x == 0)
non_aggressors = disputes[non_aggressors_filter]

# New dataframe to store all countries, including those with no disputes in a given year
total_possible_conflicts = disputes

# TODO only include non-aggressors since we are studying deterrence

# Add the countries that existed in each year but did not have a conflict
for year in range(1816, 2011):
    
    # Find the countries that existed AND had a conflict for given year
    filter_criteria = disputes['StYear'].map(lambda x: x == year)
    
    # Find countries that existed but did not have a conflict
    had_conflict = set(disputes[filter_criteria]['ccode'])
    no_conflict = membership[year] - had_conflict
    
    for country in no_conflict:
        # Add to original data frame (conflict-related columns should be blank)
        new_line = pd.DataFrame({'ccode': country, 'no_conflict': 1, 'StYear': year, 'EndYear': year}, index=[0])
        total_possible_conflicts = total_possible_conflicts.append(new_line, ignore_index=True)
    

In [15]:
# Add the variables investigating (NMC, etc.)
nmc = pd.read_csv('data/NMC_v4_0.csv')
total_possible_conflicts = pd.merge(left=total_possible_conflicts, right=nmc, left_on=['ccode', 'StYear'], 
                                    right_on=['ccode', 'year'])

# Fill out no_conflict for countries that had one (this is the variable we are testing).
total_possible_conflicts.no_conflict = total_possible_conflicts.no_conflict.fillna(value=0)

# Merge with country code names to get the full state name
total_possible_conflicts = pd.merge(left=total_possible_conflicts,right=country_codes, 
                                    left_on='ccode', right_on='CCode', how='inner')


In [98]:
"""
Remove right and left-censored data then get collection of durations since last conflict for fully-observed cycles.
"""

# Sort conflict data by country then year
total_possible_conflicts.sort_values(by=['CCode', 'StYear'], ascending=[True, True], inplace=True)
# Add column to track duration since last war by country
total_possible_conflicts['duration'] = pd.Series()
# Get list of all countries in dataset to track when they are no longer left-censored
censored_countries = pd.Series(True, index=states_by_year['ccode'].unique())
# Track duration of current periods
current_periods = conflict_last_iteration = pd.Series(0, index=states_by_year['ccode'].unique())
# Track results
durations_since_conflict = list()

# Get the all fully-observed durations since last conflict in data set. 
# Iterate by year since some years have multiple entries (if a state was involved in a multi-part dispute 
# during a single year).
for year in range(1816, 2011):
    year_filter = total_possible_conflicts['StYear'].map(lambda x: x == year)
    year_data = total_possible_conflicts[year_filter]
    for ccode in states_by_year['ccode'].unique():
        country_filter = year_data['ccode'].map(lambda x: x == ccode)
        country_data = year_data[country_filter]

        # Find instances of given country in given year starting a conflict
        conflicts_for_year = country_data.loc[(total_possible_conflicts['no_conflict'] == 0)]
        
        # Check if the country had a conflict during selection
        if(len(conflicts_for_year.index) > 1):
            # Reset period
            current_periods[ccode] = 0
            # Make sure country isn't censored
            censored_countries[ccode] = False
        else:
            # Track the duration since last conflict if not censored
            if censored_countries[ccode] == False:
                durations_since_conflict.append(current_periods[ccode])
                
        # Add duration since last conflict to the row but only if not censored
        if censored_countries[ccode] == False:
            total_possible_conflicts.loc[year_filter & country_filter] = current_periods[ccode]
        
        # Increment the period
        current_periods[ccode] += 1

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs


In [101]:
# Use durations from above to get media duration between conflict for fully-observed periods
median_duration = pd.Series(durations_since_conflict).median()
print(median_duration)

# Populate left-censored data with median duration

17.0
