In [37]:
import pandas as pd
import numpy as np
from pandasql import sqldf
def pysqldf(q):
    """Run the SQL text `q` through pandasql's sqldf, resolving table names from this module's globals."""
    return sqldf(q, globals())
from copy import deepcopy
from traceback import format_exc
# from pprint import pprint
import the_networks_of_war_python_functions

In [38]:
## pandas display settings: no cap on the number of columns shown, floats rendered with five decimals
display_options = {
    'display.max_columns': None,
    'display.float_format': lambda value: '%.5f' % value,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [39]:
## every input csv and intermediate pickle lives under one project data folder
data_root = '/Users/charlieyaris/Personal/data_sources/the_networks_of_war/'
csv_directory = data_root + 'csvs/'
pickle_directory = data_root + 'pickles/'

In [4]:
## other documentation:
## Entities.pdf
## MII_v4.0_Codebook.pdf
## Version 4 release notes.pdf

In [5]:
## country-code lookup: one row per (c_code, state_name, state_name_abbreviation)
c_code_df = pd.read_csv(csv_directory + 'COW country codes.csv', encoding='latin-1')

## the query renames the three source columns and uses `group by` on all of them to drop repeated rows.
## pysqldf looks table names up in the notebook globals, so `c_code_df` below is the frame read above.
query_text = """

select
    ccode as c_code,
    statenme as state_name,
    stateabb as state_name_abbreviation
from c_code_df
group by 1, 2, 3

"""

## the raw frame is replaced by the renamed, de-duplicated lookup
c_code_df = deepcopy(pysqldf(query_text))

In [6]:
## spot check: display the lookup row(s) for c_code 200
c_code_df.loc[c_code_df['c_code'] == 200]

Unnamed: 0,c_code,state_name,state_name_abbreviation
35,200,United Kingdom,UKG


# Descriptive Statistics for Each Country by Year
### Note: Applies to states/countries only.
### This will be joined to the participants of each war

### First, defining the participants by year to exclude any countries/years that will not be needed later on.

In [49]:
## data source documentation: globalterrorismdb_0221dist.pdf
terr_columns = ['iyear', 'country_txt', 'nkill', 'nwound', 'nhostkid']
terr_df = pd.read_csv(csv_directory + 'globalterrorismdb_0221dist.csv')[terr_columns]

## Hong Kong rows dated 1997 or earlier are attributed to the United Kingdom, later rows to China.
## This runs before the generic renaming, so the 'Hong Kong' entry in the dictionary below never matches.
is_hong_kong = terr_df['country_txt'] == 'Hong Kong'
terr_df.loc[is_hong_kong & (terr_df['iyear'] <= 1997), 'country_txt'] = 'United Kingdom'
terr_df.loc[terr_df['country_txt'] == 'Hong Kong', 'country_txt'] = 'China'

## renaming country names to the best-matched correlates of war state name.
## first group: alternative spellings / historical names; second group: territories mapped to a state.
country_renaming_dic = {

    'Antigua and Barbuda': 'Antigua & Barbuda',
    'Bosnia-Herzegovina': 'Bosnia and Herzegovina',
    'East Germany (GDR)': 'German Democratic Republic',
    'North Yemen': 'Yemen Arab Republic',
    "People's Republic of the Congo": 'Congo',
    'Republic of the Congo': 'Congo',
    'Rhodesia': 'Zimbabwe',
    'Serbia-Montenegro': 'Yugoslavia',
    'Slovak Republic': 'Slovakia',
    'South Vietnam': 'Republic of Vietnam',
    'South Yemen': "Yemen People's Republic",
    'Soviet Union': 'Russia',
    'United States': 'United States of America',
    'West Germany (FRG)': 'German Federal Republic',
    'Zaire': 'Democratic Republic of the Congo',

    'Falkland Islands': 'United Kingdom',
    'French Guiana': 'France',
    'French Polynesia': 'France',
    'Guadeloupe': 'France',
    'Hong Kong': 'China',
    'Macau': 'China',
    'Martinique': 'France',
    'New Caledonia': 'France',
    'Wallis and Futuna': 'France',
    'West Bank and Gaza Strip': 'Israel',
    'Western Sahara': 'Morocco'
}

## names without an entry are left as they are; no replacement value is itself a key,
## so a single pass gives the same result as renaming one key at a time.
terr_df['country_txt'] = terr_df['country_txt'].map(
    lambda country_name: country_renaming_dic.get(country_name, country_name))

## one row per (c_code, year); countries that match no state_name end up under a null c_code
query_text = """
    
select
    cc.c_code,
    a.iyear as year,
    sum(max(a.nkill, 0)) as terrorism_deaths
from terr_df a
left join c_code_df cc on a.country_txt = cc.state_name
group by 1, 2

"""

terr_df = pysqldf(query_text)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [8]:
## data source documentation: Dyadic MID Codebook V4.0.pdf
## adjusting dyadic mid data to become one country by year
## only the year, the dispute/dyad identifiers, both state codes and both role codes are read
mid_df = pd.read_csv(csv_directory + 'dyadic_mid_4.02.csv')[['year', 'disno', 'dyindex', 'statea', 'stateb', 'rolea', 'roleb']]

## the CTE stacks the state-a side and the state-b side of every dyad into one
## (year, c_code, disno, dyindex, role) table, so each state appears under c_code with its own role.
## the outer query counts distinct dyindex values per (year, c_code): overall, and split by
## role = 1 (output column mid_dyads_initiated), role = 3 (mid_dyads_targeted) and role in (2, 4) (mid_dyads_joined).
query_text = """

with

mid_union_table as (

    select
        year,
        statea as c_code,
        disno,
        dyindex,
        rolea as role
    from mid_df
    group by 1, 2, 3, 4, 5
    union
    select
        year,
        stateb as c_code,
        disno,
        dyindex,
        roleb as role
    from mid_df
    group by 1, 2, 3, 4, 5)

select
    year,
    c_code,
    count(distinct dyindex) as mid_dyads,
    count(distinct case when role = 1 then dyindex else null end) as mid_dyads_initiated,
    count(distinct case when role = 3 then dyindex else null end) as mid_dyads_targeted,
    count(distinct case when role in (2, 4) then dyindex else null end) as mid_dyads_joined
from mid_union_table
group by 1, 2

"""

## the dyadic frame is replaced by the per-country, per-year counts
mid_df = deepcopy(pysqldf(query_text))

In [9]:
## data source documentation: CoW_ally_data_change_history.pdf
## data source documentation: Version 4 release notes.pdf
## only counting alliances that are considered defense, neutrality, nonaggression or entente.
## basically excluding alliances that do not have the type cataloged.
## adjusting dyadic alliance data to become one country by year
alliance_df = pd.read_csv(csv_directory + 'alliance_v4.1_by_directed_yearly.csv')[['year', 'ccode1', 'ccode2', 'defense', 'neutrality', 'entente', 'nonaggression']]

## the CTE keeps rows where at least one of the four type flags is set (their sum is > 0) and lists
## every pair in both directions, so each state appears as c_code_a with its partner as c_code_b.
## the outer query counts the distinct partners per (year, c_code).
query_text = """

with

alliance_union_table as (

    select
        year,
        ccode1 as c_code_a,
        ccode2 as c_code_b
    from alliance_df
    where
        (defense + neutrality + nonaggression + entente) > 0
    group by 1, 2, 3
    union
    select
        year,
        ccode2 as c_code_a,
        ccode1 as c_code_b
    from alliance_df
    where
        (defense + neutrality + nonaggression + entente) > 0
    group by 1, 2, 3)

select
    year,
    c_code_a as c_code,
    count(distinct c_code_b) as allied_countries
from alliance_union_table
group by 1, 2

"""

## the dyadic frame is replaced by one row per (year, c_code)
alliance_df = deepcopy(pysqldf(query_text))

In [10]:
## data source documentation: COW Trade Data Set Codebook.pdf
## adjusting dyadic trade data to become one country by year
trade_df1 = pd.read_csv(csv_directory + 'Dyadic_COW_4.0.csv')[['year', 'ccode1', 'ccode2', 'flow1', 'flow2']]

## the CTE keeps dyad-years where at least one flow is positive and lists each pair from both sides:
## for ccode1, flow1 is money_flow_in and flow2 is money_flow_out; for ccode2 the two are swapped.
## max(coalesce(flow, 0), 0) turns nulls into 0 and is presumably the two-argument scalar max of
## SQLite (pandasql's default engine), i.e. it clamps negative values to 0 -- TODO confirm.
## the outer query sums both flows and counts the distinct partners per (year, c_code).
query_text = """

with
    
trade_union_table as (

    select
        year,
        ccode1 as c_code_a,
        ccode2 as c_code_b,
        max(coalesce(flow1, 0), 0) as money_flow_in,
        max(coalesce(flow2, 0), 0) as money_flow_out
    from trade_df1
    where
        flow1 > 0
        or flow2 > 0
    group by 1, 2, 3, 4, 5
    union
    select
        year,
        ccode2 as c_code_a,
        ccode1 as c_code_b,
        max(coalesce(flow2, 0), 0) as money_flow_in,
        max(coalesce(flow1, 0), 0) as money_flow_out
    from trade_df1
    where
        flow1 > 0
        or flow2 > 0
    group by 1, 2, 3, 4, 5)

select
    year,
    c_code_a as c_code,
    sum(money_flow_in) as money_flow_in,
    sum(money_flow_out) as money_flow_out,
    count(distinct c_code_b) as trade_countries
from trade_union_table
group by 1, 2

"""

## disabled per-year ranking columns, kept for reference; they are not part of the query above
#     b.c_codes_in_year,
#     rank() over(partition by a.year order by sum(a.money_flow_in) desc) as rank_money_flow_in,
#     rank() over(partition by a.year order by sum(a.money_flow_out) desc) as rank_money_flow_out,
#     rank() over(partition by a.year order by count(distinct a.c_code_b) desc) as rank_trade_countries

## the dyadic frame is replaced by one row per (year, c_code)
trade_df1 = deepcopy(pysqldf(query_text))

In [11]:
## data source documentation: COW Trade Data Set Codebook.pdf
## national (per-country) import/export figures, aggregated to one row per country and year
trade_df2 = pd.read_csv(csv_directory + 'National_COW_4.0.csv', encoding='latin-1')[['year', 'ccode', 'imports', 'exports']]

## keeps rows where imports or exports is positive, then sums both per (year, c_code).
## nulls are coalesced to 0; the two-argument max(..., 0) presumably clamps negatives to 0
## (SQLite scalar max) -- TODO confirm.
query_text = """

select
    year,
    ccode as c_code,
    sum(max(coalesce(imports, 0), 0)) as imports,
    sum(max(coalesce(exports, 0), 0)) as exports
from trade_df2
where
    imports > 0
    or exports > 0
group by 1, 2

"""

## the raw frame is replaced by the aggregated one
trade_df2 = deepcopy(pysqldf(query_text))

In [12]:
## data source documentation: NMC_Documentation_v6_0_final_v2.pdf
nmc_df = pd.read_csv(csv_directory + 'NMC-60-wsupplementary.csv', encoding='latin-1')[['year', 'ccode', 'milex', 'milper', 'irst', 'pec', 'tpop', 'upop', 'upopgrowth', 'cinc']]

## no aggregation happens here: the query only gives the source columns descriptive names,
## and grouping by all ten output columns drops fully identical rows.
query_text = """

select
    year,
    ccode as c_code,
    milex as military_expenditure,
    milper as military_personnel,
    irst as iron_steel_production,
    pec as energy_consumption,
    tpop as population,
    upop as urban_population,
    upopgrowth as urban_population_growth_rate,
    cinc as cinc_score
from nmc_df
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

"""

## the raw frame is replaced by the renamed one
nmc_df = deepcopy(pysqldf(query_text))

In [13]:
## data source documentation: tcmanual.pdf
## each source row carries a gainer, a loser, an area and a pop value for one year
tc_df = pd.read_csv(csv_directory + 'tc2018.csv', encoding='latin-1')[['year', 'gainer', 'loser', 'area', 'pop']]

## the CTE emits one signed row per (year, state) for each side of an exchange:
##   gainer side: *_gain = +total, *_loss = -total
##   loser side:  *_gain = -total, *_loss = +total
## so the sums in the outer query are net figures per (year, c_code).
## the population columns are summed from the population columns of the CTE; they were previously
## summed from the land-mass columns, which made them copies of the land-mass figures.
query_text = """

with
    
gainer_loser_union_table as (

    select
        year,
        gainer as c_code,
        sum(max(area, 0)) as land_mass_exchange_gain,
        sum(max(pop, 0)) as population_exchange_gain,
        sum(max(area, 0)) * -1 as land_mass_exchange_loss,
        sum(max(pop, 0)) * -1 as population_exchange_loss
    from tc_df
    group by 1, 2
    union
    select
        year,
        loser as c_code,
        sum(max(area, 0)) * -1 as land_mass_exchange_gain,
        sum(max(pop, 0)) * -1 as population_exchange_gain,
        sum(max(area, 0)) as land_mass_exchange_loss,
        sum(max(pop, 0)) as population_exchange_loss
    from tc_df
    group by 1, 2)
    
select
    year,
    c_code,
    sum(land_mass_exchange_gain) as land_mass_exchange_gain,
    sum(land_mass_exchange_loss) as land_mass_exchange_loss,
    sum(population_exchange_gain) as population_exchange_gain,
    sum(population_exchange_loss) as population_exchange_loss
from gainer_loser_union_table
group by 1, 2

"""

## the raw frame is replaced by one row per (year, c_code)
tc_df = deepcopy(pysqldf(query_text))

In [14]:
## data source: https://www.systemicpeace.org/inscrdata.html
## data source documentation: FDPCodebook2008.pdf
## Forcibly Displaced Populations, 1946-2018
dp_df = pd.read_csv(csv_directory + 'FDP2008a.csv', encoding='latin-1')[['year', 'ccode', 'source', 'host', 'idp']]

## sums the three source columns per (year, c_code), counting nulls as 0, and gives them descriptive names
query_text = """

select
    year,
    ccode as c_code,
    sum(coalesce(source, 0)) as refugees_originated,
    sum(coalesce(host, 0)) as refugees_hosted,
    sum(coalesce(idp, 0)) as internally_displaced_persons
from dp_df
group by 1, 2

"""

## the raw frame is replaced by the aggregated one
dp_df = deepcopy(pysqldf(query_text))

In [15]:
## calendar of candidate years; the join below expands each participant row into one row per year
years_df = pd.DataFrame({'year': np.arange(1500, 2100)})

## the four participant-level measures below get a '_z' suffix;
## days_at_war and deaths_both_sides keep their original names.
z_suffix_renaming = {
    'battle_deaths': 'battle_deaths_z',
    'days_not_at_war': 'days_not_at_war_z',
    'peak_battle_forces': 'peak_battle_forces_z',
    'peak_forces_available': 'peak_forces_available_z',
}

part_df = (
    pd.read_pickle(pickle_directory + 'initial_part_df.pkl')
    .assign(c_code=lambda frame: frame['c_code'].astype(int))
    .rename(columns=z_suffix_renaming)
)

## including start and end years since that's how participant data will be joined in step 3
## getting a dataframe for all years between a participant's start and end years.
query_text = """

select
    a.*,
    y.year
from part_df a
inner join years_df y on y.year between a.start_year and a.end_year

"""

part_df = pysqldf(query_text)

In [16]:
## taking only participants in more than 1 war in a given year
## concurrent_wars is the number of distinct wars per (year, c_code) minus one, i.e. the wars beyond the first;
## rows with c_code <= 0 are excluded and the `having` clause drops state-years with a single war.
query_text = """

select
    year,
    c_code,
    count(distinct war_num) - 1 as concurrent_wars
from part_df
where
    c_code > 0
group by 1, 2
having
    concurrent_wars > 0

"""

## reads the year-expanded part_df from the notebook globals
concurrent_wars_df = deepcopy(pysqldf(query_text))

In [17]:
## The same aggregation runs three times over different slices of the year-expanded part_df.
## Each pass averages the yearly country measures per (war_num, war_name, c_code) and tags them with a suffix:
##   _x -> rows where year equals start_year
##   _y -> rows where year equals end_year
##   _z -> every row (all years from start_year to end_year)
## pysqldf resolves table names from the notebook globals, so `part_desc_join_df` has to stay a
## top-level variable that is rebound before each call.
## The query text does not change between passes, so it is defined once.
query_text = """

    select
        a.war_num,
        a.war_name,
        a.c_code,
        round(avg(td.terrorism_deaths), 2) as terrorism_deaths,
        round(avg(md.mid_dyads), 2) as mid_dyads,
        round(avg(md.mid_dyads_initiated), 2) as mid_dyads_initiated,
        round(avg(md.mid_dyads_targeted), 2) as mid_dyads_targeted,
        round(avg(md.mid_dyads_joined), 2) as mid_dyads_joined,
        round(avg(al.allied_countries), 2) as allied_countries,
        round(avg(t1.trade_countries), 2) as trade_countries,
        round(avg(t1.money_flow_in), 2) as money_flow_in,
        round(avg(t1.money_flow_out), 2) as money_flow_out,
        round(avg(t2.imports), 2) as imports,
        round(avg(t2.exports), 2) as exports,
        round(avg(nm.military_expenditure), 2) as military_expenditure,
        round(avg(nm.military_personnel), 2) as military_personnel,
        round(avg(nm.iron_steel_production), 2) as iron_steel_production,
        round(avg(nm.energy_consumption), 2) as energy_consumption,
        round(avg(nm.population), 2) as population,
        round(avg(nm.urban_population), 2) as urban_population,
        avg(nm.urban_population_growth_rate) as urban_population_growth_rate,
        avg(nm.cinc_score) as cinc_score,
        round(max(avg(tc.land_mass_exchange_gain), 0), 2) as land_mass_exchange_gain,
        round(max(avg(tc.population_exchange_gain), 0), 2) as population_exchange_gain,
        round(max(avg(tc.land_mass_exchange_loss), 0), 2) as land_mass_exchange_loss,
        round(max(avg(tc.population_exchange_loss), 0), 2) as population_exchange_loss,
        round(avg(dp.refugees_originated), 2) as refugees_originated,
        round(avg(dp.refugees_hosted), 2) as refugees_hosted, 
        round(avg(dp.internally_displaced_persons), 2) as internally_displaced_persons,
        round(avg(cc.concurrent_wars), 2) as concurrent_wars
    from part_desc_join_df a
    left join terr_df td on a.c_code = td.c_code and a.year = td.year
    left join mid_df md on a.c_code = md.c_code and a.year = md.year
    left join alliance_df al on a.c_code = al.c_code and a.year = al.year
    left join trade_df1 t1 on a.c_code = t1.c_code and a.year = t1.year
    left join trade_df2 t2 on a.c_code = t2.c_code and a.year = t2.year
    left join nmc_df nm on a.c_code = nm.c_code and a.year = nm.year
    left join tc_df tc on a.c_code = tc.c_code and a.year = tc.year
    left join dp_df dp on a.c_code = dp.c_code and a.year = dp.year
    left join concurrent_wars_df cc on a.c_code = cc.c_code and a.year = cc.year
    where
        a.c_code > 0
    group by 1, 2, 3

    """

## columns that already exist on part_df (the join keys among them) are never suffixed
participant_columns = set(part_df.columns)
part_desc_df = None

for suffix in ('_x', '_y', '_z'):
    if suffix == '_x':
        part_desc_join_df = part_df[part_df['start_year'] == part_df['year']].reset_index(drop=True)
    elif suffix == '_y':
        part_desc_join_df = part_df[part_df['end_year'] == part_df['year']].reset_index(drop=True)
    else:
        part_desc_join_df = part_df

    part_desc_temp_df = deepcopy(pysqldf(query_text))

    ## tag every aggregated measure with the suffix of the current pass
    part_desc_temp_df = part_desc_temp_df.rename(columns={
        column: column + suffix
        for column in part_desc_temp_df.columns
        if column not in participant_columns})

    ## the first pass seeds the result; later passes are attached with an outer join on the keys
    if part_desc_df is None:
        part_desc_df = part_desc_temp_df
    else:
        part_desc_df = pd.merge(part_desc_df, part_desc_temp_df, how='outer', on=['war_num', 'war_name', 'c_code'])

In [18]:
## drop the per-year expansion so part_df is back to its distinct participant rows, then attach the aggregates
part_df = part_df.drop(columns='year').drop_duplicates(keep='first')
part_desc_df = pd.merge(part_df, part_desc_df, how='outer', on=['war_num', 'war_name', 'c_code'])

row_count_text = format(len(part_desc_df), ',d')
print('Total Rows of Descriptive/Yearly Participant Data: {}'.format(row_count_text))
part_desc_df.to_pickle(pickle_directory + 'participant_descriptive_df.pkl')

Total Rows of Descriptive/Yearly Participant Data: 1,715


# Descriptive Statistics for Each Dyad by Year
### Note: Applies to states/countries only.
### This will be joined to the dyadic pairs for each war

### First, defining the dyads by year to exclude any countries/years that will not be needed later on.

In [19]:
dyad_df = pd.read_pickle(pickle_directory + 'initial_dyad_df.pkl')
## column names as loaded, kept as a plain list
initial_dyad_columns = list(dyad_df.columns)

## the row count is halved for reporting; presumably each dyad-year is stored once per direction -- TODO confirm
unique_dyadic_years = len(dyad_df) // 2
print('Total Unique Dyadic Years in Initial Data: {}'.format(format(unique_dyadic_years, ',d')))

Total Unique Dyadic Years in Initial Data: 5,844


### Correlates of War Descriptive Data

In [20]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')
## starting columns, so that only the fields added in this cell are reported at the end
descriptive_columns = set(dyad_df.columns)

add_dyad_field = the_networks_of_war_python_functions.descriptive_dyad_from_source

## data source documentation: CoW_ally_data_change_history.pdf
## data source documentation: Version 4 release notes.pdf
## only counting alliances that are considered defense, neutrality, nonaggression or entente.
## basically excluding alliances that do not have the type cataloged.
## NOTE: this rebinds alliance_df, replacing the per-country frame built earlier in the notebook.
alliance_df = pd.read_csv(csv_directory + 'alliance_v4.1_by_directed_yearly.csv')
has_cataloged_type = (alliance_df[['defense', 'neutrality', 'nonaggression', 'entente']] == 1).any(axis=1)
alliance_df = alliance_df[has_cataloged_type].reset_index(drop=True)

## data source documentation: COW Trade Data Set Codebook.pdf
## only including countries with trade data present.
trade_df = pd.read_csv(csv_directory + 'Dyadic_COW_4.0.csv')
has_trade_flow = (trade_df['flow1'] > 0) | (trade_df['flow2'] > 0)
trade_df = trade_df[has_trade_flow].reset_index(drop=True)

## one entry per descriptive field, applied in this order:
## (csv path or None, pre-loaded frame or None, first state column, second state column, new field name)
dyad_sources = [
    ## data source documentation: tcmanual.pdf
    ## lots to use in this dataset so I'll start with the basics
    (csv_directory + 'tc2018.csv', None, 'gainer', 'loser', 'territory_exchange'),
    ## data source documentation: Direct Contiguity Codebook.pdf
    ## contiguity dataframe for states of colonial dependencies
    (csv_directory + 'contcold.csv', None, 'statelno', 'statehno', 'colonial_contiguity'),
    ## data source documentation: Direct Contiguity Codebook.pdf
    (csv_directory + 'contdird.csv', None, 'state1no', 'state2no', 'contiguity'),
    (None, alliance_df, 'ccode1', 'ccode2', 'alliance'),
    (csv_directory + 'DCAD-v1.0-dyadic.csv', None, 'ccode1', 'ccode2', 'defense_cooperation_agreements'),
    ## data source documentation: IGO Codebook_v3_short.pdf
    ## must have at least one joined by both states in order to be included
    (csv_directory + 'dyadic_formatv3.csv', None, 'ccode1', 'ccode2', 'inter_governmental_organizations'),
    ## data source documentation: Diplomatic_Exchange_2006v1_codebook.pdf
    ## this one needs to be filled since its only 5 years
    (csv_directory + 'Diplomatic_Exchange_2006v1.csv', None, 'ccode1', 'ccode2', 'diplomatic_exchange'),
    (None, trade_df, 'ccode1', 'ccode2', 'trade_relations'),
]

## the first call starts from dyad_df; every later call builds on the previous result
descriptive_df_2 = dyad_df
for data_source, source_df, state_column_1, state_column_2, field_name in dyad_sources:
    descriptive_df_2 = deepcopy(add_dyad_field(descriptive_df_2, data_source, source_df, None, state_column_1, state_column_2, 'year', field_name))

the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, descriptive_columns, None)

Counting Total Dyadic Year Combinations by Descriptive Field

inter_governmental_organizations 1439
                      contiguity  683
                 trade_relations  594
  defense_cooperation_agreements  270
             diplomatic_exchange  246
             colonial_contiguity  172
                        alliance  142
              territory_exchange  115


### Non-Correlates of War Descriptive Data

### Setting up processing of 'ddrevisited_data_v1'

In [21]:
# data source documentation: ddrevisited codebook v1.pdf
## the file is loaded once and processed twice: first keyed on cowcode, then on cowcode2
dd_df1 = pd.read_csv(csv_directory + 'ddrevisited_data_v1.csv', encoding='latin-1')
dd_df2 = dd_df1.copy()

## source column -> descriptive name; only the columns listed here are kept
## ('year' maps to itself so that it survives the column selection)
dd_df_renaming = {

    'cowcode': 'c_code',
    'emil': 'e_military_leader',
    'nmil': 'n_military_leader',
    'royal': 'royal_leader',
    'comm': 'communist_leader',
    'democracy': 'democratic_regime',
    'collect': 'collective_leadership',
    'regime': 'regime_type',
    'incumb': 'incumbent_type',
    'exselec': 'election_type',
    'legselec': 'legislature_type_1',
    'closed': 'legislature_type_2',
    'lparty': 'legislature_party_status',
    'dejure': 'party_legal_status',
    'defacto': 'party_existance_1',
    'defacto2': 'party_existance_2',
    'edeath': 'leader_died',
    'flageh': 'new_leader',
    'ttd': 'transition_to_democracy',
    'tta': 'transition_to_dictatorship',
    'year': 'year'
}

## first pass: c_code comes from cowcode; rows without a code are dropped
dd_df1 = dd_df1.rename(columns=dd_df_renaming)[list(dd_df_renaming.values())]
dd_df1 = dd_df1[~dd_df1['c_code'].isnull()].reset_index(drop=True)

## second pass: identical, except that c_code now comes from cowcode2
dd_df_renaming['cowcode2'] = dd_df_renaming.pop('cowcode')
dd_df2 = dd_df2.rename(columns=dd_df_renaming)[list(dd_df_renaming.values())]
dd_df2 = dd_df2[~dd_df2['c_code'].isnull()].reset_index(drop=True)

## stack both passes (columns sorted by name) and hand the result to the project helper
dd_df = pd.concat([dd_df1, dd_df2], sort=True, ignore_index=True)
dd_df = deepcopy(the_networks_of_war_python_functions.union_opposite_columns(dd_df))

In [22]:
## two copies of the country-year table: every column except 'year' gets an '_a' / '_b' suffix
dd_df1 = dd_df.rename(columns=lambda name: name if name == 'year' else name + '_a')
dd_df2 = dd_df.rename(columns=lambda name: name if name == 'year' else name + '_b')

## pair every state with every other state of the same year
dd_df = pd.merge(dd_df1, dd_df2, how='outer', on=['year'])

## ensuring that c_code_a and c_code_b do not match, and cowcode and cowcode2 (overlapping states) are not joined to each other.
both_codes_present = ~dd_df['c_code_a'].isnull() & ~dd_df['c_code_b'].isnull()
different_states = dd_df['c_code_a'] != dd_df['c_code_b']
dd_df = dd_df[both_codes_present & different_states].reset_index(drop=True)

## keep only the dyad-years that exist in dyad_df, then remove fully identical rows
dd_df = pd.merge(dd_df, dyad_df, how='inner', on=['year', 'c_code_a', 'c_code_b'])
dd_df = dd_df.drop_duplicates(keep='first')

In [23]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')
## starting columns, so that only the fields added in this cell are reported at the end
descriptive_columns = deepcopy(set(list(descriptive_df_2.columns)))


def _both_equal(field, value):
    """Boolean mask over dd_df: `field` equals `value` for state a and for state b."""
    return (dd_df[field + '_a'] == value) & (dd_df[field + '_b'] == value)


def _same_value(field):
    """Boolean mask over dd_df: state a and state b hold the same value of `field`."""
    return dd_df[field + '_a'] == dd_df[field + '_b']


def _democratic_side(side):
    """Boolean mask over dd_df: the state on `side` ('a' or 'b') has incumbent_type 1, democratic_regime 1 or regime_type 0, 1 or 2."""
    return ((dd_df['incumbent_type_' + side] == 1)
            | (dd_df['democratic_regime_' + side] == 1)
            | dd_df['regime_type_' + side].isin([0, 1, 2]))


def _dictatorship_side(side):
    """Boolean mask over dd_df: the state on `side` ('a' or 'b') has regime_type 3, 4 or 5."""
    return dd_df['regime_type_' + side].isin([3, 4, 5])


## (new field name, function building the row mask), applied in this order.
## the masks are built lazily, right before each helper call, exactly as the one-by-one version did.
dyad_conditions = [
    ('same_leader_type', lambda: (_same_value('e_military_leader') & _same_value('n_military_leader')
                                  & _same_value('communist_leader') & _same_value('royal_leader')
                                  & _same_value('democratic_regime'))),
    ('military_leaders', lambda: _both_equal('e_military_leader', 1) | _both_equal('n_military_leader', 1)),
    ('communist_leaders', lambda: _both_equal('communist_leader', 1)),
    ('royal_leaders', lambda: _both_equal('royal_leader', 1)),
    ('democratic_incumbent', lambda: _both_equal('incumbent_type', 1)),
    ('unconstitutional_incumbent', lambda: _both_equal('incumbent_type', 2)),
    ('democratic_regimes', lambda: _democratic_side('a') & _democratic_side('b')),
    ('dictatorships', lambda: _dictatorship_side('a') & _dictatorship_side('b')),
    ('collective_leaderships', lambda: _both_equal('collective_leadership', 1)),
    ('direct_election', lambda: _both_equal('election_type', 1)),
    ('indirect_election', lambda: _both_equal('election_type', 2)),
    ('non_elected_leaders', lambda: _both_equal('election_type', 3)),
    ('no_legislature', lambda: _both_equal('legislature_type_1', 0)),
    ('non_elective_legislature', lambda: _both_equal('legislature_type_1', 1)),
    ('elective_legislature', lambda: _both_equal('legislature_type_1', 2)),
    ('no_partisan_legislature_legal', lambda: _both_equal('legislature_party_status', 0)),
    ('no_non_regime_legislature_parties_legal', lambda: _both_equal('legislature_party_status', 1)),
    ('multi_party_legislature_legal', lambda: _both_equal('legislature_party_status', 2)),
    ('all_parties_illegal', lambda: _both_equal('party_legal_status', 0)),
    ('single_party_state_exists', lambda: _both_equal('party_legal_status', 1)),
    ('multi_party_state_exists', lambda: _both_equal('party_legal_status', 2)),
    ('no_parties_exist', lambda: _both_equal('party_existance_1', 0) & _both_equal('party_existance_2', 0)),
    ('one_party_exists', lambda: _both_equal('party_existance_1', 1)),
    ('no_non_regime_parties_exist', lambda: _both_equal('party_existance_2', 1)),
    ## previously tested == 0 on both sides, which flags dyads where neither leader died;
    ## every other event flag in this list tests == 1.
    ## NOTE(review): assumes the source column (edeath) is coded 1 for a death -- confirm against the codebook.
    ('leader_died', lambda: _both_equal('leader_died', 1)),
    ('new_leader', lambda: _both_equal('new_leader', 1)),
    ('transition_to_democracy', lambda: _both_equal('transition_to_democracy', 1)),
    ('transition_to_dictatorship', lambda: _both_equal('transition_to_dictatorship', 1)),
]

for field_name, build_condition in dyad_conditions:
    conditional_statement = build_condition()
    descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', field_name))

the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, descriptive_columns, None)

Counting Total Dyadic Year Combinations by Descriptive Field

                   elective_legislature 451
               multi_party_state_exists 300
                          dictatorships 276
                    non_elected_leaders 130
                       same_leader_type 128
          multi_party_legislature_legal  87
                      indirect_election  74
            no_non_regime_parties_exist  65
                       military_leaders  55
          no_partisan_legislature_legal  47
                        direct_election  34
                     democratic_regimes  31
no_non_regime_legislature_parties_legal  26
                       one_party_exists  22
                         no_legislature  17
                      communist_leaders  15
                    all_parties_illegal  14
                             new_leader  13
               non_elective_legislature  12
              single_party_state_exists   8
                       no_parties_exist   2
              

### Other Non-COW Data Sources

In [24]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')

## Snapshot of the column names that exist before this cell runs;
## print_new_fields receives it at the end of the cell.
descriptive_columns = set(descriptive_df_2.columns)

## Alliance Treaty Obligations and Provisions (ATOP)
## data source documentation: ATOPcodebookV4.pdf
data_source = csv_directory + 'atop4_01ddyr.csv'
descriptive_df_2 = deepcopy(
    the_networks_of_war_python_functions.descriptive_dyad_from_source(
        descriptive_df_2, data_source, None, None, 'stateA', 'stateB', 'year', 'atop'
    )
)

## The ICOW multilateral treaties of pacific settlement (MTOPS)
## data source documentation: mtcode.pdf
data_source = csv_directory + 'mtopsd150.csv'
mtops_dy_df = pd.read_csv(data_source, encoding='utf8')

## Row-wise total of the six MTOPS columns. skipna=False keeps a missing value in any
## column propagating into the total, exactly as chained '+' does.
mtops_total_columns = ['pacsettg', 'pacsettr', 'pacsett', 'tergen', 'terviol', 'tertot']
mtops_dy_df['total'] = mtops_dy_df[mtops_total_columns].sum(axis=1, skipna=False)

## Only rows with a strictly positive total are passed on as 'mtops'.
conditional_statement = (mtops_dy_df['total']>0)
descriptive_df_2 = deepcopy(
    the_networks_of_war_python_functions.descriptive_dyad_from_source(
        descriptive_df_2, 'conditional', mtops_dy_df, conditional_statement,
        'state1', 'state2', 'year', 'mtops'
    )
)

the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, descriptive_columns, None)

Counting Total Dyadic Year Combinations by Descriptive Field

mtops 822
 atop 184


In [25]:
## Collapses the dyad-year table to one row per dyad key, carrying three versions of every
## descriptive field: '_x' (row where year == start_year), '_y' (row where year == end_year)
## and '_z' (max over all rows of the key).
## NOTE: not re-runnable as written -- 'year' is dropped from descriptive_df_2 and removed from
## initial_dyad_columns, so a second execution fails on those lookups.
non_groupby_columns = ['battle_deaths_a', 'battle_deaths_b', 'battle_deaths_est_a', 'battle_deaths_est_b']
year_and_non_groupby = non_groupby_columns + ['year']

is_start_year = descriptive_df_2['start_year']==descriptive_df_2['year']
is_end_year = descriptive_df_2['end_year']==descriptive_df_2['year']

start_dy_df = descriptive_df_2[is_start_year].reset_index(drop=True).drop(columns=year_and_non_groupby)
# end_dy_df keeps the non_groupby_columns since these will be joined and not aggregated.
end_dy_df = descriptive_df_2[is_end_year].reset_index(drop=True).drop(columns=['year'])
descriptive_df_2 = descriptive_df_2.drop(columns=year_and_non_groupby)

# Trim the shared key list in place (later cells read it): 'year' first, then the non-groupby columns.
for removed_key in ['year'] + non_groupby_columns:
    initial_dyad_columns.remove(removed_key)

# Everything left in descriptive_df_2 that is not a key column is a descriptive field.
descriptive_fields = [
    field for field in descriptive_df_2.columns
    if field not in initial_dyad_columns and field not in non_groupby_columns
]

start_dy_df = start_dy_df.rename(columns={field: field + '_x' for field in descriptive_fields})
end_dy_df = end_dy_df.rename(columns={field: field + '_y' for field in descriptive_fields})
descriptive_df_2 = descriptive_df_2.rename(columns={field: field + '_z' for field in descriptive_fields})
aggregations = {field + '_z': 'max' for field in descriptive_fields}

descriptive_df_2 = descriptive_df_2.groupby(initial_dyad_columns).agg(aggregations).reset_index()

# Outer joins on the key columns: start-year values, end-year values, then the per-key maxima.
start_end_df = pd.merge(start_dy_df, end_dy_df, how='outer', on=initial_dyad_columns)
descriptive_df_2 = pd.merge(start_end_df, descriptive_df_2, how='outer', on=initial_dyad_columns)

### Removing any descriptive dyadic field with 0 records returned.

In [26]:
## Candidate fields: every column other than the dyad key columns.
descriptive_columns = descriptive_df_2.drop(columns=initial_dyad_columns).columns.tolist()

## A field is kept as soon as one row holds a value greater than 0; the rest are
## reported in column order and dropped together after the scan.
fields_without_records = []
for column in descriptive_columns:
    has_positive_record = (descriptive_df_2[column] > 0).any()
    if has_positive_record:
        continue
    print('Fields Removed: {}'.format(column))
    fields_without_records.append(column)

descriptive_df_2 = descriptive_df_2.drop(columns=fields_without_records)

Fields Removed: royal_leaders_x
Fields Removed: democratic_incumbent_x
Fields Removed: unconstitutional_incumbent_x
Fields Removed: collective_leaderships_x
Fields Removed: leader_died_x
Fields Removed: transition_to_democracy_x
Fields Removed: transition_to_dictatorship_x
Fields Removed: royal_leaders_y
Fields Removed: democratic_incumbent_y
Fields Removed: unconstitutional_incumbent_y
Fields Removed: collective_leaderships_y
Fields Removed: leader_died_y
Fields Removed: transition_to_democracy_y
Fields Removed: transition_to_dictatorship_y
Fields Removed: royal_leaders_z
Fields Removed: democratic_incumbent_z
Fields Removed: unconstitutional_incumbent_z
Fields Removed: collective_leaderships_z
Fields Removed: leader_died_z
Fields Removed: transition_to_democracy_z
Fields Removed: transition_to_dictatorship_z


In [27]:
## The reported figure is half the row count, rounded down, with thousands separators.
dyadic_combination_count = len(descriptive_df_2) // 2
print('Total Dyadic Combinations of Descriptive Data: {}'.format(format(dyadic_combination_count, ',d')))

## Persist the dyadic descriptive table for later use.
descriptive_df_2.to_pickle(pickle_directory + 'dyadic_descriptive_df.pkl')

Total Dyadic Combinations of Descriptive Data: 3,052


In [28]:
# ## not sure that this one is worth the trouble because ccodes are not included
# part_df_4 = pd.read_csv(csv_directory + 'co-emissions-per-capita.csv', encoding='latin-1')

In [29]:
# pd.read_csv(csv_directory + 'tc2018.csv', encoding='utf8')


# Process of Territorial Change: The process of territorial change includes six possible procedures:
# 1. Conquest
# 2. Annexation
# 3. Cession
# 4. Secession
# 5. Unification
# 6. Mandated territory

# Portion of the Territory Exchanged: "Portion of unit exchanged" indicates whether part or all of
# the unit being transferred was involved in the exchange. A "0" means that part of the unit was
# transferred, a "1" indicates that the entire unit was involved in the exchange, and a "-9" means
# that this could not be determined.

In [30]:
# data_source = csv_directory + 'ucdp-peace-agreements-191.csv'
# dy_df_19 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Conflict Termination Dataset version 2-2015
# data_source = csv_directory + 'ucdp-term-dyadic-2015.csv'
# dy_df_20 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Battle-Related Deaths Dataset version 20.1
# data_source = csv_directory + 'UCDP Battle-Related Deaths Dataset version 20.1'
# dy_df_21 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Actor Dataset version 20.1
# data_source = csv_directory + 'ucdp-actor-201.csv'
# dy_df_22 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Non-state Conflict Issues and Actors Dataset
# data_source = csv_directory + 'UCDP_NS_IAD.csv'
# dy_df_23 = pd.read_csv(data_source, encoding='utf8')

# # UCDP External Support in Non-state Conflict Dataset
# data_source = csv_directory + 'UCDP External Support in Non-state Conflict Dataset v.1.0.csv'
# dy_df_24 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Managing Intrastate Low-intensity Conflict (MILC) dataset
# data_source = csv_directory + 'milc-10.csv'
# dy_df_25 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Managing Intrastate Conflict (MIC) dataset
# ## more than 2 ids per conflict
# data_source = csv_directory + 'micFINAL.csv'
# dy_df_26 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(initial_dyad_df, data_source, None, 'contry_id', 'thirdid1', 'year', 'atop'))

# # 'gwno'
# # 'dyad_id'
# ## only including failed peace agreements
# dy_df_19 = deepcopy(dy_df_19[dy_df_19['ended']==True])[['gwno', 'dyad_id', 'dyad_name', 'actor_id', 'actor_name', 'year', 'duration', 'c_duration']]


In [31]:
# for year in np.arange(1800, 2020):
#     for row in dy_df_7['year']:
#         if len(dy_df_7[dy_df_7['year']==year])== 0:
#             temp_dyad_df = deepcopy(dy_df_7[dy_df_7['year']==year].reset_index())
#             for i, dyad in enumerate(temp_dyad_df['year']):
#                 dyad_df_length = deepcopy(len(dy_df_7))
#                 dy_df_7.loc[dyad_df_length, 'year'] = year
#                 dy_df_7.loc[dyad_df_length, 'c_code_a'] = temp_dyad_df.loc[i, 'c_code_a']
#                 dy_df_7.loc[dyad_df_length, 'c_code_b'] = temp_dyad_df.loc[i, 'c_code_b']
#         else:
#             current_year = year
            
# dy_df_7['diplomatic_exchange'] = 1
# print(len(dy_df_7))