In [1]:
import pandas as pd
import numpy as np
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())
from copy import deepcopy
from traceback import format_exc
# from pprint import pprint
import the_networks_of_war_python_functions

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [3]:
csv_directory = '/Users/charlieyaris/Personal/data_sources/the_networks_of_war/csvs/'
pickle_directory = '/Users/charlieyaris/Personal/data_sources/the_networks_of_war/pickles/'

In [4]:
c_code_df = pd.read_csv(csv_directory + 'COW country codes.csv', encoding='latin-1')

query_text = """

select
    ccode as c_code,
    statenme as state_name,
    stateabb as state_name_abbreviation
from c_code_df
group by 1, 2, 3

"""

c_code_df = deepcopy(pysqldf(query_text))

# Descriptive Statistics for Each Country by Year
### Note: Applies to states/countries only.
### This will be joined to the participants of each war

### First, defining the participants by year to exclude any countries/years that will not be needed later on.

In [5]:
## adjusting dyadic alliance data to become one country by year
alliance_df = pd.read_csv(csv_directory + 'alliance_v4.1_by_dyad_yearly.csv')

query_text = """

with

alliance_union_table as (

    select
        year,
        ccode1 as c_code_a,
        ccode2 as c_code_b
    from alliance_df
    group by 1, 2, 3
    union
    select
        year,
        ccode2 as c_code_a,
        ccode1 as c_code_b
    from alliance_df
    group by 1, 2, 3)

select
    year,
    c_code_a as c_code,
    count(distinct c_code_b) as allied_countries
from alliance_union_table
group by 1, 2

"""

alliance_grouped_df = deepcopy(pysqldf(query_text))

In [6]:
## adjusting dyadic trade data to become one country by year
trade_df1 = pd.read_csv(csv_directory + 'Dyadic_COW_4.0.csv')

query_text = """

with
    
trade_union_table as (

    select
        year,
        ccode1 as c_code_a,
        ccode2 as c_code_b,
        flow1 as money_flow_in,
        flow2 as money_flow_out
    from trade_df1
    where
        coalesce(flow1, -9) not in (-8, 9)
        or coalesce(flow2, -9) not in (-8, 9)
    group by 1, 2, 3
    union
    select
        year,
        ccode2 as c_code_a,
        ccode1 as c_code_b,
        flow2 as money_flow_in,
        flow1 as money_flow_out
    from trade_df1
    where
        coalesce(flow1, -9) not in (-8, 9)
        or coalesce(flow2, -9) not in (-8, 9)
    group by 1, 2, 3)

select
    year,
    c_code_a as c_code,
    sum(money_flow_in) as money_flow_in,
    sum(money_flow_out) as money_flow_out,
    count(distinct c_code_b) as trade_countries
from trade_union_table
group by 1, 2

"""

trade_grouped_df1 = deepcopy(pysqldf(query_text))

In [7]:
## adjusting dyadic trade data to become one country by year
trade_df2 = pd.read_csv(csv_directory + 'National_COW_4.0.csv', encoding='latin-1')

query_text = """

select
    year,
    ccode as c_code,
    sum(max(coalesce(imports, 0), 0)) as imports,
    sum(max(coalesce(exports, 0), 0)) as exports
from trade_df2
group by 1, 2

"""

trade_grouped_df2 = deepcopy(pysqldf(query_text))

In [8]:
nmc_df = pd.read_csv(csv_directory + 'NMC_5_0-wsupplementary.csv', encoding='latin-1')

query_text = """

select
    year,
    ccode as c_code,
    milex as military_expenditure,
    milper as military_personnel,
    irst as iron_steel_production,
    pec as energy_consumption,
    tpop as population,
    upop as urban_population,
    upopgrowth as urban_population_growth_rate,
    cinc as cinc_score
from nmc_df
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

"""

nmc_df = deepcopy(pysqldf(query_text))

In [9]:
tc_df = pd.read_csv(csv_directory + 'tc2018.csv', encoding='latin-1')

query_text = """

select
    year,
    gainer as c_code,
    sum(max(area, 0)) as land_mass_exchange_gain,
    sum(max(pop, 0)) as population_exchange_gain,
    0 as land_mass_exchange_loss,
    0 as population_exchange_loss
from tc_df
group by 1, 2
union
select
    year,
    loser as c_code,
    0 as land_mass_exchange_gain,
    0 as population_exchange_gain,
    sum(max(area, 0)) as land_mass_exchange_loss,
    sum(max(pop, 0)) as population_exchange_loss
from tc_df
group by 1, 2

"""

tc_df = deepcopy(pysqldf(query_text))

In [10]:
# https://www.systemicpeace.org/inscrdata.html
# Forcibly Displaced Populations, 1946-2018
dp_df = pd.read_csv(csv_directory + 'FDP2008a.csv', encoding='latin-1')

query_text = """
    
    select
        year,
        ccode as c_code,
        source as refugees_originated,
        host as refugees_hosted,
        idp as internally_displaced_persons
    from dp_df
    group by 1, 2, 3, 4, 5

"""

dp_df = deepcopy(pysqldf(query_text))

In [11]:
# # https://www.systemicpeace.org/inscrdata.html
# # High Casualty Terrorist Bombings (HCTB), March 11, 1998 - March 10, 2020
hctb_df = pd.read_csv(csv_directory + 'HCTBMar2020list.csv', encoding='latin-1')

query_text = """

    select
        a.year,
        c.c_code,
        sum(a.death) as terrorist_bombing_deaths
    from hctb_df a
    left join c_code_df c on case when a.loc = 'USR' then 'RUS' else replace(a.loc, ' ', '') end = c.state_name_abbreviation
    group by 1

"""

hctb_df = deepcopy(pysqldf(query_text))

In [12]:
# ## not sure that this one is worth the trouble because ccodes are not included
# part_df_4 = pd.read_csv(csv_directory + 'co-emissions-per-capita.csv', encoding='latin-1')

In [13]:
years_df = pd.DataFrame(np.arange(1500, 2100), columns=['year'])
## including start and end years since that's how participant data will be joined in step 3
## getting a dataframe for all years between two participant's start and end years.
part_df = pd.read_pickle(pickle_directory + 'initial_part_df.pkl')

query_text = """

select
    y.year,
    a.war_num,
    cast(a.c_code as int) as c_code,
    a.start_year,
    a.end_year,
    al.allied_countries,
    t1.trade_countries,
    t1.money_flow_in,
    t1.money_flow_out,
    t2.imports,
    t2.exports,
    nm.military_expenditure,
    nm.military_personnel,
    nm.iron_steel_production,
    nm.energy_consumption,
    nm.population,
    nm.urban_population,
    nm.urban_population_growth_rate,
    nm.cinc_score,
    tc.land_mass_exchange_gain,
    tc.population_exchange_gain,
    tc.land_mass_exchange_loss,
    tc.population_exchange_loss,
    dp.refugees_originated,
    dp.refugees_hosted,
    dp.internally_displaced_persons,
    hc.terrorist_bombing_deaths
from part_df a
inner join years_df y on y.year between a.start_year and a.end_year
left join alliance_grouped_df al on a.c_code = al.c_code and y.year = al.year
left join trade_grouped_df1 t1 on a.c_code = t1.c_code and y.year = t1.year
left join trade_grouped_df2 t2 on a.c_code = t2.c_code and y.year = t2.year
left join nmc_df nm on a.c_code = nm.c_code and y.year = nm.year
left join tc_df tc on a.c_code = tc.c_code and y.year = tc.year
left join dp_df dp on a.c_code = dp.c_code and y.year = dp.year
left join hctb_df hc on a.c_code = hc.c_code and y.year = hc.year
where
    a.c_code > 0
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23

"""

part_desc_df = deepcopy(pysqldf(query_text))
print('Total Yearly Participants in Initial Data: {}'.format(format(len(part_df), ',d')))

Total Yearly Participants in Initial Data: 1,715


# Descriptive Statistics for Each Dyad by Year
### Note: Applies to states/countries only.
### This will be joined to the dyadic pairs for each war

### First, defining the dyads by year to exclude any countries/years that will not be needed later on.

In [14]:
initial_dyad_df = pd.read_pickle(pickle_directory + 'initial_dyad_df.pkl')[['c_code_a', 'c_code_b', 'start_year', 'end_year']]

query_text = """


select
    y.year,
    a.c_code_a,
    a.c_code_b,
from initial_dyad_df a
inner join years_df y on y.year between a.start_year and a.end_year
group by 1, 2, 3

"""

initial_dyad_df = deepcopy(pysqldf(query_text))

## must be a state (non-state participants will not be joined below)
## this will remove -8 and -9 participants
## must have a valid year since all joins below are by dyad yearly
initial_dyad_df = deepcopy(initial_dyad_df[(initial_dyad_df['c_code_a']>0) & (initial_dyad_df['c_code_b']>0) & (initial_dyad_df['year'].isnull()==False)].reset_index(drop=True))

print('Total Dyadic Years in Initial Data: {}'.format(format(int(len(initial_dyad_df)/2), ',d')))

PandaSQLException: (sqlite3.OperationalError) near "from": syntax error
[SQL: 


select
    y.year,
    a.c_code_a,
    a.c_code_b,
from initial_dyad_df a
inner join years_df y on y.year between a.start_year and a.end_year
group by 1, 2, 3

]
(Background on this error at: http://sqlalche.me/e/13/e3q8)

### Correlates of War Descriptive Data

In [None]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')
initial_descriptive_columns = deepcopy(set(list(initial_dyad_df.columns)))

## lot's to use in this dataset so I'll start with the basics
data_source = csv_directory + 'tc2018.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(initial_dyad_df, data_source, None, None, 'gainer', 'loser', 'year', 'territory_exchange'))

## contiguity dataframe for states of colonial dependencies
data_source = csv_directory + 'contcold.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'statelno', 'statehno', 'year', 'colonial_contiguity'))

data_source = csv_directory + 'contdird.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'state1no', 'state2no', 'year', 'contiguity'))

data_source = csv_directory + 'alliance_v4.1_by_dyad_yearly.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'ccode1', 'ccode2', 'year', 'alliance'))

data_source = csv_directory + 'DCAD-v1.0-dyadic.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'ccode1', 'ccode2', 'year', 'defense_cooperation_agreements'))

## must have at least one joined by both states in order to be included
data_source = csv_directory + 'dyadic_formatv3.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'ccode1', 'ccode2', 'year', 'inter_governmental_organizations'))

## this one needs to be filled since its only 5 years
data_source = csv_directory + 'Diplomatic_Exchange_2006v1.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'ccode1', 'ccode2', 'year', 'diplomatic_exchange'))

## only including countries with trade data present.
trade_df = pd.read_csv(csv_directory + 'Dyadic_COW_4.0.csv')
trade_df = deepcopy(trade_df[(trade_df['flow1']>0) | (trade_df['flow2']>0)].reset_index(drop=True))
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, None, trade_df, None, 'ccode1', 'ccode2', 'year', 'trade_relations'))

the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, initial_descriptive_columns, None)

### Non-Correlates of War Descriptive Data

### Setting up processing of 'ddrevisited_data_v1'

In [None]:
dd_df_1 = pd.read_csv(csv_directory + 'ddrevisited_data_v1.csv', encoding='latin-1')
## including columns that need to be included later on (that don't need name changes)
dd_df_renaming = {'cowcode': 'c_code',
                  'cid': 'alternate_country_code',
                  'emil': 'military_leader',
                  'royal': 'royal_leader',
                  'comm': 'communist_leader',
                  'democracy': 'democratic_regime',
                  'collect': 'collective_leadership',
                  'regime': 'regime_type',
                  'incumb': 'incumbent_type',
                  'exselec': 'election_type',
                  'legselec': 'legislature_type_1',
                  'closed': 'legislature_type_2',
                  'lparty': 'legislature_party_status',
                  'dejure': 'party_legal_status',
                  'defacto': 'party_existance_1',
                  'defacto2': 'party_existance_2',
                  'year': 'year'}
dd_df_1.rename(dd_df_renaming, axis=1, inplace=True)
dd_df_1 = deepcopy(dd_df_1[list(dd_df_renaming.values())])

## replacing cowcode with cowcode2 for second version
del dd_df_renaming['cowcode']
dd_df_renaming['cowcode2'] = 'c_code'

dd_df_2 = pd.read_csv(csv_directory + 'ddrevisited_data_v1.csv', encoding='latin-1')
dd_df_2.rename(dd_df_renaming, axis=1, inplace=True)
dd_df_2 = deepcopy(dd_df_2[list(dd_df_renaming.values())])

## unioning the two ccodes above so they can both be represented
## this will also allow for substates to be joined to the larger states
## this will need to be recognized later on to prevent from saying same leadership when it's the same leader
dd_df = deepcopy(pd.concat([dd_df_1, dd_df_2], sort=True, ignore_index=True))

## removing duplicates from concat
dd_df.drop_duplicates(subset=list(dd_df_renaming.values()), keep='first', inplace=True)

## transforming this into a dyadic dataset (each country joined to each country)
dd_df = deepcopy(pd.merge(dd_df, dd_df, how='left', on=['year']))
    
for column in dd_df.columns:
    if column[-2:]=='_x':
        dd_df.rename({column: column[:-2] + '_a'}, axis=1, inplace=True)
    elif column[-2:]=='_y':
        dd_df.rename({column: column[:-2] + '_b'}, axis=1, inplace=True)
    else:
        pass

## inner join to only include dyads found in dyadic war data
## this will limit runtime significantly
dd_df = deepcopy(pd.merge(initial_dyad_df, dd_df, how='inner', on=['c_code_a', 'c_code_b', 'year']))

## ensuring that c_code_a and c_code_b do not match, and cowcode and cowcode2 (overlapping states) are not joined to each other.
dd_df = deepcopy(dd_df[(dd_df['c_code_a']!=dd_df['c_code_b']) & (dd_df['alternate_country_code_a']!=dd_df['alternate_country_code_b'])].reset_index(drop=True))
dd_df.drop(['alternate_country_code_a', 'alternate_country_code_b'], axis=1, inplace=True)

In [None]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')
initial_descriptive_columns = deepcopy(set(list(descriptive_df_2.columns)))

conditional_statement = (dd_df['military_leader_a']==dd_df['military_leader_b']) & (dd_df['communist_leader_a']==dd_df['communist_leader_b']) & (dd_df['royal_leader_a']==dd_df['royal_leader_b']) & (dd_df['democratic_regime_a']==dd_df['democratic_regime_b'])
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'same_leader_type'))

conditional_statement = (dd_df['military_leader_a']==1) & (dd_df['military_leader_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'military_leaders'))

conditional_statement = (dd_df['communist_leader_a']==1) & (dd_df['communist_leader_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'communist_leaders'))

conditional_statement = (dd_df['royal_leader_a']==1) & (dd_df['royal_leader_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'royal_leaders'))

conditional_statement = (dd_df['incumbent_type_a']==1) & (dd_df['incumbent_type_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'democratic_incumbent'))

conditional_statement = (dd_df['incumbent_type_a']==2) & (dd_df['incumbent_type_b']==2)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'unconstitutional_incumbent'))

conditional_statement = ((dd_df['incumbent_type_a']==1) | (dd_df['democratic_regime_a']==1) | (dd_df['regime_type_a']==0) | (dd_df['regime_type_a']==1) | (dd_df['regime_type_a']==2)) & ((dd_df['incumbent_type_b']==1) |(dd_df['democratic_regime_b']==1) | (dd_df['regime_type_b']==0) | (dd_df['regime_type_b']==1) | (dd_df['regime_type_b']==2))
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'democratic_regimes'))

conditional_statement = ((dd_df['regime_type_a']==3) | (dd_df['regime_type_a']==4) | (dd_df['regime_type_a']==5)) & ((dd_df['regime_type_b']==3) | (dd_df['regime_type_b']==4) | (dd_df['regime_type_b']==5))
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'dictatorships'))

conditional_statement = (dd_df['collective_leadership_a']==1) & (dd_df['collective_leadership_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'collective_leaderships'))

conditional_statement = (dd_df['election_type_a']==1) & (dd_df['election_type_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'direct_election'))

conditional_statement = (dd_df['election_type_a']==2) & (dd_df['election_type_b']==2)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'indirect_election'))

conditional_statement = (dd_df['election_type_a']==3) & (dd_df['election_type_b']==3)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'non_elected_leaders'))

conditional_statement = (dd_df['legislature_type_1_a']==0) & (dd_df['legislature_type_1_b']==0)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'no_legislature'))

conditional_statement = (dd_df['legislature_type_1_a']==1) & (dd_df['legislature_type_1_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'non_elective_legislature'))

conditional_statement = (dd_df['legislature_type_1_a']==2) & (dd_df['legislature_type_1_b']==2)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'elective_legislature'))

conditional_statement = (dd_df['legislature_party_status_a']==0) & (dd_df['legislature_party_status_b']==0)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'no_partisan_legislature_legal'))

conditional_statement = (dd_df['legislature_party_status_a']==1) & (dd_df['legislature_party_status_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'no_non_regime_legislature_parties_legal'))

conditional_statement = (dd_df['legislature_party_status_a']==2) & (dd_df['legislature_party_status_b']==2)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'multi_party_legislature_legal'))

conditional_statement = (dd_df['party_legal_status_a']==0) & (dd_df['party_legal_status_b']==0)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'all_parties_illegal'))

conditional_statement = (dd_df['party_legal_status_a']==1) & (dd_df['party_legal_status_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'single_party_state_exists'))

conditional_statement = (dd_df['party_legal_status_a']==2) & (dd_df['party_legal_status_b']==2)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'multi_party_state_exists'))

conditional_statement = (dd_df['party_existance_1_a']==0) & (dd_df['party_existance_1_b']==0)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'no_parties_exist'))

conditional_statement = (dd_df['party_existance_1_a']==1) & (dd_df['party_existance_1_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'one_party_exists'))

conditional_statement = (dd_df['party_existance_2_a']==1) & (dd_df['party_existance_2_b']==1)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', dd_df, conditional_statement, 'c_code_a', 'c_code_b', 'year', 'no_non_regime_parties_exist'))

the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, initial_descriptive_columns, None)

### Other Non-COW Data Sources

In [None]:
print('Counting Total Dyadic Year Combinations by Descriptive Field\n')
initial_descriptive_columns = deepcopy(set(list(descriptive_df_2.columns)))

# Alliance Treaty Obligations and Provisions (ATOP)
data_source = csv_directory + 'atop4_01ddyr.csv'
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, data_source, None, None, 'stateA', 'stateB', 'year', 'atop'))

# The ICOW multilateral treaties of pacific settlement (MTOPS)
data_source = csv_directory + 'mtopsd150.csv'
mtops_dy_df = pd.read_csv(data_source, encoding='utf8')
mtops_dy_df['total'] = mtops_dy_df['pacsettg'] + mtops_dy_df['pacsettr'] + mtops_dy_df['pacsett'] + mtops_dy_df['tergen'] + mtops_dy_df['terviol'] + mtops_dy_df['tertot']
conditional_statement = (mtops_dy_df['total']>0)
descriptive_df_2 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(descriptive_df_2, 'conditional', mtops_dy_df, conditional_statement, 'state1', 'state2', 'year', 'mtops'))
the_networks_of_war_python_functions.print_new_fields(descriptive_df_2, initial_descriptive_columns, None)

### Removing any descriptive dyadic field with 0 records returned.

In [None]:
descriptive_columns = list(descriptive_df_2.drop(['c_code_a', 'c_code_b', 'year'], axis=1).columns)
for column in descriptive_columns:
    if len(descriptive_df_2[descriptive_df_2[column]>0])==0:
        print('Fields Removed: {}'.format(column))
        descriptive_df_2.drop(column, axis=1, inplace=True)

In [None]:
print('Total Dyadic Years of Descriptive Data: {}'.format(format(int(len(descriptive_df_2)/2), ',d')))
descriptive_df_2.to_pickle(pickle_directory + 'dyadic_descriptive_df.pkl')

In [None]:
sorted(descriptive_df_2.columns)

In [None]:
# pd.read_csv(csv_directory + 'tc2018.csv', encoding='utf8')


# Process of Territorial Change: The process of territorial change includes six possible procedures:
# 1. Conquest
# 2. Annexation
# 3. Cession
# 4. Secession
# 5. Unification
# 6. Mandated territory

# Portion of the Territory Exchanged: "Portion of unit exchanged" indicates whether part or all of
# the unit being transferred was involved in the exchange. A "0" means that part of the unit was
# transferred, a "1" indicates that the entire unit was involved in the exchange, and a “-9” means
# that this could not be determined.

In [None]:
# data_source = csv_directory + 'ucdp-peace-agreements-191.csv'
# dy_df_19 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Conflict Termination Dataset version 2-2015
# data_source = csv_directory + 'ucdp-term-dyadic-2015.csv'
# dy_df_20 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Battle-Related Deaths Dataset version 20.1
# data_source = csv_directory + 'UCDP Battle-Related Deaths Dataset version 20.1'
# dy_df_21 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Actor Dataset version 20.1
# data_source = csv_directory + 'ucdp-actor-201.csv'
# dy_df_22 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Non-state Conflict Issues and Actors Dataset
# data_source = csv_directory + 'UCDP_NS_IAD.csv'
# dy_df_23 = pd.read_csv(data_source, encoding='utf8')

# # UCDP External Support in Non-state Conflict Dataset
# data_source = csv_directory + 'UCDP External Support in Non-state Conflict Dataset v.1.0.csv'
# dy_df_24 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Managing Intrastate Low-intensity Conflict (MILC) dataset
# data_source = csv_directory + 'milc-10.csv'
# dy_df_25 = pd.read_csv(data_source, encoding='utf8')

# # UCDP Managing Intrastate Conflict (MIC) dataset
# ## more than 2 ids per conflict
# data_source = csv_directory + 'micFINAL.csv'
# dy_df_26 = deepcopy(the_networks_of_war_python_functions.descriptive_dyad_from_source(initial_dyad_df, data_source, None, 'contry_id', 'thirdid1', 'year', 'atop'))

# # 'gwno'
# # 'dyad_id'
# ## only including failed peace agreements
# dy_df_19 = deepcopy(dy_df_19[dy_df_19['ended']==True])[['gwno', 'dyad_id', 'dyad_name', 'actor_id', 'actor_name', 'year', 'duration', 'c_duration']]


In [None]:
# for year in np.arange(1800, 2020):
#     for row in dy_df_7['year']:
#         if len(dy_df_7[dy_df_7['year']==year])== 0:
#             temp_dyad_df = deepcopy(dy_df_7[dy_df_7['year']==year].reset_index())
#             for i, dyad in enumerate(temp_dyad_df['year']):
#                 dyad_df_length = deepcopy(len(dy_df_7))
#                 dy_df_7.loc[dyad_df_length, 'year'] = year
#                 dy_df_7.loc[dyad_df_length, 'c_code_a'] = temp_dyad_df.loc[i, 'c_code_a']
#                 dy_df_7.loc[dyad_df_length, 'c_code_b'] = temp_dyad_df.loc[i, 'c_code_b']
#         else:
#             current_year = year
            
# dy_df_7['diplomatic_exchange'] = 1
# print(len(dy_df_7))