# THIS NOTEBOOK IS IN DEVELOPMENT.

# Import and setup

In [1]:
import pandas as pd

In [2]:
# raise the row and column display limits to 150 each
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 150)

In [3]:
# load the cleaned terrorism dataset (reading it may take a few minutes)
terrorism_csv = '../data/derived/terrorism.csv'
df = pd.read_csv(terrorism_csv)

# Group by variables

### Collapse related incidents

If an event occurs at separate times or locations, then its parts are considered separate incidents; see p. 12 of the codebook: "If any discontinuity in time or space is noted, the event is comprised of multiple incidents."

The relevant variables are `multiple` and `related`. `multiple` indicates whether attacks are connected; see p. 17 of the codebook. If the attack is part of a multiple incident, `related` contains the event IDs (from `eventid`) of the related incidents; see p. 13 and 17 of the codebook.

In [21]:
# size of the raw table, as (rows, columns), before related records are collapsed
row_count, column_count = df.shape
(row_count, column_count)

(59395, 136)

Collapse records into one incident if they are related and occurred in the same city on the same day. The numbers of fatalities, injured, and victims are summed across the collapsed records.

In [22]:
# Collapse related records: rows that share the same `related` string, date and
# location are treated as a single incident.
# NOTE(review): groupby leaves out rows with NaN in any key column by default;
# if `related` is empty for stand-alone events they are excluded here --
# confirm that this is intended.
incident_keys = [
    'related',
    'year_gtd', 'imonth', 'iday',
    'country', 'region',
    'country_txt_gtd', 'region_txt',
    'provstate', 'city',
]

# casualty counts that are summed across the records of one incident
casualty_cols = [
    'nkill', 'nkillus', 'nkillte',
    'nwound', 'nwoundus', 'nwoundte',
    'nvictim', 'nvictimus', 'nvictimte',
]

incident_aggs = {
    'incident_id': ('eventid', 'first'),    # first event ID labels the incident
    **{col: (col, 'sum') for col in casualty_cols},
    'event_ct_total': ('eventid', 'size'),  # number of records collapsed
}

terrorism_by_incident = (
    df.groupby(incident_keys)
      .agg(**incident_aggs)
      .reset_index()              # turn the grouping keys back into columns
      .set_index('incident_id')   # index the result by incident ID
)

# inspect dataframe
terrorism_by_incident.head()

Unnamed: 0_level_0,related,year_gtd,month_gtd,day_gtd,country_gtd,region,country_txt_gtd,region_txt,provstate,city,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,event_ct_total
incident_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
201201010002,"201201010002, 201201010004",2012,1,1,104,11,Kenya,Sub-Saharan Africa,North Eastern,Garissa,5.0,0.0,0.0,28.0,0.0,0.0,2
201201010013,"201201010013, 201201030040, 201201030041",2012,1,1,229,11,Democratic Republic of the Congo,Sub-Saharan Africa,South Kivu,Shabunda,18.0,0.0,0.0,5.0,0.0,0.0,1
201201030040,"201201010013, 201201030040, 201201030041",2012,1,3,229,11,Democratic Republic of the Congo,Sub-Saharan Africa,South Kivu,Shabunda,27.0,0.0,0.0,8.0,0.0,0.0,2
201201030008,"201201030008, 201201030009, 201201030010",2012,1,3,4,6,Afghanistan,South Asia,Kandahar,Kandahar,15.0,0.0,2.0,35.0,0.0,0.0,3
201201040006,"201201040006, 201201040007",2012,1,4,147,11,Nigeria,Sub-Saharan Africa,Borno,Maiduguri,0.0,0.0,0.0,0.0,0.0,0.0,1


In [23]:
# save the incident-level table as csv
incident_csv = '../data/derived/terrorism_by_incident.csv'
terrorism_by_incident.to_csv(incident_csv)

### Group by year and country

In [24]:
# group incidents by year and country, totalling incidents and casualties
# The incident table is built by grouping on `country`, so that is the column
# available here; it has no `country_gtd` column, and grouping on that name
# raises a KeyError when the notebook is run from a fresh kernel.
terrorism_by_year = terrorism_by_incident.groupby(['year_gtd', 'country']).agg(
    incident_ct_total = ('country', 'size'),  # rows per group; the column counted is arbitrary
    nkill_total       = ('nkill', 'sum'),
    nkillus_total     = ('nkillus', 'sum'),
    nkillte_total     = ('nkillte', 'sum'),
    nwound_total      = ('nwound', 'sum'),
    nwoundus_total    = ('nwoundus', 'sum'),
    nwoundte_total    = ('nwoundte', 'sum')
)

# inspect dataframe
terrorism_by_year.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,incident_ct_total,nkill_total,nkillus_total,nkillter_total,nwound_total,nwoundus_total,nwoundte_total
year_gtd,country_gtd,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012,4,48,226.0,0.0,61.0,575.0,0.0,6.0
2012,6,1,0.0,0.0,0.0,1.0,0.0,0.0
2012,18,2,2.0,0.0,0.0,1.0,0.0,0.0
2012,45,6,2.0,0.0,0.0,22.0,0.0,0.0
2012,60,1,0.0,0.0,0.0,3.0,0.0,0.0


In [25]:
# save the year-by-country table as csv
yearly_csv = '../data/derived/terrorism_by_year.csv'
terrorism_by_year.to_csv(yearly_csv)