## Data cleanup for terrorism and countries data

In [2]:
## import dependencies
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import fiona; help(fiona.open)


Help on function open in module fiona:

open(path, mode='r', driver=None, schema=None, crs=None, encoding=None, layer=None, vfs=None, enabled_drivers=None, crs_wkt=None)
    Open file at ``path`` in ``mode`` "r" (read), "a" (append), or
    "w" (write) and return a ``Collection`` object.
    
    In write mode, a driver name such as "ESRI Shapefile" or "GPX" (see
    OGR docs or ``ogr2ogr --help`` on the command line) and a schema
    mapping such as:
    
      {'geometry': 'Point',
       'properties': [('class', 'int'), ('label', 'str'),
                      ('value', 'float')]}
    
    must be provided. If a particular ordering of properties ("fields"
    in GIS parlance) in the written file is desired, a list of (key,
    value) pairs as above or an ordered dict is required. If no ordering
    is needed, a standard dict will suffice.
    
    A coordinate reference system for collections in write mode can be
    defined by the ``crs`` parameter. It takes Proj4 style mappings lik

In [3]:
## read in countries.geojson into df
countries_geojson = "datasets/countries.geojson"

countries_gdf = gpd.read_file(countries_geojson)

print(type(countries_gdf))
countries_gdf.head()
##countries_gdf.plot()
##plt.show()
##countries_gdf.dtypes
##countries_gdf.index

<class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,ADMIN,ISO_A3,geometry
0,Aruba,ABW,POLYGON ((-69.99693762899992 12.57758209800004...
1,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,..."
2,Angola,AGO,(POLYGON ((11.73751945100014 -16.6925779829998...
3,Anguilla,AIA,(POLYGON ((-63.03766842399995 18.2129580750000...
4,Albania,ALB,"POLYGON ((19.74776574700007 42.57890085900007,..."


In [4]:
## read in gterrorism data csv, change encoding to read strings encoded in latin-1
terrorism_csv = "datasets/gtdb_0617_proj_cols.csv"

incidents_df = pd.read_csv(terrorism_csv, encoding = "ISO-8859-1")
incidents_df.head()
## terrorism_df.dtypes


Unnamed: 0,eventid,iyear,imonth,iday,Date,country,country_txt,latitude,longitude,attacktype1,attacktype1_txt,targtype1,targtype1_txt,gname,weaptype1,weaptype1_txt,nkill,nwound,property
0,197000000001,1970,7,2,7/2/1970,58,Dominican Republic,18.456792,-69.951164,1,Assassination,14,Private Citizens & Property,MANO-D,13,Unknown,1.0,0.0,0
1,197000000002,1970,0,0,,130,Mexico,19.432608,-99.133207,6,Hostage Taking (Kidnapping),7,Government (Diplomatic),23rd of September Communist League,13,Unknown,0.0,0.0,0
2,197001000001,1970,1,0,,160,Philippines,15.478598,120.599741,1,Assassination,10,Journalists & Media,Unknown,13,Unknown,1.0,0.0,0
3,197001000002,1970,1,0,,78,Greece,37.983773,23.728157,3,Bombing/Explosion,7,Government (Diplomatic),Unknown,6,Explosives/Bombs/Dynamite,,,1
4,197001000003,1970,1,0,,101,Japan,33.580412,130.396361,7,Facility/Infrastructure Attack,7,Government (Diplomatic),Unknown,8,Incendiary,,,1


In [5]:
## used data cleanup from datamung file for consistency
incidents_df = incidents_df.fillna({'Date':0, 'nkill':0, 'nwound':0})
incidents_df['Date'] = pd.to_datetime(incidents_df['Date'])
incidents_df['nkill'] = incidents_df['nkill'].fillna(0).astype(int)
incidents_df['nwound'] = incidents_df['nwound'].fillna(0).astype(int)
incidents_df = incidents_df.rename(columns={"eventid":"incident_id", 
                                            "Date":"idate", 
                                            "country":"icountry_id", 
                                            "country_txt":"ADMIN",
                                            "latitude":"ilatitude", 
                                            "longitude":"ilongitude", 
                                            "attacktype1":"attacktype_id", 
                                            "attacktype1_txt":"attacktype_txt", 
                                            "targtype1":"targtype_id", 
                                            "targtype1_txt":"targtype_txt", 
                                            "weaptype1":"weaptype_id", 
                                            "weaptype1_txt": "weaptype_txt",
                                            "property":"property_flg"
                                            })
incidents_df = incidents_df[pd.notnull(incidents_df['ilatitude'])]
incidents_df.head()

Unnamed: 0,incident_id,iyear,imonth,iday,idate,icountry_id,ADMIN,ilatitude,ilongitude,attacktype_id,attacktype_txt,targtype_id,targtype_txt,gname,weaptype_id,weaptype_txt,nkill,nwound,property_flg
0,197000000001,1970,7,2,1970-07-02,58,Dominican Republic,18.456792,-69.951164,1,Assassination,14,Private Citizens & Property,MANO-D,13,Unknown,1,0,0
1,197000000002,1970,0,0,1970-01-01,130,Mexico,19.432608,-99.133207,6,Hostage Taking (Kidnapping),7,Government (Diplomatic),23rd of September Communist League,13,Unknown,0,0,0
2,197001000001,1970,1,0,1970-01-01,160,Philippines,15.478598,120.599741,1,Assassination,10,Journalists & Media,Unknown,13,Unknown,1,0,0
3,197001000002,1970,1,0,1970-01-01,78,Greece,37.983773,23.728157,3,Bombing/Explosion,7,Government (Diplomatic),Unknown,6,Explosives/Bombs/Dynamite,0,0,1
4,197001000003,1970,1,0,1970-01-01,101,Japan,33.580412,130.396361,7,Facility/Infrastructure Attack,7,Government (Diplomatic),Unknown,8,Incendiary,0,0,1


In [8]:
## group by country name (ADMIN) and year
incidents_country_df = incidents_df.groupby(['ADMIN','iyear'],as_index=False)[['nkill']].sum()
incidents_country_df
##incidents_country_df.dtypes

Unnamed: 0,ADMIN,iyear,nkill
0,Afghanistan,1973,0
1,Afghanistan,1979,53
2,Afghanistan,1988,128
3,Afghanistan,1989,10
4,Afghanistan,1990,12
5,Afghanistan,1991,64
6,Afghanistan,1992,49
7,Afghanistan,1994,22
8,Afghanistan,1995,5
9,Afghanistan,1996,31


In [9]:
## merge relevant terrorism data into countries df
countries_incidents_merged_gdf = countries_gdf.merge(incidents_country_df, on = 'ADMIN')
countries_incidents_merged_gdf.head()

Unnamed: 0,ADMIN,ISO_A3,geometry,iyear,nkill
0,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,...",1973,0
1,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,...",1979,53
2,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,...",1988,128
3,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,...",1989,10
4,Afghanistan,AFG,"POLYGON ((71.04980228700009 38.40866445000009,...",1990,12


In [97]:
## set fiona.open properties
path = 'datasets/countriesTerrorismKills.geojson'
#mode='w',
#driver=None,
# schema=None,
# crs=None,
# encoding=None,
# layer=None,
# vfs=None,
# enabled_drivers=None,
# crs_wkt=None

      {'geometry': 'Point',
       'properties': [('class', 'int'), ('label', 'str'),
                      ('value', 'float')]}

In [99]:
## send to geojson and save in datasets folder
countries_incidents_merged_gdf.to_file(path, driver = 'GeoJSON')