<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [15]:
!pip install py7zr



In [381]:
import pandas as pd
import numpy as np
import py7zr
import os
import json
from urllib.request import urlopen

from typing import (
    Deque, Dict, FrozenSet, List, Optional, Sequence, Set, Tuple, Union
    )
from pydantic import BaseModel
# Lift pandas' display limits: None means "no truncation" for columns and rows.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

In [205]:
# Mount Google Drive only when the notebook actually runs on Colab.
# Elsewhere `google.colab` is not installed, so the import is skipped instead of
# aborting a full "Run All".
try:
    from google.colab import drive
except ImportError:
    drive = None  # not on Colab: nothing to mount
else:
    drive.mount("/content/gdrive")

In [383]:
# Data reading
# Known project roots. Every path below is built as PATH + 'data/...', so each
# root must end with a slash.
_PROJECT_ROOTS = {
    "colab": "/content/gdrive/MyDrive/GlobalTerrorismViz/",
    "local": "../",
}
# The local root is the one in effect.
PATH = _PROJECT_ROOTS["local"]

In [521]:
# Data reading
archive_path = PATH + 'data/raw/globalterrorismdb_0221dist.7z'
targetPath = PATH + 'data/'
with py7zr.SevenZipFile(archive_path) as z:
    # unpack the archive into the data folder, then load the spreadsheet from there
    z.extract(path=targetPath)
    df = pd.read_excel(targetPath + 'globalterrorismdb_0221dist.xlsx')

# Keep only incidents that are not flagged as doubtful (doubtterr != 1)
# and that happened in 2011 or later.
not_doubted = df['doubtterr'] != 1
from_2011_on = df['iyear'] >= 2011
df = df[not_doubted & from_2011_on]

# Columns used by the rest of the notebook
selected_columns = [
    'eventid', 'iyear', 'imonth', 'iday', 'country', 'country_txt',
    'region', 'region_txt', 'provstate', 'city', 'latitude', 'longitude',
    'nkill', 'nwound', 'gname', 'targtype1_txt', 'attacktype1_txt',
]
_df = df[selected_columns]

In [522]:
# Geojson for polygons of map
geojson_url = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'
with urlopen(geojson_url) as response:
    countries = json.load(response)

# One row per entry of the 'features' list; nested keys become dotted column
# names (e.g. 'properties.name', which the merge below joins on).
countries_df = pd.json_normalize(countries, record_path=['features'])

In [523]:
# Merging locations and main df
# Left join: every incident row is kept. Incidents whose country_txt has no
# identically named geojson feature get missing values in the geojson columns.
# NOTE(review): those rows have a missing 'id'; confirm the country names line up.
df_merged = _df.merge(
    countries_df,
    how='left',
    left_on='country_txt',
    right_on='properties.name',
)



In [577]:
# Getting Stats
def _yearly_stats(frame, keys):
    """Per combination of ``keys``: distinct event count plus summed killed/wounded."""
    return (frame.groupby(by=keys)
                 .agg({'eventid': 'nunique', 'nkill': 'sum', 'nwound': 'sum'})
                 .reset_index())

# NOTE(review): 'id' is a grouping key everywhere below; with groupby's default
# settings rows with a missing 'id' are left out. Confirm this is intended.
group_year_dataset = _yearly_stats(df_merged, ['gname', 'iyear', 'country_txt', 'id'])

attacks_by_target = _yearly_stats(df_merged, ['targtype1_txt', 'iyear', 'country_txt', 'id'])

death_injury_yearly = _yearly_stats(df_merged, ['iyear', 'country_txt', 'id'])

# An attack counts as 'Affiliated' unless its group name is exactly 'Unknown'.
df_merged['affiliation'] = [
    'Not Affiliated' if group == 'Unknown' else 'Affiliated'
    for group in df_merged['gname']
]
affiliated_yearly = _yearly_stats(df_merged, ['iyear', 'country_txt', 'id', 'affiliation'])

In [None]:
# Export the four summary tables, each as .xlsx and as .json.
# The output folders are created first so a fresh checkout does not fail on the first write.
for out_dir in (PATH + 'data/xlsx', PATH + 'data/json'):
    os.makedirs(out_dir, exist_ok=True)

# file stem -> table; the stems are the original output file names
exports = {
    'group_year_dataset': group_year_dataset,
    'attacks_by_target': attacks_by_target,
    'death_injury_yearly': death_injury_yearly,
    'affiliated_yearly': affiliated_yearly,
}

# Writing to excel
# `encoding=` is no longer accepted by DataFrame.to_excel (removed in pandas 2.0),
# so it is not passed.
for export_name, export_frame in exports.items():
    export_frame.to_excel(PATH + 'data/xlsx/' + export_name + '.xlsx', engine='openpyxl')

# Writing to JSON
for export_name, export_frame in exports.items():
    export_frame.to_json(PATH + 'data/json/' + export_name + '.json')

Most influential terrorist organizations

In [370]:
# Yearly number of distinct events per named group, restricted to the topN
# groups with the most events overall.
topN = 5
groupsData = (df_merged[~df_merged['gname'].isin(['Unknown'])]
              .groupby(by=['gname', 'iyear']).agg({'eventid': 'nunique'})
              .reset_index()
              .rename(columns={'iyear': 'x', 'eventid': 'y'}))

groupsData_years = sorted(groupsData['x'].unique())

# 'sum' (the string) instead of np.sum: same result, consistent with the other
# aggregations in this notebook and free of the pandas deprecation warning.
group_totals = groupsData.groupby(by=['gname']).agg({'y': 'sum'}).reset_index()
groupsData_groups = sorted(
    group_totals.sort_values(by=['y'], ascending=False).head(topN)['gname']
)
groupsData = groupsData[groupsData['gname'].isin(groupsData_groups)]

In [347]:
# Short display labels for group names (full name -> label).
# The bare `groupsData_groups` expression that used to open this cell was removed:
# it was not the last expression of the cell, so it never displayed anything.
group_names = {
    'Al-Shabaab': 'Al-Shabaab',
    'Boko Haram': 'Boko Haram',
    'Houthi extremists (Ansar Allah)': 'Houthi',
    'Islamic State of Iraq and the Levant (ISIL)': 'ISIL',
    'Maoists': 'Maoists',
    'Taliban': 'Taliban',
}

In [348]:
class Coordinates(BaseModel):
    """A single (x, y) point of a series."""
    x: Union[int, str]  # the notebook passes a year here
    y: int  # the notebook passes a count here
    
class GroupDataItem(BaseModel):
    """One named series: a label plus its list of points.

    Both fields default to None, so they are annotated as Optional to make the
    declared type agree with the default.
    """
    id: Optional[str] = None  # series label (the notebook passes a short group name)
    data: Optional[List[Coordinates]] = None  # the points of the series

#class GroupDataList(BaseModel):
#    __root__: List[GroupDataItem]    

In [349]:
# Build one series per top group: a point for every year, 0 where the group has
# no recorded event in that year.
groupsData_List = []
for group_name in groupsData_groups:
    group_rows = groupsData[groupsData['gname'] == group_name]
    coord_list = []
    for year_id in groupsData_years:
        year_counts = group_rows.loc[group_rows['x'] == year_id, 'y']
        y = int(year_counts.iloc[0]) if len(year_counts) > 0 else 0
        c = Coordinates(x=year_id, y=y)
        coord_list.append(c.dict())

    # The top groups come from the data while group_names is written by hand:
    # fall back to the full name instead of raising KeyError for a group that
    # has no short label.
    group_item = GroupDataItem(id=group_names.get(group_name, group_name), data=coord_list)
    groupsData_List.append(group_item.dict())

In [350]:
# Save groupsData_List as JSON
with open(PATH + 'data/json/groupsData.json', 'w') as json_file:
    json.dump(groupsData_List, json_file)

Most attacked countries over time

In [391]:
class YearlyAttacksCountry(BaseModel):
    """Event counts of one country, split by affiliation."""
    id: str  # country identifier (the 'id' column of the merged frame)
    affiliated: int  # the notebook passes the 'Affiliated' event count
    unknown: int  # the notebook passes the 'Not Affiliated' event count
    
class YearlyAttacks(BaseModel):
    """All per-country entries of one year.

    ``data`` defaults to None, so it is annotated as Optional to make the
    declared type agree with the default.
    """
    year: Union[int, str]
    data: Optional[List[YearlyAttacksCountry]] = None  # one entry per listed country

In [None]:
# Distinct events per (year, country id, affiliation)
events_per_affiliation = df_merged.groupby(by=['iyear', 'id', 'affiliation']).agg({'eventid': 'nunique'})
mostAttackedData = events_per_affiliation.reset_index()

# Years present in the data, ascending
distinct_years = mostAttackedData['iyear'].unique()
mostAttacked_years = sorted(distinct_years)

In [437]:
# For every year: the topN countries by total events, each with its
# affiliated / not-affiliated event counts.
topN = 7
mostAttackedData_List = []
affiliation = ['Affiliated', 'Not Affiliated']
for year in mostAttacked_years:
    year_rows = mostAttackedData[mostAttackedData['iyear'] == year]

    # Pick the topN countries by total events, then order them ascending by
    # total (the order in which they are emitted).
    # 'sum' (the string) replaces np.sum: same result, no deprecation warning.
    top_totals = (year_rows.groupby(by=['iyear', 'id'])
                           .agg({'eventid': 'sum'}).reset_index()
                           .sort_values(by=['eventid'], ascending=False).head(topN)
                           .sort_values(by=['eventid'], ascending=True))
    mostAttacked_countries = top_totals['id'].unique().tolist()

    # (country id, affiliation) -> event count for this year. mostAttackedData has
    # one row per (year, id, affiliation), so every key occurs at most once.
    counts = {
        (country_id, label): int(n_events)
        for country_id, label, n_events in zip(
            year_rows['id'], year_rows['affiliation'], year_rows['eventid'])
    }

    mostAttackedData_Countries = []
    for country in mostAttacked_countries:
        # a missing (country, affiliation) pair means no such events: count 0
        country_data = YearlyAttacksCountry(
            id=country,
            affiliated=counts.get((country, affiliation[0]), 0),
            unknown=counts.get((country, affiliation[1]), 0),
        )
        mostAttackedData_Countries.append(country_data.dict())
    yearly_data = YearlyAttacks(year=year, data=mostAttackedData_Countries)
    mostAttackedData_List.append(yearly_data.dict())

In [438]:
# Save mostAttackedData_List as JSON
with open(PATH + 'data/json/mostAttackedData.json', 'w') as json_file:
    json.dump(mostAttackedData_List, json_file)

victimsData

In [576]:
class Coordinates(BaseModel):
    """A single (x, y) point of a series.

    Re-declared here with the same fields as the Coordinates model defined
    earlier in this notebook.
    """
    x: Union[int, str]  # the notebook passes a year here
    y: int  # the notebook passes an injuries/fatalities figure here
    
class WoundDataItem(BaseModel):
    """One victim series ('injuries' or 'fatalities') with one point per year.

    Both fields default to None, so they are annotated as Optional to make the
    declared type agree with the default.
    """
    id: Optional[str] = None  # series name
    data: Optional[List[Coordinates]] = None  # the points of the series
        
class CountryDataItem(BaseModel):
    """All victim series of one country.

    The notebook uses an empty ``id`` for the all-countries total. Both fields
    default to None, so they are annotated as Optional to make the declared
    type agree with the default.
    """
    id: Optional[str] = None  # country identifier, or '' for the overall total
    data: Optional[List[WoundDataItem]] = None  # one series per victim type
    

In [580]:
# Give the victim columns their output names. The 'wounded'/'killed' entries make
# the cell re-runnable after the geoData cell has renamed the same columns;
# labels that are not present are simply ignored by rename().
death_injury_yearly = death_injury_yearly.rename(columns={
    'nwound': 'injuries', 'nkill': 'fatalities',
    'wounded': 'injuries', 'killed': 'fatalities',
})

countries = death_injury_yearly['id'].unique().tolist()
years = death_injury_yearly['iyear'].unique().tolist()
woundtypes = ['injuries', 'fatalities']


victimsData_List = []

#total numbers (emitted first, with an empty id)
WoundData_List = []
for woundtype in woundtypes:
    coord_List = []
    for year_id in years:
        y = int(death_injury_yearly.loc[death_injury_yearly['iyear'] == year_id, woundtype].sum())
        coord_List.append(Coordinates(x=year_id, y=y).dict())
    WoundData_List.append(WoundDataItem(id=woundtype, data=coord_List).dict())

victimsData_List.append(CountryDataItem(id='', data=WoundData_List).dict())

#numbers by countries
for country in countries:
    country_rows = death_injury_yearly[death_injury_yearly['id'] == country]
    WoundData_List = []

    for woundtype in woundtypes:
        coord_List = []
        for year_id in years:
            matches = country_rows.loc[country_rows['iyear'] == year_id, woundtype]
            # Years without a row for this country are emitted as 0. The value is
            # cast with int() exactly like the totals above, so the int-typed
            # field never receives a float.
            y = int(matches.iloc[0]) if len(matches) > 0 else 0
            coord_List.append(Coordinates(x=year_id, y=y).dict())
        WoundData_List.append(WoundDataItem(id=woundtype, data=coord_List).dict())

    victimsData_List.append(CountryDataItem(id=country, data=WoundData_List).dict())
#victimsData_List = str(victimsData_List).replace("'", '"')

In [579]:
# Save victimsData_List as JSON
with open(PATH + 'data/json/victimsData.json', 'w') as json_file:
    json.dump(victimsData_List, json_file)
#with open(PATH + 'data/json/victimsData.json', 'r') as f:
#    victimsData_List = json.load(f)  
#with open(PATH + 'data/json/victimsData.json', 'w') as f:
#    json.dump(victimsData_List, f, indent=2)

geoData

In [507]:
class CountryGeoData(BaseModel):
    """The three figures of one country for one year."""
    id: str  # country identifier (the 'id' column of the merged frame)
    value: int  # the notebook passes the event count here
    wounded: int
    killed: int
    
class geoDataItem(BaseModel):
    """All per-country figures of one year.

    ``data`` defaults to None, so it is annotated as Optional to make the
    declared type agree with the default.
    """
    year: Union[int, str]
    data: Optional[List[CountryGeoData]] = None  # one entry per country

In [513]:
# Name the columns after the output fields. Both the raw names (nwound/nkill) and
# the names assigned by the victimsData cell (injuries/fatalities) are accepted, so
# this cell no longer depends on that cell having run first; labels that are not
# present are simply ignored by rename().
death_injury_yearly = death_injury_yearly.rename(columns={
    'eventid': 'value',
    'injuries': 'wounded', 'fatalities': 'killed',
    'nwound': 'wounded', 'nkill': 'killed',
})

countries = death_injury_yearly['id'].unique().tolist()
years = death_injury_yearly['iyear'].unique().tolist()
datatypes = ['value', 'wounded', 'killed']

geoData_List = []
for year_id in years:
    year_rows = death_injury_yearly[death_injury_yearly['iyear'] == year_id]
    countryData_List = []
    for country in countries:
        # Look the (year, country) row up once and read all three figures from it.
        country_rows = year_rows[year_rows['id'] == country]
        if len(country_rows) > 0:
            first_row = country_rows.iloc[0]
            # int(): the summed columns may be floats, the model fields are ints
            countryValues = [int(first_row[dt]) for dt in datatypes]
        else:
            # a country without a row in this year is emitted with zeros
            countryValues = [0] * len(datatypes)
        countryData = CountryGeoData(id=country, value=countryValues[0],
                                     wounded=countryValues[1], killed=countryValues[2])
        countryData_List.append(countryData.dict())
    geoData = geoDataItem(year=year_id, data=countryData_List)
    geoData_List.append(geoData.dict())


In [514]:
# Save geoData_List as JSON
with open(PATH + 'data/json/geoData.json', 'w') as json_file:
    json.dump(geoData_List, json_file)

attackTypes

In [718]:
class AttackTypeData(BaseModel):
    """One attack type and its value."""
    id: str  # attack type label
    value: int  # the notebook passes a rounded percentage here

class CountryAttackTypeItem(BaseModel):
    """Attack-type breakdown of one country.

    The notebook uses an empty ``id`` for the all-countries breakdown. ``data``
    defaults to None, so it is annotated as Optional to make the declared type
    agree with the default.
    """
    id: str
    data: Optional[List[AttackTypeData]] = None  # one entry per attack type

In [719]:
# Distinct events per (attack type, country). Only the event count is aggregated:
# the killed/wounded sums computed here before were dropped on the very next line.
attack_by_type = (df_merged.groupby(by=['attacktype1_txt', 'country_txt', 'id'])
                           .agg({'eventid': 'nunique'})
                           .reset_index())

countries = attack_by_type['id'].unique().tolist()
attack_by_type_names = attack_by_type['attacktype1_txt'].unique().tolist()

In [720]:
# Raw attack type -> display label. Both hostage-taking variants share one label
# and 'Unknown' is shown as 'Others'.
attack_by_type_names_dict = {'Armed Assault':'Armed Assault',
 'Assassination':'Assassination',
 'Bombing/Explosion':'Explosion',
 'Facility/Infrastructure Attack':'Facility Attack',
 'Hijacking':'Hijacking',
 'Hostage Taking (Barricade Incident)':'Hostage Taking',
 'Hostage Taking (Kidnapping)':'Hostage Taking',
 'Unarmed Assault':'Unarmed Assault',
 'Unknown':'Others'}
# A raw type missing from the mapping raises KeyError here (kept on purpose).
attack_by_type['attacktype1_txt'] = attack_by_type['attacktype1_txt'].apply(lambda x: attack_by_type_names_dict[x])
# Merge the rows that now share a label.
attack_by_type = (attack_by_type.groupby(by=['attacktype1_txt','country_txt','id']).agg({'eventid':'sum'}).reset_index())

# Per country keep the topN labels by event count and fold every other label
# into 'Others'.
topN = 3
for country in countries:
    is_country = attack_by_type['id'] == country
    attackTypes_topN = (attack_by_type[is_country]
                        .sort_values(by=['eventid'], ascending=False)
                        .head(topN)['attacktype1_txt']
                        .tolist())
    is_minor_type = ~attack_by_type['attacktype1_txt'].isin(attackTypes_topN)
    attack_by_type.loc[is_minor_type & is_country, 'attacktype1_txt'] = 'Others'

# Each iteration only relabels rows of its own country, so merging the relabelled
# rows once after the loop gives the same frame as re-grouping inside every iteration.
attack_by_type = (attack_by_type.groupby(by=['attacktype1_txt','country_txt','id']).agg({'eventid':'sum'}).reset_index())


In [721]:
attackTypes_List = []

#total numbers (emitted first, with an empty id)
# NOTE(review): attack_by_type has already been reduced to each country's top types
# plus 'Others' by the previous cell, so these world-wide shares are computed from
# the collapsed labels rather than from the raw attack types. Confirm this is intended.
attack_by_type_total = (attack_by_type.groupby(by=['attacktype1_txt']).agg({'eventid':'sum'}).reset_index())
total_attacks = attack_by_type_total.eventid.sum()
attackTypes_topN = sorted(attack_by_type_total.sort_values(by = ['eventid'] ,ascending=False).head(topN)['attacktype1_txt'])
attack_by_type_total.loc[(~attack_by_type_total['attacktype1_txt'].isin(attackTypes_topN)),'attacktype1_txt'] = 'Others'
attack_by_type_total = (attack_by_type_total.groupby(by=['attacktype1_txt']).agg({'eventid':'sum'}).reset_index())
attack_by_type_names = attack_by_type_total['attacktype1_txt'].unique().tolist()

attacks_by_country = []
# After the re-grouping above there is exactly one row per label, so the rows can
# be walked directly instead of filtering the frame once per label.
for attack_id, attack_count in zip(attack_by_type_total['attacktype1_txt'],
                                   attack_by_type_total['eventid']):
    # share of all events, as a whole-number percentage
    attack_count = int(round(100*attack_count/total_attacks))
    attack_data = AttackTypeData(id=attack_id, value=attack_count)
    attacks_by_country.append(attack_data.dict())
attackTypes_List.append(CountryAttackTypeItem(id = '', data = attacks_by_country).dict())

#numbers by countries
for country in countries:
    country_rows = attack_by_type[attack_by_type['id']==country]
    total_attacks_by_country = country_rows.eventid.sum()
    # The labels come from this country's own rows, so each one is guaranteed to
    # have a match; the former "no match -> continue" branch could never run.
    attack_by_type_names = country_rows['attacktype1_txt'].unique().tolist()
    attacks_by_country = []
    for attack_id in attack_by_type_names:
        attack_count = country_rows.loc[country_rows['attacktype1_txt']==attack_id, 'eventid'].iloc[0]
        # share of the country's events, as a whole-number percentage
        attack_count = int(round(100*attack_count/total_attacks_by_country))
        attack_data = AttackTypeData(id=attack_id, value=attack_count)
        attacks_by_country.append(attack_data.dict())
    attackTypes_List.append(CountryAttackTypeItem(id = country, data = attacks_by_country).dict())


In [722]:
# Save attackTypes_List as JSON
with open(PATH + 'data/json/attackTypes.json', 'w') as json_file:
    json.dump(attackTypes_List, json_file)

attackData

In [583]:
class Coordinates(BaseModel):
    """A single (x, y) point of a series.

    Re-declared here with the same fields as the Coordinates models defined
    earlier in this notebook.
    """
    x: Union[int, str]  # the notebook passes a year here
    y: int  # the notebook passes an event count here
    
class AttackDataItem(BaseModel):
    """One attack series ('affiliated' or 'unknown') with one point per year.

    Both fields default to None, so they are annotated as Optional to make the
    declared type agree with the default.
    """
    id: Optional[str] = None  # series name
    data: Optional[List[Coordinates]] = None  # the points of the series
        
class CountryAttackDataItem(BaseModel):
    """All attack series of one country.

    The notebook uses an empty ``id`` for the all-countries total. Both fields
    default to None, so they are annotated as Optional to make the declared
    type agree with the default.
    """
    id: Optional[str] = None  # country identifier, or '' for the overall total
    data: Optional[List[AttackDataItem]] = None  # one series per affiliation

In [587]:
countries = affiliated_yearly['id'].unique().tolist()
years = affiliated_yearly['iyear'].unique().tolist()
# series ids used in the output, and the 'affiliation' label each one stands for
attacktypes = ['affiliated', 'unknown']
attacktypes_names = {'affiliated': 'Affiliated',
                     'unknown': 'Not Affiliated'}


#total numbers (emitted first, with an empty id)
CountryAttackData_List = []
AttackData_List = []
for attacktype in attacktypes:
    label_rows = affiliated_yearly[affiliated_yearly['affiliation'] == attacktypes_names[attacktype]]
    coord_List = []
    for year_id in years:
        yearly_total = int(label_rows[label_rows['iyear'] == year_id].eventid.sum())
        coord_List.append(Coordinates(x=year_id, y=yearly_total).dict())
    AttackData_List.append(AttackDataItem(id=attacktype, data=coord_List).dict())

CountryAttackData_List.append(CountryAttackDataItem(id='', data=AttackData_List).dict())

#numbers by countries
for country in countries:
    country_rows = affiliated_yearly[affiliated_yearly['id'] == country]
    AttackData_List = []

    for attacktype in attacktypes:
        label_rows = country_rows[country_rows['affiliation'] == attacktypes_names[attacktype]]
        coord_List = []
        for year_id in years:
            hits = label_rows.loc[label_rows['iyear'] == year_id, 'eventid']
            # first matching row wins; no match means no events that year
            y = hits.iloc[0] if len(hits) > 0 else 0
            coord_List.append(Coordinates(x=year_id, y=y).dict())
        AttackData_List.append(AttackDataItem(id=attacktype, data=coord_List).dict())

    CountryAttackData_List.append(CountryAttackDataItem(id=country, data=AttackData_List).dict())

In [588]:
# Save CountryAttackData_List as JSON
with open(PATH + 'data/json/attacksData.json', 'w') as json_file:
    json.dump(CountryAttackData_List, json_file)