In [1]:
# gtd_app.ipynb

import pandas as pd
import chardet
from bokeh.io import output_file, output_notebook, show, curdoc
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Slider, CategoricalColorMapper, Select
from bokeh.layouts import row, column
from bokeh.models.widgets import Tabs, Panel
from collections import Counter

# --------------------------------------------------------------------------------------------------------------------- #
filevar = "globalterrorismdb_0718dist.csv"
# Guess the text encoding from the first 100,000 bytes only, so the whole
# file does not have to be read twice.
with open(filevar, 'rb') as f:
    encoding_info = chardet.detect(f.read(100000))
try:
    # NOTE(review): if chardet reports no encoding, `encoding=None` is passed
    # and pandas falls back to its own default - confirm this is acceptable.
    df = pd.read_csv(filevar, index_col=0, encoding=encoding_info.get('encoding'))
except Exception:
    # Everything below needs `df`; report the failure and re-raise so the real
    # cause is shown instead of a later, unrelated NameError on `df`.
    print('File loading - Failed!')
    raise
    
# --------------------------------------------------------------------------------------------------------------------- #
def getGAS(yr):
    """
    Global Attack Statistics

    Summarise, for every country with at least one row in the module-level
    ``df`` whose ``iyear`` equals ``yr``, the values shown on the global
    scatter plot.

    Parameters
    ----------
    yr : value compared against the ``iyear`` column.

    Returns
    -------
    dict of equally long lists, one entry per country in order of first
    appearance in ``df``:
        'year'     - ``yr`` repeated
        'count'    - number of rows for the country
        'country'  - country name
        'affected' - int(sum of nkill + sum of nwound), missing values as 0
        'region'   - ``region_txt`` of the country's first row
        'attack'   - most frequent ``attacktype1_txt``
        'target'   - most frequent ``targtype1_txt``
        'month'    - most frequent ``imonth``
    Every list is empty when no row matches ``yr``.
    """
    columns = ['iyear', 'country_txt', 'nkill', 'nwound', 'region_txt',
               'attacktype1_txt', 'targtype1_txt', 'imonth']
    # Filter by year once instead of re-scanning the whole frame per country.
    year_df = df.loc[df['iyear'] == yr, columns]

    data_temp = {
        'year'     : [],
        'count'    : [],
        'country'  : [],
        'affected' : [],
        'region'   : [],
        'attack'   : [],
        'target'   : [],
        'month'    : []
    }
    # sort=False keeps the countries in order of first appearance.
    for country, rows in year_df.groupby('country_txt', sort=False):
        killed = rows['nkill'].fillna(0).sum()
        wounded = rows['nwound'].fillna(0).sum()

        data_temp['year'].append(yr)
        data_temp['count'].append(len(rows))
        data_temp['country'].append(country)
        data_temp['affected'].append(int(killed + wounded))
        data_temp['region'].append(rows['region_txt'].iloc[0])
        # Counter.most_common breaks ties in favour of the value seen first.
        data_temp['attack'].append(Counter(rows['attacktype1_txt']).most_common(1)[0][0])
        data_temp['target'].append(Counter(rows['targtype1_txt']).most_common(1)[0][0])
        data_temp['month'].append(Counter(rows['imonth']).most_common(1)[0][0])
    return data_temp


def getCAT(country):
    """
    Country Attack Trend

    Build per-year totals for one country from the module-level ``df``.

    Parameters
    ----------
    country : value compared against the ``country_txt`` column.

    Returns
    -------
    dict of equally long lists, one entry per distinct ``iyear`` of the
    country in order of first appearance:
        'year'    - the year
        'fatal'   - sum of ``nkill`` (missing as 0, cast to int)
        'injured' - sum of ``nwound`` (missing as 0, cast to int)
        'perpcap' - maximum ``nperpcap`` (missing replaced by -99)
        'count'   - number of rows for that year, as a plain int
    Every list is empty when the country has no rows.
    """
    # Select the country's rows once; the per-year masks below then only
    # scan this subset instead of the whole frame.
    country_df = df.loc[df['country_txt'] == country,
                        ['iyear', 'nkill', 'nwound', 'nperpcap']]

    year_temp = list()
    fatal_temp = list()
    injured_temp = list()
    captured_temp = list()
    count_temp = list()
    for yr in country_df['iyear'].unique():
        rows = country_df[country_df['iyear'] == yr]
        year_temp.append(yr)
        fatal_temp.append(int(rows['nkill'].fillna(0).astype(int).sum()))
        injured_temp.append(int(rows['nwound'].fillna(0).astype(int).sum()))
        captured_temp.append(int(rows['nperpcap'].fillna(-99).astype(int).max()))
        # A scalar, not a one-element list, so the column plots as numbers.
        count_temp.append(len(rows))

    data_temp = {
        'year'     : year_temp,
        'fatal'    : fatal_temp,
        'injured'  : injured_temp,
        'perpcap'  : captured_temp,
        'count'    : count_temp
    }
    return data_temp


def getmonth(country):
    """
    Country Attack Trend : attack count per calendar month

    Tally the ``imonth`` values of every row of the module-level ``df`` whose
    ``country_txt`` equals ``country``. Month 0 is left out; every other
    month number is translated to its label and must lie in 1..12
    (anything else raises KeyError).

    Returns a dict with 'month_2' (labels, in order of first appearance) and
    'count_2' (the matching row counts).
    """
    month_txt = {
        1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
        7: 'Jul', 8: 'Aug', 9: 'Sept', 10: 'Oct', 11: 'Nov', 12: 'Dec',
    }
    tally = Counter(df.loc[df['country_txt'] == country, 'imonth'])

    labels = []
    totals = []
    for month_no, hits in tally.items():
        if month_no == 0:
            # 0 has no label in month_txt, so it is not plotted.
            continue
        labels.append(month_txt[month_no])
        totals.append(hits)

    return {'month_2': labels, 'count_2': totals}


def getcity(country, yr):
    """
    Country Attack Trend : Top terrorized cities per country

    Rank the cities of ``country`` by number of rows in the module-level
    ``df`` for year ``yr`` and describe the ten most frequent ones.

    Parameters
    ----------
    country : value compared against ``country_txt``.
    yr      : value compared against ``iyear``.

    Returns
    -------
    (city_top, data_temp)
        city_top  - dict mapping city -> row count, at most ten entries,
                    most frequent first.
        data_temp - dict of equally long lists aligned with ``city_top``:
                    'x_1' cities, 'y_1' counts, 'fatal_1' / 'injured_1' sums
                    of nkill / nwound (missing as 0), and 'weapon_1' /
                    'target_1' / 'group_1' with up to three most frequent
                    weaptype1_txt / targtype1_txt / gname values per city.
    """
    # Apply the country/year filter once; the original rebuilt the same
    # full-frame mask five times for every city.
    subset = df.loc[(df['country_txt'] == country) & (df['iyear'] == yr),
                    ['city', 'nkill', 'nwound', 'weaptype1_txt', 'targtype1_txt', 'gname']]
    city_top = dict(Counter(subset['city']).most_common(10))

    def _top3(values):
        # Names of the three most frequent values, most frequent first.
        return [name for name, _ in Counter(values).most_common(3)]

    fatal_temp = list()
    injured_temp = list()
    weapon_temp = list()
    target_temp = list()
    group_temp = list()
    for city in city_top:
        rows = subset[subset['city'] == city]
        fatal_temp.append(float(rows['nkill'].fillna(0).sum()))
        injured_temp.append(float(rows['nwound'].fillna(0).sum()))
        weapon_temp.append(_top3(rows['weaptype1_txt']))
        target_temp.append(_top3(rows['targtype1_txt']))
        group_temp.append(_top3(rows['gname']))

    data_temp = {
        'x_1': list(city_top.keys()),
        'y_1': list(city_top.values()),
        'fatal_1'   : fatal_temp,
        'injured_1' : injured_temp,
        'weapon_1'  : weapon_temp,
        'target_1'  : target_temp,
        'group_1'   : group_temp
    }
    return city_top, data_temp


def getperpsdata(perp_list):
    """
    Create the Perpetrator data

    Build one summary row per group name in ``perp_list`` from the
    module-level ``df``.

    Parameters
    ----------
    perp_list : iterable of values compared against the ``gname`` column.
        Names with no matching rows are skipped.

    Returns
    -------
    pandas.DataFrame with the columns
        'group'     - group name
        'count'     - number of rows for the group (plain int)
        'affected'  - sum of nkill + sum of nwound (missing as 0)
        'gcount'    - maximum ``nperps`` (missing replaced by -99)
        'yr'        - number of distinct ``iyear`` values
        'region'    - number of distinct ``region_txt`` values
        'success'   - percentage of rows with success == 1, 2 decimals
        'unsuccess' - percentage of rows with success == 0, 2 decimals
        'country', 'weapon', 'attack', 'target', 'natl'
                    - up to three (value, count) pairs, most frequent first,
                      for country_txt, weaptype1_txt, attacktype1_txt,
                      targtype1_txt and natlty1_txt respectively
    """
    columns = ['gname', 'iyear', 'region_txt', 'success', 'nperps', 'nkill', 'nwound',
               'country_txt', 'natlty1_txt', 'weaptype1_txt', 'attacktype1_txt', 'targtype1_txt']
    # Split the frame by group in a single pass; the original re-scanned the
    # whole frame once for every name in perp_list.
    rows_by_group = {name: rows for name, rows in df[columns].groupby('gname', sort=False)}

    data_temp = {
        'group'     : [],
        'count'     : [],
        'affected'  : [],
        'gcount'    : [],
        'yr'        : [],
        'region'    : [],
        'success'   : [],
        'unsuccess' : [],
        'country'   : [],
        'weapon'    : [],
        'attack'    : [],
        'target'    : [],
        'natl'      : []
    }
    for group in perp_list:
        rows = rows_by_group.get(group)
        if rows is None:
            # No rows for this name: the percentages and max() are undefined.
            continue
        total = len(rows)
        succeeded = int((rows['success'] == 1).sum())
        failed = int((rows['success'] == 0).sum())

        data_temp['group'].append(group)
        # A scalar, not a one-element list, so the column sorts and plots as numbers.
        data_temp['count'].append(total)
        data_temp['affected'].append(float(rows['nkill'].fillna(0).sum() + rows['nwound'].fillna(0).sum()))
        data_temp['gcount'].append(rows['nperps'].fillna(-99).max())
        data_temp['yr'].append(rows['iyear'].nunique())
        data_temp['region'].append(rows['region_txt'].nunique())
        data_temp['success'].append(round((succeeded / total) * 100, 2))
        data_temp['unsuccess'].append(round((failed / total) * 100, 2))

        data_temp['country'].append(Counter(rows['country_txt']).most_common(3))
        data_temp['weapon'].append(Counter(rows['weaptype1_txt']).most_common(3))
        data_temp['attack'].append(Counter(rows['attacktype1_txt']).most_common(3))
        data_temp['target'].append(Counter(rows['targtype1_txt']).most_common(3))
        data_temp['natl'].append(Counter(rows['natlty1_txt']).most_common(3))

    return pd.DataFrame(data_temp)
    

def getperpstat(crit, perp_data):
    """
    Perpetrator Statistics

    Pick the column of ``perp_data`` that matches the criterion ``crit`` and
    package it for the bar chart.

    Parameters
    ----------
    crit      : label of the statistic; any label not listed in the table
                below selects the 'unsuccess' column.
    perp_data : table indexable by column name ('group', 'country', ... and
                the metric columns).

    Returns
    -------
    (x_bar, y_bar, data_temp) - group names, the selected metric values, and
    a dict combining both with the 'country', 'weapon', 'attack', 'target'
    and 'natl' columns.
    """
    metric_by_label = {
        'With most fatalities': 'affected',
        'Largest in number/ count': 'gcount',
        'Most frequent attacks': 'count',
        'Longest running': 'yr',
        'Present in most continents': 'region',
        'Highest success % on attacks': 'success',
    }
    metric = metric_by_label.get(crit, 'unsuccess')

    x_bar = list(perp_data['group'])
    y_bar = list(perp_data[metric])

    data_temp = {'x_bar': x_bar, 'y_bar': y_bar}
    for extra in ('country', 'weapon', 'attack', 'target', 'natl'):
        data_temp[extra] = list(perp_data[extra])
    return x_bar, y_bar, data_temp


def update_plot_a(attr, old, new):
    """
    Slider callback for the Global Attack Statistics plot.

    Reloads the scatter data for the year currently selected on ``slider_a``
    and re-fits both axis ranges to the new values with a margin of 10.
    The ``attr``, ``old`` and ``new`` arguments are accepted but not used.
    """
    new_data = getGAS(slider_a.value)
    cds_data_a.data = new_data

    affected = new_data['affected']
    counts = new_data['count']
    if not affected:
        # A year without rows (the slider title mentions 1993) yields empty
        # columns; min()/max() would raise ValueError, so keep the old ranges.
        return

    plot_a.x_range.start = min(affected) - 10
    plot_a.x_range.end = max(affected) + 10
    plot_a.y_range.start = min(counts) - 10
    plot_a.y_range.end = max(counts) + 10


def update_plot_b(attr, old, new):
    """
    Callback shared by the country dropdown and the year slider.

    Refreshes, in this order: the yearly trend source, the top-cities source
    together with the categorical x-axis of ``plot_b1``, and the per-month
    source. The ``attr``, ``old`` and ``new`` arguments are not used; the
    current widget values are read directly.
    """
    selected_country = country_select_b.value
    cds_data_b.data = getCAT(selected_country)

    selected_year = slider_b.value
    city_counts, city_columns = getcity(selected_country, selected_year)
    cds_data_b1.data = city_columns
    # The bar chart has a categorical axis, so its factors must follow the data.
    plot_b1.x_range.factors = list(city_counts)

    cds_data_b2.data = getmonth(selected_country)


def update_plot_c(attr, old, new):
    """
    Dropdown callback for the Perpetrator Statistics bar chart.

    Loads the metric selected on ``perp_select`` into ``cds_data_c`` and
    orders the x-axis categories by that metric, highest first. The two
    success-percentage criteria show every group; all other criteria show
    only the first ten. The ``attr``, ``old`` and ``new`` arguments are not
    used.
    """
    perp_val = perp_select.value
    x_bar, y_bar, new_data = getperpstat(perp_val, perp_data)
    cds_data_c.data = new_data

    # Sort (name, value) pairs directly: looking each value up with
    # x_bar.index(name) inside the sort key is quadratic in the group count.
    # The sort is stable, so tied groups keep their original relative order.
    ranked = sorted(zip(x_bar, y_bar), key=lambda pair: pair[1], reverse=True)
    new_cat = [name for name, _ in ranked]
    if perp_val not in ('Highest success % on attacks', 'Lowest success % on attacks'):
        new_cat = new_cat[:10]
    plot_c.x_range.factors = new_cat


# --------------------------------------------------------------------------------------------------------------------- #
# BASIC GLOBAL ATTACK STATISTICS #
# ------------------------------ #

# Initial Data (1970)
data_a = getGAS(1970)
cds_data_a = ColumnDataSource(data_a)

# Prepare the graph
# Each region is paired with the colour of its markers.
region_colors_a = [
    ('Central America & Caribbean', '#851e3e'),
    ('North America', '#3d1e6d'),
    ('Southeast Asia', '#4a4e4d'),
    ('Western Europe', '#83d0c9'),
    ('East Asia', '#d0e1f9'),
    ('South America', 'green'),
    ('Eastern Europe', 'red'),
    ('Sub-Saharan Africa', 'pink'),
    ('Middle East & North Africa', 'yellow'),
    ('Australasia & Oceania', 'brown'),
    ('South Asia', 'blue'),
    ('Central Asia', 'orange'),
]
color_mapper_a = CategoricalColorMapper(
    factors=[region for region, _ in region_colors_a],
    palette=[color for _, color in region_colors_a],
)

# Start with axis ranges fitted to the initial data, with a margin of 10.
affected_a = cds_data_a.data['affected']
count_a = cds_data_a.data['count']
xmin, xmax = min(affected_a) - 10, max(affected_a) + 10
ymin, ymax = min(count_a) - 10, max(count_a) + 10

plot_a = figure(
    title='Global Attack Statistics',
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
    plot_height=500,
    plot_width=1200,
)
plot_a.xaxis.axis_label = 'Total number of people affected (killed + injured)'
plot_a.yaxis.axis_label = 'Total number of incident attacks'
plot_a.circle(
    x='affected',
    y='count',
    legend_group='region',
    source=cds_data_a,
    alpha=0.8,
    radius=1.5,
    color=dict(field='region', transform=color_mapper_a),
)
plot_a.legend.location = 'top_right'
plot_a.legend.background_fill_alpha = 0

# Add hover guides
tooltips_a = [
    ('Country', '@country'),
    ('Year', '@year'),
    ('Region', '@region'),
    ('Attack Count', '@count'),
    ('Affected Count', '@affected'),
    ('Frequent Strategy', '@attack'),
    ('Frequent Target', '@target'),
    ('Frequent Attacked Month', '@month'),
]
hover_a = HoverTool(tooltips=tooltips_a)
plot_a.add_tools(hover_a)

# Add slider with year-value
slider_a = Slider(start=1970, end=2017, step=1, value=1970, title='Year (1970 till 2017, except 1993)')
slider_a.on_change('value', update_plot_a)


# --------------------------------------------------------------------------------------------------------------------- #
# COUNTRY ATTACK TREND #
# -------------------- #

# Initial Data (Philippines)
data_b = getCAT('Philippines')
cds_data_b = ColumnDataSource(data_b)
top10_cities, data_b1 = getcity('Philippines', 2017)
cds_data_b1 = ColumnDataSource(data_b1)
data_b2 = getmonth('Philippines')
cds_data_b2 = ColumnDataSource(data_b2)

# Prepare the graph
plot_b = figure(title='Country Attack Trend', plot_height=400, plot_width=700)

# One entry per yearly series: (marker method, column, colour, marker size, legend text).
trend_series_b = [
    ('diamond', 'count', 'green', 15, 'Attack Count'),
    ('circle', 'fatal', 'firebrick', 15, 'Fatal'),
    ('square', 'injured', 'navy', 10, 'Injured'),
    ('triangle', 'perpcap', 'olive', 10, 'Perpetrators Captured'),
]
# All markers are added before any line so the renderer order is unchanged.
for glyph_b, column_b, color_b, size_b, legend_b in trend_series_b:
    getattr(plot_b, glyph_b)(x='year', y=column_b, source=cds_data_b, color=color_b,
                             alpha=0.5, size=size_b, legend_label=legend_b)
plot_b.legend.location = 'top_left'
plot_b.legend.background_fill_alpha = 0.2
# Dashed connecting lines carry no legend entry of their own.
for glyph_b, column_b, color_b, size_b, legend_b in trend_series_b:
    plot_b.line(x='year', y=column_b, source=cds_data_b, color=color_b, line_width=2, line_dash=[4, 4])
plot_b.xaxis.axis_label = 'Year (1970 till 2017: no 1993 data)'
plot_b.yaxis.axis_label = 'Total Count'

# Bar chart of the most attacked cities; its categorical axis starts with the initial cities.
plot_b1_cat = list(top10_cities.keys())
plot_b1 = figure(
    x_range=plot_b1_cat,
    plot_height=400,
    plot_width=500,
    title='Top 10 cities with most attacks per year',
)
plot_b1.vbar(x='x_1', top='y_1', width=0.5, source=cds_data_b1)
plot_b1.xaxis.major_label_orientation = 0.7
plot_b1.yaxis.axis_label = 'Incident Attack Count'

# Bar chart per month; the axis labels must match the labels produced by getmonth.
month_axis_b2 = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
plot_b2 = figure(
    title='Attack Month Statistics (Timeline: 1970 - 2017)',
    plot_height=300,
    plot_width=700,
    x_range=month_axis_b2,
)
plot_b2.vbar(x='month_2', top='count_2', width=0.5, source=cds_data_b2, alpha=0.5, color='green')
plot_b2.yaxis.axis_label = 'Total Attack Count'
plot_b2.xaxis.axis_label = 'Month'

# Add hover guides
hover_b = HoverTool(tooltips=[
    ('Year', '@year'),
    ('No. of attacks', '@count'),
    ('Fatal', '@fatal'),
    ('Injured', '@injured'),
    ('Attackers Captured', '@perpcap'),
])
plot_b.add_tools(hover_b)

hover_b1 = HoverTool(tooltips=[
    ('Total incident attacks', '@y_1'),
    ('Fatalities', '@fatal_1'),
    ('Injured', '@injured_1'),
    ('Frequent weapon', '@weapon_1'),
    ('Frequent target', '@target_1'),
    ('Frequent terrorists', '@group_1'),
])
plot_b1.add_tools(hover_b1)

hover_b2 = HoverTool(tooltips=[
    ('Month', '@month_2'),
    ('Count', '@count_2'),
])
plot_b2.add_tools(hover_b2)

# Create the dropdown list
country_drop_list_b = sorted(list(df.country_txt.unique()))
country_select_b = Select(title='Country List', options=country_drop_list_b, value='Philippines')
country_select_b.on_change('value', update_plot_b)

# Add slider with year-value
slider_b = Slider(start=1970, end=2017, step=1, value=2017, title='Year (1970 till 2017, except 1993)')
slider_b.on_change('value', update_plot_b)


# --------------------------------------------------------------------------------------------------------------------- #
# PERPETRATOR STATISTICS #
# ---------------------- #

# Initial Data (With most fatalities)
# Summaries are computed once for every distinct group name; the dropdown
# callback only re-selects a column from perp_data.
perp_list = list(df.gname.unique())
perp_data = getperpsdata(perp_list)
x_bar, y_bar, data_c = getperpstat('With most fatalities', perp_data)
cds_data_c = ColumnDataSource(data_c)

# Prepare the graph
# Rank (name, value) pairs directly: looking each value up with
# x_bar.index(name) inside the sort key is quadratic in the group count.
# The sort is stable, so tied groups keep their original relative order.
sorted_cat = [name for name, _ in sorted(zip(x_bar, y_bar), key=lambda pair: pair[1], reverse=True)][:10]
plot_c = figure(x_range=sorted_cat, title='Top Perpetrator Statistics', plot_width=900)
plot_c.vbar(x='x_bar', top='y_bar', width=0.5, alpha=0.5, color='green', source=cds_data_c)
plot_c.xaxis.major_label_orientation = 0.7

# Add hover guides
hover_c = HoverTool(tooltips=[('Total Count', '@y_bar'), ('Most attacked country', '@country'), ('Frequent weapon', '@weapon'), ('Frequent attack', '@attack'), ('Frequent targets', '@target'), ('Most attacked nationals', '@natl')])
plot_c.add_tools(hover_c)

# Create the dropdown list
# These labels are the exact strings getperpstat and update_plot_c compare against.
perp_select_list = ['With most fatalities', 'Largest in number/ count', 'Most frequent attacks',
                    'Longest running', 'Present in most continents', 'Highest success % on attacks',
                    'Lowest success % on attacks']
perp_select = Select(title='Statistics List (per top 10)', value='With most fatalities',
                    options=perp_select_list)
perp_select.on_change('value', update_plot_c)


# --------------------------------------------------------------------------------------------------------------------- #
# Tab 1: the global scatter plot above its year slider.
global_layout = column(plot_a, slider_a)
tab1 = Panel(title='Global Statistics', child=global_layout)

# Tab 2: country dropdown on top; below it the trend and month charts on the
# left, and the top-cities chart with its year slider on the right.
country_left = column([plot_b, plot_b2])
country_right = column([plot_b1, slider_b])
country_layout = column(country_select_b, row([country_left, country_right]))
tab2 = Panel(title='Country Statistics', child=country_layout)

# Tab 3: statistic dropdown above the perpetrator bar chart.
perp_layout = column(perp_select, plot_c)
tab3 = Panel(title='Perpetrator Statistics', child=perp_layout)

tabs = Tabs(tabs=[tab1, tab2, tab3])
# Clear the current document before attaching the tabbed layout as its root.
document = curdoc()
document.clear()
curdoc().add_root(tabs)


  interactivity=interactivity, compiler=compiler, result=result)
