In [None]:
#Standard imports 

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import json
import requests as r
import squarify
from scipy.stats import ttest_ind_from_stats
%matplotlib inline

In [None]:
#Reference file paths
#
# The main dataset must be downloaded manually from:
#   https://enveraconsulting.box.com/s/c3t6elxu7ngj3ywfs6to2a75arfsbgkv
# Point the GTD_CSV_PATH environment variable at your local copy. When the
# variable is unset, the original hard-coded location is used as a fallback
# so existing setups keep working.

import os

file = os.environ.get('GTD_CSV_PATH',
                      '/Users/gta/Desktop/globalterrorismdb_0617dist.csv')

# Auxiliary tables, read directly from these URLs by pd.read_csv.
presidents = 'http://bit.ly/001-presidents'
gdp_filepath = 'http://bit.ly/001-gdp'
country_codes = 'http://bit.ly/001-codes'

In [None]:
#Load every CSV into its own dataframe. The main file is read with the
#'cp1252' encoding, which was forced because of an error (per the original note).

csv_opts = {'skipinitialspace': True}

df = pd.read_csv(file, encoding='cp1252', low_memory=False, **csv_opts)
df_presidents = pd.read_csv(presidents, **csv_opts)
df_gdp = pd.read_csv(gdp_filepath, **csv_opts)
df_codes = pd.read_csv(country_codes, **csv_opts)

In [None]:
# Display the (rows, columns) dimensions of the dataframe just loaded.
df.shape

In [None]:
#Keep only the columns that the rest of the notebook works with

selected_columns = [
    'iyear', 'imonth', 'iday',
    'country_txt', 'region_txt', 'city',
    'latitude', 'longitude', 'location',
    'success', 'suicide',
    'attacktype1_txt', 'targtype1_txt', 'natlty1_txt',
    'gname', 'weaptype1_txt',
    'nkill', 'nwound',
]

df = df[selected_columns]

In [None]:
#Map the raw column names onto the readable names used in the rest of the notebook.
#
# Columns whose raw name is already the desired one (city, latitude, longitude,
# location, success, suicide) are not listed: rename() leaves unmapped columns
# untouched. Previously 'suicide' was mapped to the misspelled 'sucide'; the
# column now keeps its correct name.

col_name = {
    'iyear': 'year',
    'imonth': 'month',
    'iday': 'day',
    'country_txt': 'country',
    'region_txt': 'region',
    'attacktype1_txt': 'type_of_attack',
    'targtype1_txt': 'type_of_target',
    'natlty1_txt': 'nationality',
    'gname': 'group_name',
    'weaptype1_txt': 'type_of_weapon',
    'nkill': 'num_killed',
    'nwound': 'num_wounded',
}

#Rename columns based on the dict and list the resulting column names

df = df.rename(columns=col_name)
df.columns.tolist()

In [None]:
# Replace missing casualty counts with zero.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and does not
# modify `df` when pandas Copy-on-Write is active.
df['num_killed'] = df['num_killed'].fillna(0)
df['num_wounded'] = df['num_wounded'].fillna(0)

In [None]:
# Combined casualty figure per row: killed plus wounded.
df['num_killed_wounded'] = df['num_killed'].add(df['num_wounded'])

In [None]:
# List the column names of the presidents table.
df_presidents.columns.tolist()

In [None]:
# In the GDP table, 'country_code' becomes 'country'; show the resulting columns.
col_name = dict(country_code='country')
df_gdp = df_gdp.rename(col_name, axis='columns')
list(df_gdp.columns)

In [None]:
# In the country-code table, 'Name' -> 'country' and 'Code' -> 'code'; show the resulting columns.
col_name = dict(Name='country', Code='code')
df_codes = df_codes.rename(col_name, axis='columns')
list(df_codes.columns)

In [None]:
# Attach the presidents table to every event row by matching on 'year'
# (inner join: rows without a matching year are dropped).
df_president_main = df.merge(df_presidents, how='inner', on='year')

In [None]:
# Add the country-code columns by matching on 'country' (inner join).
df_president_main_codes = df_president_main.merge(df_codes, how='inner', on='country')

In [None]:
# Build the join key '<code><year>' used below to merge with the GDP table.
code_part = df_president_main_codes['code']
year_part = df_president_main_codes['year'].map(str)
df_president_main_codes['helper'] = code_part + year_part

In [None]:
# Same '<country><gdp_year>' key on the GDP side so both tables can be merged on 'helper'.
gdp_year_text = df_gdp['gdp_year'].map(str)
df_gdp['helper'] = df_gdp['country'] + gdp_year_text

In [None]:
# List the GDP table's columns after adding 'helper'.
df_gdp.columns.tolist()

In [None]:
# Remove the GDP table's 'country' column ahead of the merge on 'helper'.
df_gdp = df_gdp.drop(columns=['country'])

In [None]:
# List the GDP table's columns again, now without 'country'.
df_gdp.columns.tolist()

In [None]:
# Join events with GDP rows sharing the same 'helper' key (inner join).
df_combined = df_president_main_codes.merge(df_gdp, how='inner', on='helper')

In [None]:
# Drop the 'gdp_year' column from the merged frame.
df_combined = df_combined.drop(columns=['gdp_year'])

In [None]:
# Trim leading/trailing whitespace from the party labels.
party_clean = df_combined['party'].str.strip()
df_combined['party'] = party_clean

In [None]:
# Display the (rows, columns) dimensions of the merged dataframe.
df_combined.shape

In [None]:
# Preview the first rows of the merged dataframe.
df_combined.head()

In [None]:
# Sum of 'success' per (region, year), and the distinct region names.
region_year = df_combined[['region', 'year', 'success']]
new_df = region_year.groupby(['region', 'year'], as_index=False).sum()
region_list = set(new_df['region'])

In [None]:
# One line per region: summed 'success' by year.
fig, ax = plt.subplots(figsize = (15, 7))
# region_list is a set, whose iteration order can change between runs;
# sorting keeps the legend order and colour assignment reproducible.
for region in sorted(region_list):
    new_df[new_df['region'] == region].plot(x='year', y='success', ax=ax, label=region)
ax.set_xlabel('Year', fontsize = 14)
ax.set_ylabel('Number of Successful Attacks', fontsize = 14)
ax.set_title('Number of Successful Attacks Per Year by Region', fontsize = 14)
plt.show()

In [None]:
# Non-null row counts per party for 'success' and 'num_killed_wounded'; show the first row.
party_count_cols = ['party', 'success', 'num_killed_wounded']
df_party_ct_gb = df_combined[party_count_cols].groupby('party', as_index = False).count()
df_party_ct_gb.iloc[0]

In [None]:
# Per-party totals of 'success' and 'num_killed_wounded'.
party_sum_cols = ['party', 'success', 'num_killed_wounded']
df_party_sum_gb = df_combined[party_sum_cols].groupby('party', as_index = False).sum()
df_party_sum_gb

In [None]:
# Per-party averages: each total divided by the matching row count.
# Rows are looked up by party label rather than by position (iloc[0]/iloc[1]),
# so the result no longer depends on the row order produced by groupby.
# The labels are the ones this notebook filters on further down.
party_sum = df_party_sum_gb.set_index('party')
party_ct = df_party_ct_gb.set_index('party')

dem_kw = party_sum.loc['Democratic', 'num_killed_wounded'] / party_ct.loc['Democratic', 'num_killed_wounded']
dem_succcess = party_sum.loc['Democratic', 'success'] / party_ct.loc['Democratic', 'success']
rep_kw = party_sum.loc['Republican', 'num_killed_wounded'] / party_ct.loc['Republican', 'num_killed_wounded']
rep_success = party_sum.loc['Republican', 'success'] / party_ct.loc['Republican', 'success']

In [None]:
# Print the per-party ratios computed above, casualties first, then success.
for label, value in (('Democratic Killed or Wounded', dem_kw),
                     ('Republican Killed or Wounded', rep_kw)):
    print(label, value)
print('----')
for label, value in (('Democratic Sucessful Attacks', dem_succcess),
                     ('Republican Sucessful Attacks', rep_success)):
    print(label, value)

In [None]:
# NOTE(review): `money` is not read anywhere else in this notebook and `x` is
# reassigned before its next use - this cell looks like leftover scratch work.
money = [1.5e5, 2.5e6, 5.5e6, 2.0e7]
x = np.arange(len(money))

In [None]:
# Bar chart of the per-event casualty average for each party, with the value
# written at the top of each bar.
plt.figure(figsize = (9, 5))
kw_values = [dem_kw, rep_kw]
x = np.arange(len(kw_values))
plt.bar(x, kw_values)
plt.xticks(x, ('Dem', 'Rep'))
plt.xlabel('Party', fontsize = 14)
plt.ylabel('Number Killed \nor\n Wounded \n (normalized per event)', fontsize = 14)
plt.title('Number of People Killed or Wounded As a Function of US President', fontsize = 14)
for position, height in zip(x, kw_values):
    plt.text(position, height, str(height))
plt.show()

In [None]:
# 'success' values of every row, split by party.
# The Republican filter previously compared against the misspelled label
# 'Republician' and therefore always produced an empty list.
dem_success = df_combined[df_combined['party'] == 'Democratic']['success'].tolist()
rep_success = df_combined[df_combined['party'] == 'Republican']['success'].tolist()

In [None]:
# Rows under a Democratic president, then summary statistics of their 'success' column.
dem_success = df_combined.loc[df_combined['party'].eq('Democratic')]
dem_success.describe()['success']

In [None]:
# Rows under a Republican president, then summary statistics of their 'success' column.
rep_success = df_combined.loc[df_combined['party'].eq('Republican')]
rep_success.describe()['success']

In [None]:
# Mean, standard deviation and count of 'success' for each party; these are
# the inputs to the t-test computed from summary statistics.
rep_stats = rep_success.describe()['success']
dem_stats = dem_success.describe()['success']

mean1, std1, n1 = rep_stats['mean'], rep_stats['std'], rep_stats['count']
mean2, std2, n2 = dem_stats['mean'], dem_stats['std'], dem_stats['count']

In [None]:
# Two-sample t-test from summary statistics (sample 1 = Republican, sample 2 = Democratic).
tstat, pvalue = ttest_ind_from_stats(mean1=mean1, std1=std1, nobs1=n1,
                                     mean2=mean2, std2=std2, nobs2=n2)
pvalue

In [None]:
# Display the t statistic returned by the test above.
tstat

In [None]:
# Sum of 'success' per (region, type_of_weapon); the group keys form the index.
weapon_cols = ['success', 'type_of_weapon', 'region']
df_combined_gb = df_combined[weapon_cols].groupby(['region', 'type_of_weapon']).sum()

In [None]:
# Pie chart of summed 'success' per weapon type for one region; the weapon
# names go in a legend beside the pie rather than on the wedges.
oceania = df_combined_gb.loc['Australasia & Oceania']
labels = oceania.index.values.tolist()
values = oceania['success'].tolist()

plt.pie(values, labels = None)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), labels = labels)
plt.title('Mode of Attack for Australasia & Oceania')
plt.axis('equal')
plt.show()

In [None]:
# Summed 'success' per year for rows whose attack type is 'Bombing/Explosion'.
is_bombing = df_combined['type_of_attack'] == 'Bombing/Explosion'
df_bomb = df_combined[is_bombing]
df_bomb_gb = df_bomb[['year', 'success']].groupby('year', as_index = False).sum()
x, y = df_bomb_gb['year'], df_bomb_gb['success']
plt.plot(x, y)
plt.show()

In [None]:
# Cross-tabulate year against attack type: each cell counts the rows (events)
# of that type in that year, whatever their 'success' value.
df_combined_ct = pd.crosstab(df_combined.year, df_combined.type_of_attack)
df_combined_ct.plot(figsize=(15,8), alpha = 1, linewidth=3)
# The crosstab counts every event, not only successful ones, so the axis is
# labelled to match the data and the title.
plt.ylabel('Number of Attacks', fontsize = 14)
plt.xlabel('Year', fontsize = 14)
plt.title('Number of Attacks Per Year', fontsize = 14)
plt.show()

In [None]:
#Sum of 'success' per country, showing the five highest totals.
# NOTE(review): this aggregates `df` (the pre-merge frame) although the result
# is named df_combined_*; confirm that is intentional.

country_success = df[['success', 'country']]
df_combined_country_gb = country_success.groupby(['country'], as_index = False).sum()
df_combined_country_gb.sort_values('success', ascending=False).head()

In [None]:
# Same ranking as above, showing the five countries with the lowest totals.
df_combined_country_gb.sort_values('success', ascending=False).tail()

In [None]:
#Count the rows per attack type (non-null 'success' entries), largest first.

type_rows = df_combined[['type_of_attack', 'success']]
df_combined_type_gb = (
    type_rows
    .groupby(['type_of_attack'], as_index =False)
    .count()
    .sort_values(by=['success'], ascending=False)
)

In [None]:
# Bar chart of the number of rows per attack type.
plt.figure(figsize=(6, 6))
sns.barplot(x = 'type_of_attack', y = 'success', data = df_combined_type_gb)
plt.xticks(rotation=90)
plt.xlabel('Type of Attack', fontsize = 14)
plt.ylabel('Number of Attacks', fontsize = 14)
plt.title('Number of Attacks by Type', fontsize = 14)
plt.grid(True)
# Save only once the figure is fully styled; previously the file was written
# before the grid was enabled, so Figure2.png differed from the shown plot.
plt.savefig('Figure2.png')
plt.show()

In [None]:
# Row count per country for attack type 'Hostage Taking (Kidnapping)'
# (country becomes the index here).
kidnap_rows = df_combined[df_combined['type_of_attack'] == 'Hostage Taking (Kidnapping)']
df_kidnapping = kidnap_rows[['country', 'success']].groupby('country').count()

In [None]:
# Row count per region for attack type 'Bombing/Explosion'; the count is held
# in the 'type_of_attack' column.
bombing_rows = df_combined[df_combined['type_of_attack'] == 'Bombing/Explosion']
df_bombing = bombing_rows[['type_of_attack', 'region']].groupby('region', as_index=False).count()

In [None]:
# Region names and their bombing counts, as plain lists for the treemap.
labels, sizes = (df_bombing[column].tolist() for column in ('region', 'type_of_attack'))

In [None]:
# Treemap of bombing counts per region.
treemap_ax = squarify.plot(sizes=sizes, label=labels, alpha=.4 )
treemap_ax.axis('off')
treemap_ax.set_title('Type of Attack By Region (Bombing/Explosions)')
plt.show()

In [None]:
# Number of regions present in the bombing summary.
len(df_bombing['region'].tolist())

In [None]:
# Row count per country for attack type 'Hostage Taking (Kidnapping)',
# with 'country' kept as a regular column.
is_kidnapping = df_combined['type_of_attack'] == 'Hostage Taking (Kidnapping)'
df_kidnapping = (
    df_combined.loc[is_kidnapping, ['country', 'success']]
    .groupby(['country'], as_index=False)
    .count()
)

In [None]:
# Keep the ten countries with the highest kidnapping counts and display them.
kidnapping_ranked = df_kidnapping.sort_values('success', ascending=False)
df_kidnapping = kidnapping_ranked.head(10)
df_kidnapping

In [None]:
# Row count per target type; show the ten most frequent.
target_rows = df_combined[['type_of_target', 'success']]
df_target_gb = target_rows.groupby(['type_of_target'], as_index =False).count()
df_target_gb.sort_values(by=['success'], ascending=False).head(10)

In [None]:
#Sum the 'success' column per year and keep the ten years with the highest totals

year_rows = df_combined[['year', 'success']]
df_year_gb = year_rows.groupby(['year'], as_index = False).sum()
df_year_10 = df_year_gb.sort_values(by=['success'], ascending = False).head(10)

In [None]:
# Bar chart of the ten years with the highest summed 'success'.
# NOTE(review): the plotted value is the per-year sum of 'success', while the
# axis label and title speak of total attacks - confirm which is intended.
plt.figure(figsize=(6, 6))
sns.set_style('whitegrid')
ax = sns.barplot(data = df_year_10, x='year', y='success')
ax.grid(True)
ax.set_xlabel('Year', fontsize = 14)
ax.set_ylabel('Total Number of Attacks', fontsize = 14)
ax.set_title('Years With Most Terror Attacks', fontsize = 14)
plt.show()

In [None]:
#Sum the 'success' column per month, largest total first

df_month_gb = (
    df_combined[['month', 'success']]
    .groupby(['month'], as_index = False)
    .sum()
    .sort_values(by=['success'], ascending = False)
)
df_month_gb

In [None]:
#Sum the 'success' column per region and show the five highest totals

region_rows = df_combined[['region', 'success']]
df_country_gb = region_rows.groupby(['region'], as_index = False).sum()
df_country_gb.sort_values(by=['success'], ascending = False).head()

In [None]:
# Rows whose country is 'Colombia'; list the available columns.
df_columbia = df_combined.loc[df_combined['country'].eq('Colombia')]
list(df_columbia.columns)

In [None]:
# Row count per attack type within the Colombia rows.
(df_columbia[['success', 'type_of_attack']]
 .groupby('type_of_attack', as_index = False)
 .count())

In [None]:
# Collapse the Colombia rows to one row per year holding the summed 'success'.
colombia_year_rows = df_columbia[['year', 'success']]
df_columbia = colombia_year_rows.groupby(['year'], as_index = False).sum()

In [None]:
# Line plot of summed 'success' per year for Colombia.
plt.figure(figsize = (5, 5))
x, y = df_columbia['year'], df_columbia['success']
plt.plot(x, y)
plt.show()

In [None]:
df_columbia = df_combined[df_combined['country'] == 'Colombia']

# One row per year: summed 'success' alongside that year's GDP value.
# 'gdp_value' arrives through the merge on the country-code + year key, so it
# is repeated on every event row of a given year. Summing it (as before)
# multiplied GDP by the number of events; take a single value per year instead.
# NOTE(review): assumes the GDP table holds one row per country and year.
df_columbia = (
    df_columbia[['year', 'success', 'gdp_value']]
    .groupby(['year'], as_index=False)
    .agg({'success': 'sum', 'gdp_value': 'first'})
)
df_columbia

In [None]:
# Dual-axis plot for Colombia: summed 'success' per year (left axis, red)
# against 'gdp_value' (right axis, blue).
x = df_columbia['year']
y1 = df_columbia['success']
y2 = df_columbia['gdp_value']
fig, ax1 = plt.subplots(figsize = (8, 8))
attacks_line, = ax1.plot(x, y1, c='r', linewidth = 3, label='Successful attacks')
ax1.set_ylabel('Number of Successful Attacks \nIn Colombia', fontsize = 14)
ax2 = ax1.twinx()
gdp_line, = ax2.plot(x, y2, c='b', linewidth = 3, label='GDP')
ax2.set_ylabel('GDP Values (US $)', fontsize = 14)
ax1.set_xlabel('Year', fontsize = 14)
plt.title('GDP Values and Number of Successful Attacks in Colombia', fontsize = 14)
# The two lines live on different axes, so both handles are passed explicitly.
# The previous bare plt.legend() had no labelled artists and drew nothing.
ax2.legend(handles=[attacks_line, gdp_line], loc='best')
# Save after the legend is in place so the file matches the displayed figure.
plt.savefig('Figure3.png')
plt.show()

In [None]:
# Rows whose country is 'Iraq', re-indexed from zero; preview the first rows.
is_iraq = df_combined['country'] == 'Iraq'
df_iraq = df_combined[is_iraq].reset_index(drop=True)
df_iraq.head()

In [None]:
# Summed 'success' per year for Iraq, plotted over the full 1970-2020 range.
df_iraq_year = df_iraq[['year', 'success']].groupby(['year'],as_index=False).sum()
plt.figure(figsize=(10, 10))
x = df_iraq_year['year']
y = df_iraq_year['success']
plt.plot(x, y, alpha = 0.5)
plt.ylabel('Number of Successful Attacks Per Year', fontsize = 14)
plt.xlabel('Year', fontsize = 14)
plt.title('Number of Successful Attacks Per Year (Iraq)', fontsize = 14)
plt.xlim(1970, 2020)
plt.show()

# Yearly totals after 2000. This expression used to sit in the middle of the
# cell, where its result was silently discarded; as the last expression it is
# now rendered below the plot.
df_iraq_year[df_iraq_year['year'] > 2000]

In [None]:
# Summed 'success' per Iraqi city; show the five highest totals.
iraq_city_rows = df_iraq[['city', 'success']]
df_iraq_city_gb = iraq_city_rows.groupby('city', as_index=False).sum()
df_iraq_city_gb.sort_values(by=['success'], ascending=False).head()

In [None]:
# Plot 'gdp_value' against 'year' using every Iraq row (not aggregated).
x, y = df_iraq['year'], df_iraq['gdp_value']
plt.plot(x, y)
plt.show()

In [None]:
# Iraq rows with year > 2000, re-indexed from zero, plus their year and GDP columns.
after_2000 = df_iraq['year'] > 2000
df_iraq_2000_2016 = df_iraq[after_2000].reset_index(drop=True)
x, y = df_iraq_2000_2016['year'], df_iraq_2000_2016['gdp_value']

In [None]:
# One row per year for Iraq: summed 'success' alongside that year's GDP value.
# 'gdp_value' is repeated on every event row of a year (it comes from the
# merge on the country-code + year key), so summing it - as was done before -
# multiplied GDP by the number of events. Take a single value per year instead.
# NOTE(review): assumes the GDP table holds one row per country and year.
df_iraq_year = (
    df_iraq[['year', 'success', 'gdp_value']]
    .groupby(['year'], as_index=False)
    .agg({'success': 'sum', 'gdp_value': 'first'})
)
df_iraq_year_2000_2016 = df_iraq_year[df_iraq_year['year'] > 2000].reset_index(drop=True)
# Years without a GDP figure are shown as 0, matching the previous output for those years.
df_iraq_year_2000_2016 = df_iraq_year_2000_2016.fillna(0)

In [None]:
# Display the per-year Iraq table for years after 2000.
df_iraq_year_2000_2016

In [None]:
# Dual-axis plot for Iraq (years after 2000): summed 'success' per year
# (left axis, red) against 'gdp_value' (right axis, blue).
x = df_iraq_year_2000_2016['year']
y1 = df_iraq_year_2000_2016['success']
y2 = df_iraq_year_2000_2016['gdp_value']

fig, ax1 = plt.subplots(figsize = (8, 8))
attacks_line, = ax1.plot(x, y1, c='r', linewidth = 3, label='Successful attacks')
ax1.set_ylabel('Number of Successful Attacks In Iraq', fontsize = 14)
ax2 = ax1.twinx()
gdp_line, = ax2.plot(x, y2, c='b', linewidth = 3, label='GDP')
ax2.set_ylabel('GDP Values', fontsize = 14)
ax1.set_xlabel('Year', fontsize = 14)
plt.title('GDP Values and Number of Successful Attacks in Iraq', fontsize = 14)
# The two lines live on different axes, so both handles are passed explicitly.
# The previous bare plt.legend() had no labelled artists and drew nothing.
ax2.legend(handles=[attacks_line, gdp_line], loc='best')
plt.show()

In [None]:
# Pairwise correlations between the numeric columns of the Iraq rows.
# The frame also holds text columns (country, city, ...); recent pandas
# versions raise on those instead of silently skipping them, so the numeric
# (and boolean) columns are selected explicitly first.
df_iraq.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Row count per attack type within Iraq, most frequent first.
iraq_type_rows = df_iraq[['type_of_attack', 'success']]
df_iraq_attack_type = iraq_type_rows.groupby(['type_of_attack'], as_index=False).count()
df_iraq_attack_type.sort_values(by=['success'], ascending=False).reset_index(drop=True)

In [None]:
# Summed 'success' per (city, year) in Iraq; show the five highest totals.
(df_iraq[['city', 'success', 'year']]
 .groupby(['city', 'year'], as_index = False)
 .sum()
 .sort_values(by='success', ascending=False)
 .head())

In [None]:
# Summed 'success' per group name for the Baghdad rows; show the five highest totals.
df_baghdad = df_iraq.loc[df_iraq['city'].eq('Baghdad')]
baghdad_group_rows = df_baghdad[['group_name', 'success']]
df_baghdad_gb = baghdad_group_rows.groupby('group_name', as_index=False).sum()
df_baghdad_gb.sort_values(by='success', ascending=False).head()

In [None]:
# Iraq rows attributed to the named group (old index kept as a column by
# reset_index), then summed 'success' per city, five highest totals.
isil_mask = df_iraq['group_name'] == 'Islamic State of Iraq and the Levant (ISIL)'
df_isis = df_iraq[isil_mask].reset_index()
(df_isis[['city', 'success']]
 .groupby('city', as_index=False)
 .sum()
 .sort_values(by='success', ascending=False)
 .head())

In [None]:
# Summed 'success' per target type for the ISIL rows; show the five highest totals.
isis_target_totals = df_isis[['type_of_target', 'success']].groupby('type_of_target', as_index=False).sum()
isis_target_totals.sort_values(by='success', ascending=False).head()

In [None]:
# Summed 'success' per year for rows whose country is 'United States'.
df_usa = df_combined[df_combined['country'] == 'United States']
df_usa_gb = df_usa[['year', 'success']].groupby('year', as_index=False).sum()
x, y = df_usa_gb['year'], df_usa_gb['success']

plt.figure(figsize=(10, 10))
plt.plot(x, y)
plt.xlabel('Year', fontsize = 14)
plt.ylabel('Number of Successful Terror Attacks', fontsize = 14)
plt.show()

In [None]:
# Summed 'success' per US city; show the five highest totals.
df_usa = df_combined.loc[df_combined['country'].eq('United States')]
(df_usa[['city', 'success']]
 .groupby('city', as_index=False)
 .sum()
 .sort_values(by='success', ascending=False)
 .head())

In [None]:
# Los Angeles rows: number of events per year for each attack type.
df_losangeles = df_usa[df_usa['city'] == 'Los Angeles']
cross_la = pd.crosstab(df_losangeles.year, df_losangeles.type_of_attack)
cross_la.plot()
plt.show()

# Five years with the highest summed 'success'. This table used to be built in
# the middle of the cell, where its result was silently discarded; as the last
# expression it is now rendered below the plot.
(df_losangeles[['year', 'success']]
 .groupby('year', as_index=False)
 .sum()
 .sort_values(by='success', ascending=False)
 .head())

In [None]:
# New York City rows: number of events per year for each attack type.
df_nyc = df_usa[df_usa['city'] == 'New York City']
cross_nyc = pd.crosstab(df_nyc.year, df_nyc.type_of_attack)
cross_nyc.plot()
plt.show()

# Five years with the highest summed 'success'. This table used to be built in
# the middle of the cell, where its result was silently discarded; as the last
# expression it is now rendered below the plot.
(df_nyc[['year', 'success']]
 .groupby('year', as_index=False)
 .sum()
 .sort_values(by='success', ascending=False)
 .head())

In [None]:
#Small multiples: one panel per country showing summed 'success' per year.
#Technique reference:
#http://jonathansoma.com/lede/data-studio/classes/small-multiples/long-explanation-of-using-plt-subplots-to-create-small-multiples/

plt.figure(figsize=(20, 40), facecolor = 'white')

country_groups = df_combined.groupby('country')

# Keep the original 15 x 11 layout, but add rows when there are more than 165
# countries; a fixed grid makes plt.subplot raise once plot_num exceeds it.
n_cols = 11
n_rows = max(15, -(-country_groups.ngroups // n_cols))  # ceiling division

for plot_num, (country_name, selection) in enumerate(country_groups, start=1):
    ax = plt.subplot(n_rows, n_cols, plot_num)
    # Aggregate to one value per year first: plotting the raw rows drew the
    # per-event 'success' value of every single event instead of a yearly trend.
    yearly = selection.groupby('year', as_index=False)['success'].sum()
    yearly.plot(x= 'year', y='success', ax=ax, label = country_name, legend = False)
    ax.set_title(country_name)
plt.tight_layout()
plt.show()