# Exploratory Data Analysis - Global Terrorism



#### By Venkata Hemanth Tammareddy

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the Global Terrorism CSV into a DataFrame and preview the first rows.
# NOTE(review): the path below is absolute and machine-specific; it has to be
# adjusted before running the notebook anywhere else.
data = pd.read_csv(
    'C:/Users/HEMANTH/Desktop/Analytics projects/Global Terrorism.csv'
)
data.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

In [None]:
# Per column: True when at least one value is missing.
data.isna().any()

In [None]:
# Number of distinct values in each column.
data.nunique()

In [None]:
# Let pandas render up to 135 columns so the wide preview is not truncated.
pd.options.display.max_columns = 135
data.head()

## Cleaning the Data

In [None]:
# List every column label of the frame as a NumPy array.
data.columns.values

## Renaming the important columns


In [None]:
# Give the columns used in the analysis shorter, readable names (in place).
data.rename(
    columns={
        'iyear': 'Year',
        'imonth': 'Month',
        'iday': "day",
        'gname': 'Group',
        'country_txt': 'Country',
        'region_txt': 'Region',
        'provstate': 'State',
        'city': 'City',
        'latitude': 'latitude',
        'longitude': 'longitude',
        'summary': 'summary',
        'attacktype1_txt': 'Attacktype',
        'targtype1_txt': 'Targettype',
        'weaptype1_txt': 'Weapon',
        'nkill': 'kill',
        'nwound': 'Wound',
    },
    inplace=True,
)

In [None]:
# Keep only the columns the rest of the notebook works with, in this order.
keep_columns = [
    'Year', 'Month', 'day',
    'Country', 'State', 'Region', 'City',
    'latitude', 'longitude',
    "Attacktype", 'kill', 'Wound',
    'target1', 'summary', 'Group',
    'Targettype', 'Weapon', 'motive',
]
data = data[keep_columns]
data.head()

In [None]:
# Print the dtype and non-null count of every column in `data`.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

## Data Visualization

## Correlation between the features

In [None]:
# Pairwise correlation of the numeric columns only. The frame also holds text
# columns (Country, City, summary, ...), and pandas >= 2.0 raises if they are
# passed to corr(), so they are filtered out explicitly first.
data.select_dtypes(include='number').corr()

In [None]:
# Heatmap of the correlation matrix, annotated with the coefficient values.
# Only numeric columns are correlated: pandas >= 2.0 raises on text columns.
numeric_corr = data.select_dtypes(include='number').corr()

plt.figure(figsize=(15, 10))
sns.heatmap(numeric_corr, annot=True)
plt.show()

In [None]:
# Number of missing values in each column.
data.isna().sum()

## General Analysis


In [None]:
# Headline figures: the most frequent value of each categorical column.
#
# For City and Group the placeholder label 'Unknown' is excluded by value.
# Taking the second-ranked entry instead would silently report the wrong
# answer whenever 'Unknown' is not the single most frequent label.
# NOTE(review): assumes 'Unknown' is the placeholder used in both columns.
known_cities = data.loc[data["City"] != "Unknown", "City"]
known_groups = data.loc[data["Group"] != "Unknown", "Group"]

print("Country with most attacks: ", data["Country"].value_counts().idxmax())
print("City with most attacks: ", known_cities.value_counts().idxmax())
print("Region with most attacks: ", data["Region"].value_counts().idxmax())
print("Year with most attacks: ", data["Year"].value_counts().idxmax())
print("Month with most attacks: ", data["Month"].value_counts().idxmax())
print("Group with most attacks: ", known_groups.value_counts().idxmax())
print("Most Attack Types: ", data["Attacktype"].value_counts().idxmax())

## Exploratory Data Analysis

## 1. Year Vs Attacks

In [None]:
# Bar chart of the number of attacks recorded in each year.
years_count = data['Year'].value_counts(dropna=False).sort_index()

# Take the x labels from the sorted counts themselves. unique() returns years
# in order of first appearance, which only lines up with the sorted counts if
# the rows happen to be sorted by year already.
year = years_count.index.to_numpy()

plt.figure(figsize=(15, 10))
sns.barplot(x=year, y=years_count.to_numpy())
plt.xticks(rotation=90)
plt.xlabel('Attacking Year')
plt.ylabel('Number of Attacks Each Year')
plt.title('Attacks In Years')
plt.show()

## 2. Top Affected Countries

In [None]:
# Bar chart of the ten countries with the most recorded attacks.
# The counts are computed once and reused for both axes.
top_countries = data['Country'].value_counts().head(10)

plt.subplots(figsize=(20, 10))
# x and y are passed by keyword: seaborn >= 0.12 rejects them positionally.
sns.barplot(x=top_countries.index, y=top_countries.values)
plt.title('Top Countries Affected')
plt.xlabel('Countries')
plt.ylabel('Count')
plt.xticks(rotation=90)
plt.show()

## 3. Top affected Cities


In [None]:
# Bar chart of the ten most frequent City values.
# The counts are computed once and reused for both axes.
top_cities = data['City'].value_counts().head(10)

plt.subplots(figsize=(20, 10))
# x and y are passed by keyword: seaborn >= 0.12 rejects them positionally.
sns.barplot(x=top_cities.index, y=top_cities.values, palette='YlOrBr_r')
plt.title('Top Cities Affected')
plt.xlabel('City')
plt.ylabel('Count')
plt.xticks(rotation=90)
plt.show()

## 4. Activity each year

In [None]:
# Year x Region table of attack counts, drawn as overlapping (unstacked) areas.
attacks_by_region = pd.crosstab(data.Year, data.Region)
attacks_by_region.plot.area(stacked=False, figsize=(20, 10))
plt.title('Terrorist Activities By Region In Each Year', fontsize=25)
plt.xlabel("Year", fontsize=20)
plt.ylabel('Number of Attacks', fontsize=20)
plt.show()

## 5. Attack Types


In [None]:
# Pie chart: share of each attack type among all rows, labelled in percent.
attack_type_counts = data['Attacktype'].value_counts()

plt.figure(figsize=(15, 15))
attack_type_counts.plot(kind='pie', autopct="%1.1f%%")
plt.show()

## 6. Target Types


In [None]:
# Pie chart: share of each target type among all rows, labelled in percent.
target_type_counts = data['Targettype'].value_counts()

plt.figure(figsize=(20, 20))
target_type_counts.plot(kind='pie', autopct="%1.1f%%")
plt.show()

## 7. Weapon Types


In [None]:
# Pie chart: share of each weapon type among all rows, labelled in percent.
weapon_type_counts = data['Weapon'].value_counts()

plt.figure(figsize=(20, 20))
weapon_type_counts.plot(kind='pie', autopct="%1.1f%%")
plt.show()

In [None]:
# Total of the 'kill' column for every year, as a one-column frame indexed by Year.
df = data.groupby('Year')[['kill']].sum()
df.head()

## 8. Deaths in Years


In [None]:
# Bar chart of the 20 years with the highest total of 'kill', largest first.
kills_per_year = data.groupby('Year')['kill'].sum()
deadliest_years = kills_per_year.sort_values(ascending=False).head(20)

plt.figure(figsize=(15, 7))
deadliest_years.plot.bar(colormap='PRGn')
plt.xticks(rotation=90)
plt.ylabel("Killed")
plt.title('No. of people killed Year wise')
plt.show()

## 9. Injured vs Year

In [None]:
# Bar chart of the 20 years with the highest total of 'Wound', largest first.
wounded_per_year = data.groupby('Year')['Wound'].sum()
worst_years = wounded_per_year.sort_values(ascending=False).head(20)

plt.figure(figsize=(15, 7))
worst_years.plot.bar(colormap='seismic')
plt.xticks(rotation=90)
plt.ylabel("Injured")
plt.title('No. of people Injured/yr')
plt.show()

## 10. Killed vs Country

In [None]:
# Bar chart of the 10 countries with the highest total of 'kill', largest first.
kills_per_country = data.groupby('Country')['kill'].sum()
deadliest_countries = kills_per_country.sort_values(ascending=False).head(10)

plt.figure(figsize=(15, 7))
deadliest_countries.plot.bar(colormap='PRGn')
plt.xticks(rotation=90)
plt.ylabel("Killed")
plt.title('No. of people killed Country wise')
plt.show()

## 11. Killed vs Region

In [None]:
# Bar chart of the (up to) 10 regions with the highest total of 'kill', largest first.
kills_per_region = data.groupby('Region')['kill'].sum()
deadliest_regions = kills_per_region.sort_values(ascending=False).head(10)

plt.figure(figsize=(15, 7))
deadliest_regions.plot.bar(colormap='seismic')
plt.xticks(rotation=90)
plt.ylabel("Killed")
plt.title('No. of people killed Region wise')
plt.show()

## 12. Wounded vs Region

In [None]:
# Bar chart of the (up to) 10 regions with the highest total of 'Wound', largest first.
wounded_per_region = data.groupby('Region')['Wound'].sum()
worst_regions = wounded_per_region.sort_values(ascending=False).head(10)

plt.figure(figsize=(15, 7))
worst_regions.plot.bar(colormap='PRGn')
plt.xticks(rotation=90)
plt.ylabel("Wounded")
plt.title('No. of people wounded Region wise')
plt.show()

## 13. Attacks VS Killed

In [None]:
# Side-by-side bars of attack count and total 'kill' for the 15 countries
# with the most attacks.
count_terror = data['Country'].value_counts().head(15).to_frame(name='Attacks')
count_kill = data.groupby('Country')['kill'].sum().to_frame()

# Left join on the country index keeps exactly the 15 countries selected above.
attacks_and_kills = count_terror.merge(
    count_kill, left_index=True, right_index=True, how='left'
)
attacks_and_kills.plot.bar(width=0.9)

fig = plt.gcf()
fig.set_size_inches(16, 4)
plt.title("Attacks VS Killed", fontsize=20)
plt.show()

In [None]:
# Ten (Group, Country) pairs with the highest total of 'kill'.
# Rows whose Group is 'Unknown' are filtered out before aggregating. This
# avoids the KeyError that dropping the label afterwards raises when it is
# absent, and drops the deprecated `axis` argument of groupby.
df = data.loc[data['Group'] != 'Unknown', ['Group', 'Country', 'kill']]
df = (
    df.groupby(['Group', 'Country'])
    .sum()
    .sort_values('kill', ascending=False)
    .reset_index()
    .head(10)
)
df

## Terror Groups

In [None]:
# Horizontal bar chart of the ten groups with the most recorded attacks.
# 'Unknown' is removed by value. Slicing off the first entry would only be
# correct while 'Unknown' happens to be the most frequent label.
top_groups = data.loc[data['Group'] != 'Unknown', 'Group'].value_counts().head(10)

plt.figure(figsize=(15, 7))
sns.barplot(x=top_groups.values, y=top_groups.index, palette='magma')
plt.title('Top 10 Terrorist Organisation with Highest Terror Attacks')
plt.ylabel('Terror Group Name')
plt.xlabel('Number of Attacks')
plt.xticks(rotation=90)
plt.show()

## Data Analysis on Customised data


### Terrorist attack of a particular year


In [None]:
# Notebook shell escape: install folium with pip before importing it.
!pip install folium
import folium

# Importing from folium.plugins also makes folium.plugins.MarkerCluster
# resolvable as an attribute, which the map cell below relies on.
from folium.plugins import MarkerCluster

In [None]:
# Boolean mask selecting the rows whose Year is 2015.
filterYr = data["Year"].eq(2015)

In [None]:
# Build the [City, latitude, longitude] rows that the map markers are made from.
filterData = data[filterYr]

# Select the three columns by name. A label slice "City":"longitude" would
# silently pick up extra columns, and shift the positions read later, if the
# column order of `data` ever changed.
reqFields = filterData[["City", "latitude", "longitude"]]

# A marker needs a popup label and both coordinates, so drop incomplete rows.
reqFields = reqFields.dropna()
reqFieldsLst = reqFields.values.tolist()

In [None]:
# Interactive world map with one clustered marker per row of reqFieldsLst.
# Named attack_map so that Python's built-in map() stays usable in later cells.
attack_map = folium.Map(location=[0, 30], tiles="CartoDB positron", zoom_start=2)

markerCluster = folium.plugins.MarkerCluster().add_to(attack_map)

# Each row is [City, latitude, longitude]; the city name becomes the popup text.
for city, lat, lon in reqFieldsLst:
    folium.Marker(location=[lat, lon], popup=city).add_to(markerCluster)

# Last expression of the cell: makes the notebook render the map.
attack_map

## Conclusion and Results


#### Country with the most attacks: Iraq

#### City with the most attacks: Baghdad

#### Region with the most attacks: Middle East & North Africa

#### Year with the most attacks: 2014

#### Month with the most attacks: 5

#### Group with the most attacks: Taliban

#### Most Attack Types: Bombing/Explosion