# The Sparks Foundation - TASK 4 - EDA on Global Terrorism

In [None]:
#Importing the necessary libraries

import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

In [None]:
# Load the dataset from a remote CSV into the working DataFrame `df`.
# NOTE(review): the shortened bit.ly link hides which file/version is actually
# fetched — record the real source of the data here for reproducibility.
df = pd.read_csv('https://bit.ly/2TK5Xn5')

In [None]:
# Preview the first five rows (DataFrame.head() defaults to n=5) as a quick check of the load
df.head()

In [None]:
# Preview the last five rows (DataFrame.tail() defaults to n=5) to check the end of the file loaded cleanly
df.tail()

In [None]:
# Dimensions of the loaded data as a (rows, columns) tuple
df.shape

In [None]:
# Show every column name on one comma-separated line.
# The names are stored under `column_names` rather than `list`, because binding
# `list` would shadow the built-in type for the rest of the kernel session.
column_names = df.columns.tolist()
# Unpack the names so that `sep` is actually applied between them; with a
# single list argument `sep` has no effect and the list repr is printed instead.
print(*column_names, sep=', ')

In [None]:
# Keep only the columns needed for this analysis.
# The explicit .copy() makes the result an independent frame, so the column
# assignments performed in later cells operate on their own data and cannot
# raise pandas' SettingWithCopyWarning.
df = df[['eventid','iyear','imonth','iday','country_txt','region_txt','provstate','city','latitude','longitude','success','suicide'
        ,'attacktype1_txt','targtype1_txt','target1','gname','motive','weaptype1_txt','nkill','nwound','propvalue']].copy()

In [None]:
# Preview the frame again now that it has been narrowed to the selected columns
df.head()

In [None]:
# Count the missing (null/NaN) values in each column
df.isnull().sum()

In [None]:
# Treat missing killed / wounded counts as zero, then derive the per-event
# casualty total (killed + wounded) as a new 'Casualties' column.
df['nkill'] = df['nkill'].fillna(value=0)
df['nwound'] = df['nwound'].fillna(value=0)
df['Casualties'] = df['nkill'].add(df['nwound'])

In [None]:
# Give the columns shorter, readable names.
# Maps each original column name to the name used throughout the rest of the notebook.
column_renames = {
    'eventid': 'Eventid',
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'provstate': 'State',
    'city': 'City',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'success': 'Success',
    'suicide': 'Suicide',
    'attacktype1_txt': 'Attacktype',
    'targtype1_txt': 'Targtype',
    'target1': 'Target',
    'gname': 'Group',
    'motive': 'Motive',
    'weaptype1_txt': 'Weapontype',
    'nkill': 'killed',
    'nwound': 'Wounded',
    'propvalue': 'Propvalue',
}
df.rename(columns=column_renames, inplace=True)
df.head()

In [None]:
# Summary statistics for the frame via DataFrame.describe()
df.describe()

In [None]:
# Total casualties over all events. Series.sum() is vectorised and skips NaN by
# default, so neither the Python-level built-in sum() nor dropna() is needed.
print("Total number of casualities: ",int(df['Casualties'].sum()))

In [None]:
# Total people killed over all events. Series.sum() is vectorised and skips NaN
# by default, so neither the Python-level built-in sum() nor dropna() is needed.
print("Total number of killed: ",int(df['killed'].sum()))

In [None]:
# Total people wounded over all events. Series.sum() is vectorised and skips NaN
# by default, so neither the Python-level built-in sum() nor dropna() is needed.
print("Total number of wounded: ",int(df['Wounded'].sum()))

# Total number of Terrorist Activities per year

In [None]:
# Bar chart of the number of recorded events per year, each bar labelled with its count.
ax = plt.subplots(figsize=(15, 6))[1]
sns.countplot(x="Year", data=df, palette="magma", edgecolor=sns.color_palette("dark", 10), ax=ax)
plt.xticks(rotation=90)

# Write every bar's height just above its top edge, centred on the bar;
# the text is rotated so neighbouring labels do not overlap.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height}',
        (bar_centre, bar_height),
        ha='center',
        va='baseline',
        fontsize=12,
        color='black',
        xytext=(0, 5),
        textcoords='offset points',
        rotation=90,
    )

plt.show()

#### Inference

* Year with lowest Terrorist Activities : 1971.
* Year with highest Terrorist Activities : 2014.

# Total number of Casualties by year

In [None]:
# Bar chart of total casualties (killed + wounded) per year, each bar labelled with its total.
ax = plt.subplots(figsize=(18, 6))[1]

# One row per year with the summed casualties for that year
cas_by_year = df.groupby('Year', as_index=False)['Casualties'].sum()

sns.barplot(
    data=cas_by_year,
    x='Year',
    y='Casualties',
    palette="flare",
    edgecolor=sns.color_palette('dark', 10),
    ax=ax,
)
plt.xticks(rotation=90)

# Label each bar with its height, placed slightly above the bar top
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height}',
        (bar_centre, bar_height),
        ha='center',
        va='baseline',
        fontsize=12,
        color='black',
        xytext=(0, 5),
        textcoords='offset points',
        rotation=90,
    )
plt.show()

#### Inference

* Year 1971 has the lowest number of casualties : 255.
* Year 2014 has the highest number of casualties : 85618.

# Total Number of people killed region-wise

In [None]:
# Bar chart of total people killed per region, ordered from most to fewest.
ax = plt.subplots(figsize=(18, 6))[1]

# Sum the 'killed' column per region and sort so the deadliest region comes first
region_killed = (
    df.groupby('Region')['killed']
    .sum()
    .rename('Total Killed')
    .reset_index()
    .sort_values('Total Killed', ascending=False)
)

sns.barplot(data=region_killed, x='Region', y='Total Killed', palette="mako", ax=ax)
plt.xticks(rotation=90)

# Label each bar with its height, placed slightly above the bar top
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height}',
        (bar_centre, bar_height),
        ha='center',
        va='baseline',
        fontsize=12,
        color='black',
        xytext=(0, 5),
        textcoords='offset points',
        rotation=45,
    )

plt.show()

#### Inference

* Over 1 lakh (100,000) people were killed in the Middle East and North Africa region.
* Over 100 people were killed in the Australasia and Oceania region.

# Top 15 nations in the world for the amount of fatalities

In [None]:
# Bar chart of the 15 countries with the highest total number of people killed.
ax = plt.subplots(figsize=(15, 8))[1]

# Sum 'killed' per country, sort descending and keep the first 15 rows
country_killed = (
    df.groupby('Country')['killed']
    .sum()
    .reset_index()
    .sort_values('killed', ascending=False)
    .head(15)
)

sns.barplot(
    data=country_killed,
    x='Country',
    y='killed',
    palette="GnBu_r",
    edgecolor=sns.color_palette('dark', 10),
    ax=ax,
)

# Label each bar with its height, placed slightly above the bar top
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height}',
        (bar_centre, bar_height),
        ha='center',
        va='baseline',
        fontsize=12,
        color='black',
        xytext=(0, 5),
        textcoords='offset points',
        rotation=45,
    )

plt.show()

#### Inference

* Iraq stands out with the highest number of fatalities at 78,589, while Yemen records the lowest among these 15 nations, with 8,776.

# Total Attacks vs Casualties by Country

In [None]:
# Grouped bar chart comparing the number of attacks with total casualties for
# the 15 countries that have the most attacks.

# value_counts() sorts by frequency (descending), so the first 15 entries are
# the countries with the most recorded attacks.
country_att = df.Country.value_counts()[:15].to_frame().reset_index()
country_att.columns = ['Country','Total Attacks']

# Total casualties per country, for every country in the data
country_cas = df.groupby('Country').Casualties.sum().to_frame().reset_index()
country_cas.columns = ['Country','Casualties']

# The left merge keeps only the 15 countries above and attaches their casualty totals
country_total = pd.merge(country_att,country_cas,how='left',on = 'Country').sort_values('Total Attacks',ascending = False)

sns.set_palette('Set2')
country_total.plot.bar(x="Country",width=0.8)
plt.xticks(rotation = 90)
fig = plt.gcf()
fig.set_size_inches(16,9)
# plt.show has to be *called*; the bare name is a no-op expression that only
# echoes the function object as the cell output.
plt.show()

#### Inference

* Iraq takes the lead with the highest number of both attacks and casualties.

* Interestingly, Afghanistan reports a higher number of casualties than other countries that experienced more attacks (Iraq aside).

* Conversely, Spain, among these 15 nations, records the lowest figures for both attacks and casualties, painting a contrasting picture in the dataset.

# Total Attacks vs Casualties by Regions

In [None]:
# Grouped bar chart comparing the number of attacks with total casualties per region.

# value_counts() sorts by frequency (descending); at most the first 15 regions are kept
region_att = df.Region.value_counts()[:15].to_frame().reset_index()
region_att.columns = ['Region','Total Attacks']

# Total casualties per region
region_cas = df.groupby('Region').Casualties.sum().to_frame().reset_index()
region_cas.columns = ['Region','Casualties']

# The left merge attaches the casualty totals to the regions selected above
region_total = pd.merge(region_att,region_cas,how='left',on = 'Region').sort_values('Total Attacks',ascending = False)

sns.set_palette('Paired')
region_total.plot.bar(x="Region",width=0.8)
plt.xticks(rotation = 90)
# plt.title must be called. Assigning to it (`plt.title = ...`) replaces the
# pyplot function with a string: no title is drawn and every later
# plt.title(...) call in the same kernel fails.
plt.title("Total Attacks vs Casualties by Region")
fig = plt.gcf()
fig.set_size_inches(16,9)
# plt.show has to be *called*; the bare name is a no-op expression
plt.show()

#### Inference

* The Middle East and North Africa region has faced about 50,000 attacks and over 3 lakh (300,000) casualties.
* Compared to other regions, Australasia and Oceania have experienced fewer attacks and casualties.

# The top 20 Infamous Groups

In [None]:
# Horizontal bar chart of the 20 named groups with the most recorded attacks.

# Attacks attributed to the placeholder group 'Unknown' are excluded.
# errors='ignore' keeps the cell runnable when no 'Unknown' label is present,
# e.g. when it is re-run after `df` has already been filtered elsewhere;
# without it DataFrame.drop raises KeyError for a missing label.
group_attacks = df.Group.value_counts().to_frame().drop('Unknown', errors='ignore').reset_index()[:20]
group_attacks.columns = ['Terrorist Group','Total Attacks']

plt.subplots(figsize=(10,8))
sns.barplot(y = group_attacks['Terrorist Group'],x = group_attacks['Total Attacks'],palette = "mako",edgecolor = sns.color_palette('dark',10))
plt.xticks(rotation = 90)

plt.show()

#### Inference

* The Taliban has carried out over 6,500 attacks, followed by ISIL (Islamic State of Iraq and the Levant) with over 5,000 attacks.

# Top 10 Notorious Groups and the most lethal weapons they use

In [None]:
# Sunburst chart: for the 10 groups responsible for the most deaths, show how
# those deaths are split across weapon types.

# Filter into a separate name instead of rebinding `df`: overwriting the shared
# frame would silently remove the 'Unknown' rows for every other cell that is
# re-run afterwards.
known_groups_df = df[df['Group'] != 'Unknown']

# Deaths per (group, weapon type) pair
grouped_df = known_groups_df.groupby(['Group', 'Weapontype'])['killed'].sum().reset_index()

# The 10 groups with the highest total deaths across all weapon types
top_10_groups = grouped_df.groupby('Group')['killed'].sum().nlargest(10).index
filtered_df = grouped_df[grouped_df['Group'].isin(top_10_groups)]

fig = px.sunburst(filtered_df, path=['Group', 'Weapontype'], values='killed', title='Top 10 Groups and Their Weapon Types with Total Kills')
# Each segment shows its label and its share of the parent segment
fig.update_traces(textinfo='label+percent parent')
fig.update_layout(width=1000, height=900)
fig.show()