In [1]:
#!/usr/bin/env python
import sys
import pandas as pd
import pymongo
import json
import os
import numpy

# Extracting Terrorist Data from CSV file

In [2]:
file = 'globalterrorismdb_0718dist.csv'
terror_df = pd.read_csv(file, encoding = "ISO-8859-1")
low_memory=False,
terror_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,...,,,,,PGIS,0,0,0,0,
1,197000000002,1970,0,0,,0,,130,Mexico,1,...,,,,,PGIS,0,1,1,1,
2,197001000001,1970,1,0,,0,,160,Philippines,5,...,,,,,PGIS,-9,-9,1,1,
3,197001000002,1970,1,0,,0,,78,Greece,8,...,,,,,PGIS,-9,-9,1,1,
4,197001000003,1970,1,0,,0,,101,Japan,4,...,,,,,PGIS,-9,-9,1,1,


# Dashboard Totals

In [3]:
dashboard_df = terror_df.loc[:, ["eventid","nkill","nwound","success","region","country_txt","iyear","region_txt",]]
dashboard_df.head()

Unnamed: 0,eventid,nkill,nwound,success,region,country_txt,iyear,region_txt
0,197000000001,1.0,0.0,1,2,Dominican Republic,1970,Central America & Caribbean
1,197000000002,0.0,0.0,1,1,Mexico,1970,North America
2,197001000001,1.0,0.0,1,5,Philippines,1970,Southeast Asia
3,197001000002,,,1,8,Greece,1970,Western Europe
4,197001000003,,,1,4,Japan,1970,East Asia


In [4]:
#Finding Totals
total_attacks = int(dashboard_df["eventid"].count())
total_wounded = int(dashboard_df["nwound"].count())
total_fatalities = int( dashboard_df["nkill"].sum())
total_success =dashboard_df["success"].loc[dashboard_df["success"]==1].count()
print(f"Total Attacks : {total_attacks}")
print(f"Total Wounded : {total_wounded}")
print(f"Total Fatalities : {total_fatalities}")
print(f"Total Success : {total_success}")

Total Attacks : 181691
Total Wounded : 165380
Total Fatalities : 411868
Total Success : 161632


In [5]:
# Creating a DataFrame for Totals
dashboard_table = pd.DataFrame({"total_attacks": [total_attacks], "total_fatalities": [total_fatalities], 'total_wounded': [total_wounded], 'total_success':[total_success] })
#To be loaded to MongoDB
dashboard_table

Unnamed: 0,total_attacks,total_fatalities,total_wounded,total_success
0,181691,411868,165380,161632


# Stats for Regions

In [6]:
region_values = dashboard_df["region_txt"].value_counts()

In [7]:
region_sum_df = pd.DataFrame(region_values)
region_sum_df.rename(columns={"region_txt":"total_terror"}, inplace=True)
#To be loaded to MongDB
regstats_df = region_sum_df.reset_index()
regstats_df

Unnamed: 0,index,total_terror
0,Middle East & North Africa,50474
1,South Asia,44974
2,South America,18978
3,Sub-Saharan Africa,17550
4,Western Europe,16639
5,Southeast Asia,12485
6,Central America & Caribbean,10344
7,Eastern Europe,5144
8,North America,3456
9,East Asia,802


# Incident Data(yearly)

In [8]:
incident_pyear = dashboard_df['iyear'].value_counts()

In [9]:
incident_pyear_df = pd.DataFrame(incident_pyear)
incident_pyear_df.rename(columns={"iyear":"incidents"}, inplace=True)

#To be loaded to MongoDB
incident_df = incident_pyear_df.reset_index()
incident_df.head()

Unnamed: 0,index,incidents
0,2014,16903
1,2015,14965
2,2016,13587
3,2013,12036
4,2017,10900


# Data for Terrorist Risk Map

In [10]:
country_values = dashboard_df["country_txt"].value_counts()
country_bins = pd.DataFrame(country_values)
country_bins.rename(columns={"country_txt":"incidents"}, inplace=True)
country_bins.head()

Unnamed: 0,incidents
Iraq,24636
Pakistan,14368
Afghanistan,12731
India,11960
Colombia,8306


In [11]:
bins = [0,1000,3000, 24636]
group_names = ["Green", "Yellow", "Red"]
country_bins["danger_color"] = pd.cut(country_bins["incidents"], bins, labels=group_names)
country_bins = country_bins.reset_index()

In [12]:
country_danger_color = pd.DataFrame({'country' : country_bins['index'], 'incidents': country_bins['incidents'], 'danger_color': country_bins['danger_color']})
#To be loaded to MongoDB
country_danger_color.head()

Unnamed: 0,country,incidents,danger_color
0,Iraq,24636,Red
1,Pakistan,14368,Red
2,Afghanistan,12731,Red
3,India,11960,Red
4,Colombia,8306,Red


# LINE CHART DATA Table

In [13]:
df = terror_df
data = df.groupby(['iyear', 'country_txt']).count()
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eventid,imonth,iday,approxdate,extended,resolution,country,region,region_txt,provstate,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
iyear,country_txt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1970,Argentina,21,21,21,0,21,2,21,21,21,21,...,0,0,0,0,21,21,21,21,21,0
1970,Australia,1,1,1,0,1,0,1,1,1,1,...,0,0,0,0,1,1,1,1,1,0
1970,Belgium,1,1,1,0,1,0,1,1,1,1,...,0,0,0,0,1,1,1,1,1,0
1970,Bolivia,1,1,1,0,1,1,1,1,1,1,...,0,0,0,0,1,1,1,1,1,0
1970,Brazil,6,6,6,0,6,3,6,6,6,6,...,0,0,0,0,6,6,6,6,6,0


In [14]:
df = data.reset_index(level=['iyear', 'country_txt'])
df_index = df.sort_values(by=['country_txt', 'iyear'])

table = df_index[['iyear','country_txt', 'eventid']].reset_index()
table['value_1'] = table.country_txt.shift()

answer=[]
total=0

for i in range(len(table)):
    if table['country_txt'][i] == table['value_1'][i]:
        newtotal = table['eventid'][i]+total
        total=newtotal
    else:
        newtotal=table['eventid'][i]
        total=table['eventid'][i]
    answer.append(newtotal)      

table['freq'] = answer
table =table[['iyear','country_txt','freq']].sort_values(by=['country_txt','iyear'])

final_table = pd.DataFrame({'name': table['country_txt'], 'time': table['iyear'], 'event': table['freq']})
#To be loaded to MongoDB
final_table.head(5) 

Unnamed: 0,name,time,event
0,Afghanistan,1973,1
1,Afghanistan,1979,4
2,Afghanistan,1987,5
3,Afghanistan,1988,16
4,Afghanistan,1989,26


# Pushing Data to MongoDB

In [42]:
#Connecting to Mongo DB
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
#------------------------------------------------------------------------------------------#
# To Load Dashboard Totals    
def totals_toMongodb():
    final_df = dashboard_table
    db = client.Terrorist
    collection = db.DashTotals
    collection.delete_many({})
    for index, row in final_df.iterrows():
        post = {'total_attacks': int(row['total_attacks']),'total_fatalities': int(row['total_fatalities']),'total_wounded': int(row['total_wounded']),'total_success':int(row['total_success'])}
        collection.insert_one(post)
#------------------------------------------------------------------------------------------#        
#to Load Regional Stats
def regionalStats_toMongodb():
    final_df = regstats_df
    db = client.Terrorist
    collection = db.RegStats
    collection.delete_many({})
    for index, row in final_df.iterrows():
        post = {
            'regions': row['index'], 'total_terror': row['total_terror']
        }
        collection.insert_one(post)
#------------------------------------------------------------------------------------------#
#To Load YEarly Incidents
def incidents_toMongodb():
    final_df = incident_df
    db = client.Terrorist
    collection = db.IncidentsYearly
    collection.delete_many({})
    for index, row in final_df.iterrows():
        post = {
            'year': int(row['index']), 'incidents': int(row['incidents'])
        }
        collection.insert_one(post)
#------------------------------------------------------------------------------------------#
# To Load LineChart
def line_chart_toMongodb():
    final_df = final_table
    db = client.Terrorist
    collection = db.LineChart
    collection.delete_many({})
    for index, row in final_df.iterrows():
        post = {
            'name': row['name'], 'time': row['time'], 'event': row['event']
        }
        collection.insert_one(post)
#------------------------------------------------------------------------------------------#
# To Load RiskMap
def riskMap_toMongodb():
    final_df = country_danger_color
    db = client.Terrorist
    collection = db.RiskMap
    collection.delete_many({})
    for index, row in final_df.iterrows():
        post = {
           'country' : row['country'], 'incidents': row['incidents'], 'danger_color': row['danger_color']
        }
        collection.insert_one(post)



In [43]:
#Functions to create/load collections and data.
totals_toMongodb()
regionalStats_toMongodb()
incidents_toMongodb()
line_chart_toMongodb()
riskMap_toMongodb()