In [22]:
import pandas as pd
import numpy as np
import py7zr
import os
import json
from urllib.request import urlopen

In [2]:
# Attach Google Drive to the Colab filesystem so the project files are reachable.
from google.colab import drive

drive.mount(mountpoint="/content/gdrive")

Mounted at /content/gdrive


In [8]:
# Project root folder on the mounted Drive. Later cells build file paths as
# PATH + 'data/...', so the trailing slash must be kept.
PATH = "/content/gdrive/MyDrive/GlobalTerrorismViz/"

In [11]:
# Unpack the 7z archive into the data folder, then load the extracted workbook.
archive_file = PATH + 'data/globalterrorismdb_0221dist.7z'
with py7zr.SevenZipFile(archive_file) as z:
    targetPath = PATH + 'data/'
    z.extract(path=targetPath)
    # targetPath already ends with '/', so this resolves to 'data//<file>';
    # the string is kept exactly as it was.
    df = pd.read_excel(targetPath + '/globalterrorismdb_0221dist.xlsx')

# Keep only rows whose 'doubtterr' flag is not 1 and whose year is 2000 or later.
not_doubted = df['doubtterr'] != 1
since_2000 = df['iyear'] >= 2000
df = df.loc[not_doubted & since_2000]

# Columns carried forward into the merge and the summary tables.
kept_columns = [
    'eventid', 'iyear', 'imonth', 'iday',
    'country', 'country_txt', 'region', 'region_txt',
    'provstate', 'city', 'latitude', 'longitude',
    'nkill', 'nwound', 'gname', 'attacktype1_txt',
]
_df = df[kept_columns]

In [23]:
# Download the world-countries GeoJSON that supplies the map polygons.
countries_url = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'
with urlopen(countries_url) as response:
    countries = json.loads(response.read())

# Flatten to one row per entry of the 'features' list; nested keys become
# dotted column names such as 'properties.name'.
countries_df = pd.json_normalize(countries, record_path=['features'])

In [52]:
# Merging locations and main df
# Left join: every attack row is kept even when its 'country_txt' has no
# matching 'properties.name' in the GeoJSON; such rows get a missing 'id'.
df_merged = pd.merge(_df, countries_df, left_on='country_txt',
                     right_on='properties.name', how='left')

# Report the country names that found no polygon so the mismatch is visible
# instead of silently shrinking the statistics below.
unmatched_countries = sorted(
    df_merged.loc[df_merged['id'].isna(), 'country_txt'].dropna().unique()
)
if unmatched_countries:
    print(f"{len(unmatched_countries)} countries have no GeoJSON match: {unmatched_countries}")


def _attack_stats(frame, keys):
    """Summarise attacks per combination of ``keys``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns in ``keys`` plus 'eventid', 'nkill', 'nwound'.
    keys : list of str
        Columns to group by.

    Returns
    -------
    pandas.DataFrame
        One row per key combination with the number of distinct 'eventid'
        values and the sums of 'nkill' and 'nwound'; keys are regular columns.
    """
    # dropna=False (pandas >= 1.1): by default groupby discards every row with
    # a missing key, which would drop all attacks in countries whose 'id' is
    # missing after the left join above.
    return (
        frame.groupby(by=keys, dropna=False)
        .agg({'eventid': 'nunique', 'nkill': 'sum', 'nwound': 'sum'})
        .reset_index()
    )


# Getting Stats
group_year_dataset = _attack_stats(df_merged, ['gname', 'iyear', 'country_txt', 'id'])

attacks_by_target = _attack_stats(df_merged, ['attacktype1_txt', 'iyear', 'country_txt', 'id'])

death_injury_yearly = _attack_stats(df_merged, ['iyear', 'country_txt', 'id'])

In [53]:
# Writing to Excel
# No `encoding` argument: pandas deprecated it for to_excel in 1.5 and removed
# it in 2.0 (passing it raises TypeError), and the pandas 1.x docs describe it
# as only necessary for the xlwt writer. The DataFrame index is still written
# as the first column, as before.
group_year_dataset.to_excel(PATH + 'data/group_year_dataset.xlsx', engine='openpyxl')
attacks_by_target.to_excel(PATH + 'data/attacks_by_target.xlsx', engine='openpyxl')
death_injury_yearly.to_excel(PATH + 'data/death_injury_yearly.xlsx', engine='openpyxl')