In [2]:
!pip install py7zr

Collecting py7zr
  Downloading py7zr-0.17.2-py3-none-any.whl (68 kB)
[?25l[K     |████▉                           | 10 kB 14.6 MB/s eta 0:00:01[K     |█████████▋                      | 20 kB 20.7 MB/s eta 0:00:01[K     |██████████████▍                 | 30 kB 25.9 MB/s eta 0:00:01[K     |███████████████████▏            | 40 kB 27.9 MB/s eta 0:00:01[K     |████████████████████████        | 51 kB 29.6 MB/s eta 0:00:01[K     |████████████████████████████▊   | 61 kB 32.3 MB/s eta 0:00:01[K     |████████████████████████████████| 68 kB 5.2 MB/s 
[?25hCollecting pyzstd>=0.14.4
  Downloading pyzstd-0.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 39.9 MB/s 
[?25hCollecting multivolumefile>=0.2.3
  Downloading multivolumefile-0.2.3-py3-none-any.whl (17 kB)
Collecting pybcj>=0.5.0
  Downloading pybcj-0.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (48 kB)
[K     |█████████████████████████

In [3]:
import pandas as pd
import numpy as np
import py7zr
import os
import json
from urllib.request import urlopen

In [4]:
# Attach Google Drive to the Colab runtime so its files are reachable under
# /content/gdrive.
from google.colab import drive

drive.mount(mountpoint="/content/gdrive")

Mounted at /content/gdrive


In [5]:
# Project root inside the mounted Google Drive. Other cells build their input
# and output locations by plain string concatenation (PATH + 'data/...'), so
# the trailing slash is required.
PATH = "/content/gdrive/MyDrive/GlobalTerrorismViz/"

In [6]:
# Data reading
# The raw workbook ships as a 7z archive inside the project's data folder.
targetPath = PATH + 'data/'
archivePath = os.path.join(targetPath, 'globalterrorismdb_0221dist.7z')
workbookPath = os.path.join(targetPath, 'globalterrorismdb_0221dist.xlsx')

# Only unpack when the workbook is not on disk yet, so re-running the notebook
# does not redo the extraction. Delete the .xlsx to force a fresh extraction
# (e.g. after an interrupted run left a partial file behind).
if not os.path.exists(workbookPath):
    with py7zr.SevenZipFile(archivePath) as z:
        z.extractall(path=targetPath)

# Parse the workbook once the archive handle has been closed.
df = pd.read_excel(workbookPath)

# Keep incidents from the year 2000 onwards that are not flagged as doubtful
# (doubtterr == 1), so the statistics only count unambiguous attacks.
df = df[(df['doubtterr'] != 1) & (df['iyear'] >= 2000)]

# Columns used by the later cells. .copy() makes _df an independent frame, so
# later assignments on it cannot raise SettingWithCopyWarning or touch df.
_df = df[['eventid', 'iyear', 'imonth', 'iday', 'country', 'country_txt',
          'region', 'region_txt', 'provstate', 'city', 'latitude', 'longitude',
          'nkill', 'nwound', 'gname', 'targtype1_txt']].copy()

In [9]:
# Country polygons for the map: download the world GeoJSON and flatten its
# feature list into one DataFrame row per feature.
COUNTRIES_GEOJSON_URL = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'

with urlopen(COUNTRIES_GEOJSON_URL) as response:
    countries = json.loads(response.read())

countries_df = pd.json_normalize(countries, record_path='features')

In [10]:
# Merging locations and main df
# Left join: every incident is kept, and the GeoJSON feature columns
# (including the 'id' used as a grouping key below) are attached wherever
# country_txt equals the feature's properties.name.
df_merged = pd.merge(_df, countries_df, how='left',
                     left_on='country_txt', right_on='properties.name')

# groupby() drops rows whose key is NaN, so incidents in a country without a
# matching GeoJSON feature (id is NaN after the left join) are absent from all
# three tables below. Report those countries instead of losing them silently.
unmatched_countries = sorted(
    df_merged.loc[df_merged['id'].isna(), 'country_txt'].dropna().unique()
)
if unmatched_countries:
    print(f"{len(unmatched_countries)} countries have no GeoJSON match and are "
          f"excluded from the statistics: {unmatched_countries}")


def _summarise_attacks(frame, keys):
    """Aggregate incidents per combination of ``keys``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Incident-level data containing ``eventid``, ``nkill``, ``nwound`` and
        every column named in ``keys``.
    keys : list of str
        Columns to group by.

    Returns
    -------
    pandas.DataFrame
        One row per key combination with the number of distinct ``eventid``
        values and the summed ``nkill`` / ``nwound``; the keys are returned as
        ordinary columns.
    """
    return (
        frame.groupby(by=keys)
        .agg({'eventid': 'nunique', 'nkill': 'sum', 'nwound': 'sum'})
        .reset_index()
    )


# Getting Stats
# Per perpetrator group, year and country.
group_year_dataset = _summarise_attacks(
    df_merged, ['gname', 'iyear', 'country_txt', 'id'])

# Per target type, year and country.
attacks_by_target = _summarise_attacks(
    df_merged, ['targtype1_txt', 'iyear', 'country_txt', 'id'])

# Per year and country.
death_injury_yearly = _summarise_attacks(
    df_merged, ['iyear', 'country_txt', 'id'])

In [12]:
# Writing to excel
# The `encoding` keyword is intentionally not passed: it was deprecated in
# pandas 1.5 and removed in 2.0 (where it raises TypeError), and it never
# influenced the openpyxl writer.
_excel_targets = [
    ('data/xlsx/group_year_dataset.xlsx', group_year_dataset),
    ('data/xlsx/attacks_by_target.xlsx', attacks_by_target),
    ('data/xlsx/death_injury_yearly.xlsx', death_injury_yearly),
]
for _relative_path, _table in _excel_targets:
    _table.to_excel(PATH + _relative_path, engine='openpyxl')

In [14]:
# Writing to JSON
# Export each summary table to its own JSON file under the project's
# data/json folder, using to_json's default layout.
_json_targets = [
    ('data/json/group_year_dataset.json', group_year_dataset),
    ('data/json/attacks_by_target.json', attacks_by_target),
    ('data/json/death_injury_yearly.json', death_injury_yearly),
]
for _json_path, _json_table in _json_targets:
    _json_table.to_json(PATH + _json_path)