# Analysing ACLED exported files

In [1]:
from arcgis.geometry import Geometry, Point
from arcgis.geometry.functions import buffer, relation
from arcgis.gis import GIS
from arcgis.features import GeoAccessor
import os
import pandas as pd

In [2]:
# Location of the ACLED CSV export, taken from the "acled_file_path" environment
# variable so that no local path is hard-coded in the notebook.
# os.environ[...] raises KeyError when the variable is not set.
file_path = os.environ["acled_file_path"]

In [3]:
def read_acled(file_path):
    """Load an ACLED CSV export into a pandas DataFrame.

    Args:
        file_path: Anything accepted by ``pandas.read_csv`` (path or buffer).

    Returns:
        The parsed DataFrame, read with pandas' default CSV options.
    """
    acled_frame = pd.read_csv(file_path)
    return acled_frame

def get_highest_spatial_precision(acled_data):
    """Keep only the rows whose ``geo_precision`` column equals 1.

    Args:
        acled_data: DataFrame with a ``geo_precision`` column.

    Returns:
        The matching rows, with their original index labels.
    """
    is_most_precise = acled_data["geo_precision"].eq(1)
    return acled_data.loc[is_most_precise]

def get_highest_temporal_precision(acled_data):
    """Keep only the rows whose ``time_precision`` column equals 1.

    Args:
        acled_data: DataFrame with a ``time_precision`` column.

    Returns:
        The matching rows, with their original index labels.
    """
    is_most_precise = acled_data["time_precision"].eq(1)
    return acled_data.loc[is_most_precise]

def get_protests(acled_data):
    """Keep only the rows whose ``event_type`` column is exactly "Protests".

    Args:
        acled_data: DataFrame with an ``event_type`` column.

    Returns:
        The matching rows, with their original index labels.
    """
    is_protest = acled_data["event_type"].eq("Protests")
    return acled_data.loc[is_protest]

def get_peaceful_protests(acled_data):
    """Keep only the protest events whose sub type is "Peaceful protest".

    The data is first narrowed with ``get_protests`` and then filtered on the
    ``sub_event_type`` column.

    Args:
        acled_data: DataFrame with ``event_type`` and ``sub_event_type`` columns.

    Returns:
        The matching rows, with their original index labels.
    """
    protests = get_protests(acled_data)
    is_peaceful = protests["sub_event_type"].eq("Peaceful protest")
    return protests.loc[is_peaceful]

def get_most_precise_peaceful_protests(acled_data):
    """Select peaceful protests recorded with the best temporal and spatial precision.

    The filters are applied in this order: ``time_precision == 1``, then
    ``geo_precision == 1``, then peaceful protests.

    Args:
        acled_data: DataFrame holding the ACLED events.

    Returns:
        The rows that pass all three filters, with their original index labels.
    """
    precise_in_time = get_highest_temporal_precision(acled_data)
    precise_in_time_and_space = get_highest_spatial_precision(precise_in_time)
    return get_peaceful_protests(precise_in_time_and_space)

def list_precisest_locations(acled_data):
    """Return the distinct ``location`` values among rows with ``geo_precision == 1``.

    Args:
        acled_data: DataFrame with ``geo_precision`` and ``location`` columns.

    Returns:
        The result of ``Series.unique()`` on the filtered ``location`` column.
    """
    precise_events = get_highest_spatial_precision(acled_data)
    location_names = precise_events["location"]
    return location_names.unique()

def count_precisest_locations(acled_data):
    """Count the events per ``location`` among rows with ``geo_precision == 1``.

    Args:
        acled_data: DataFrame with ``geo_precision`` and ``location`` columns.

    Returns:
        The result of ``Series.value_counts()`` on the filtered ``location`` column.
    """
    precise_events = get_highest_spatial_precision(acled_data)
    location_names = precise_events["location"]
    return location_names.value_counts()

In [4]:
# Load the ACLED export once; the cells below filter and aggregate this DataFrame.
acled_data = read_acled(file_path)

In [5]:
# Show which columns the export provides.
acled_data.columns

Index(['data_id', 'iso', 'event_id_cnty', 'event_id_no_cnty', 'event_date',
       'year', 'time_precision', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'region', 'country', 'admin1', 'admin2', 'admin3',
       'location', 'latitude', 'longitude', 'geo_precision', 'source',
       'source_scale', 'notes', 'fatalities', 'timestamp', 'iso3'],
      dtype='object')

In [6]:
# Number of events per event type.
acled_data["event_type"].value_counts()

Protests                      185193
Battles                       166853
Explosions/Remote violence    146991
Violence against civilians     76560
Riots                          52814
Strategic developments         28885
Name: event_type, dtype: int64

In [7]:
# Number of peaceful protests per location, restricted to geo_precision == 1.
count_precisest_locations(get_peaceful_protests(acled_data))

Hyderabad               3070
Larkana                 1939
Jammu                   1897
Lahore                  1338
Karachi                 1316
                        ... 
Oktyabrske                 1
Dialakoro                  1
Zhodzina                   1
Allah Bachayo Bhatti       1
Mabayi                     1
Name: location, Length: 16717, dtype: int64

In [8]:
# Connect to ArcGIS Online anonymously (no credentials are passed).
# The resulting `gis` object is used by get_europe_map() to create map widgets.
gis = GIS() 

def get_unique_locations(acled_data):
    """Aggregate events per location name into a dictionary.

    Args:
        acled_data: DataFrame with ``location``, ``longitude`` and ``latitude``
            columns.

    Returns:
        A dict keyed by location name. Each value is a dict with
        ``"point"`` (a WGS84 ``Point`` built from the first row seen for that
        name) and ``"hit_count"`` (the number of rows carrying that name).

    NOTE(review): rows are grouped by name only, so different places sharing a
    name would be merged under the first coordinates seen -- confirm this is
    intended.
    """
    locations_by_name = {}
    for row_label in acled_data.index:
        location_name = acled_data["location"][row_label]
        known_location = locations_by_name.get(location_name)
        if known_location is not None:
            # Name already registered: only the counter changes.
            known_location["hit_count"] += 1
            continue

        first_point = Point({
            "x" : acled_data["longitude"][row_label],
            "y" : acled_data["latitude"][row_label],
            "spatialReference" : {"wkid" : 4326},
        })
        locations_by_name[location_name] = { "point": first_point, "hit_count": 1 }

    return locations_by_name

def to_spatial_dataframe(acled_data):
    """Aggregate events per location name into a spatially enabled DataFrame.

    Args:
        acled_data: DataFrame with ``location``, ``longitude`` and ``latitude``
            columns.

    Returns:
        The result of ``GeoAccessor.from_xy`` (spatial reference 4326) on a
        frame with one row per location name, in order of first appearance,
        and the columns ``name``, ``x`` (longitude of the first row seen),
        ``y`` (latitude of the first row seen) and ``hit_count`` (number of
        rows carrying that name).

    NOTE(review): rows are grouped by name only, so different places sharing a
    name would be merged under the first coordinates seen -- confirm this is
    intended.
    """
    wgs84_wkid = 4326
    names = []
    xs = []
    ys = []
    hit_counts = []
    # Maps a location name to its position in the four parallel lists above.
    position_of_name = {}

    for row_label in acled_data.index:
        location_name = acled_data["location"][row_label]
        position = position_of_name.get(location_name)
        if position is None:
            # First occurrence: the next free position is the current list length.
            position_of_name[location_name] = len(names)
            names.append(location_name)
            xs.append(acled_data["longitude"][row_label])
            ys.append(acled_data["latitude"][row_label])
            hit_counts.append(1)
        else:
            hit_counts[position] += 1

    locations_frame = pd.DataFrame.from_dict(
        { "name": names, "x": xs, "y": ys, "hit_count": hit_counts }
    )
    return GeoAccessor.from_xy(locations_frame, x_column="x", y_column="y", sr=wgs84_wkid)

def get_europe_map():
    """Create a map widget for "Europe" using the "dark-gray-vector" basemap.

    Uses the module-level ``gis`` connection.

    Returns:
        The map widget returned by ``gis.map("Europe")`` after its basemap has
        been set.
    """
    map_widget = gis.map("Europe")
    map_widget.basemap = "dark-gray-vector"
    return map_widget

In [9]:
# Aggregate the peaceful protests having time_precision == 1 and geo_precision == 1
# into one record per location name, then display the resulting frame.
# Dictionary-based alternative kept for reference:
#get_unique_locations(get_highest_spatial_precision(get_peaceful_protests(acled_data)))
acled_events = to_spatial_dataframe(get_most_precise_peaceful_protests(acled_data))
acled_events

Unnamed: 0,name,x,y,hit_count,SHAPE
0,Bizpur,88.4213,22.9424,1,"{""spatialReference"": {""wkid"": 4326}, ""x"": 88.4..."
1,Fatih,28.9408,41.0225,60,"{""spatialReference"": {""wkid"": 4326}, ""x"": 28.9..."
2,Sofia,23.3212,42.6974,322,"{""spatialReference"": {""wkid"": 4326}, ""x"": 23.3..."
3,Diyarbakir,40.2172,37.9136,234,"{""spatialReference"": {""wkid"": 4326}, ""x"": 40.2..."
4,Kragujevac,20.9090,44.0109,52,"{""spatialReference"": {""wkid"": 4326}, ""x"": 20.9..."
...,...,...,...,...,...
16218,Kangemi,36.7436,-1.2695,1,"{""spatialReference"": {""wkid"": 4326}, ""x"": 36.7..."
16219,Dioulacolon,-14.8700,12.8200,11,"{""spatialReference"": {""wkid"": 4326}, ""x"": -14...."
16220,Salikegne,-15.7833,13.2500,11,"{""spatialReference"": {""wkid"": 4326}, ""x"": -15...."
16221,Londiani,35.6000,-0.1667,1,"{""spatialReference"": {""wkid"": 4326}, ""x"": 35.6..."


In [10]:
# Draw only the first five aggregated locations (head()) on a fresh Europe map.
# NOTE(review): renderer_type="s" presumably selects the simple renderer, and the
# .copy() presumably keeps plot() from touching acled_events -- confirm both.
europe_map = get_europe_map()
acled_events.head().copy().spatial.plot(map_widget=europe_map, renderer_type="s")
# Last expression of the cell: renders the map widget.
europe_map

MapView(layout=Layout(height='400px', width='100%'))

In [9]:
# Count the events per calendar month and per calendar year.
#
# Work on a copy indexed by the parsed event date so that `acled_data` itself
# stays untouched and the cell can be re-run safely.
acled_data_date = acled_data.copy()
acled_data_date["event_date"] = pd.to_datetime(acled_data["event_date"])
acled_data_date.index = pd.DatetimeIndex(acled_data_date["event_date"])

# Events per month, labelled like "January 2000".
count_by_month = acled_data_date.groupby(pd.Grouper(freq='M')).size()
count_by_month.index = count_by_month.index.strftime('%B %Y')

# Events per year, labelled like "2000".
# The yearly labels must be derived from count_by_year's own DatetimeIndex:
# formatting count_by_month.index a second time would fail, because that index
# already holds strings and no longer offers strftime().
count_by_year = acled_data_date.groupby(pd.Grouper(freq='Y')).size()
count_by_year.index = count_by_year.index.strftime('%Y')

# Optional visual check:
#count_by_month.plot(kind="bar")

In [10]:
# Display the number of events per calendar month.
count_by_month

event_date
January 2000        365
February 2000       280
March 2000          261
April 2000          283
May 2000            443
                  ...  
September 2019    12900
October 2019      14372
November 2019     13908
December 2019     13057
January 2020       9341
Length: 241, dtype: int64