In [44]:
import pandas as pd
import geopandas as gpd

In [45]:
# Load the served predictions and keep only the columns used in this analysis.
# `date` is parsed with pd.to_datetime rather than `.astype('datetime64')`:
# casting to a unit-less 'datetime64' dtype raises a TypeError on pandas >= 2.0.
pred_df = (
    pd.read_csv('bahraindemo.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .loc[:, ['lat', 'lon', 'date', 'address', 'incident_types']]
)

In [46]:
# Preview the first rows of the prediction data to check the selected columns.
pred_df.head()

Unnamed: 0,lat,lon,date,address,incident_types
0,26.143,50.4599,2019-07-03,Rd No 1030,Riots/Protests|Violence Against Civilians
1,26.1336,50.6091,2019-07-03,Rd No 3401,Riots/Protests|Violence Against Civilians
2,26.1349,50.5972,2019-07-03,Lane No 4322,Riots/Protests|Violence Against Civilians
3,26.143,50.6106,2019-07-03,Rd No 2339,Riots/Protests|Violence Against Civilians
4,26.2183,50.4704,2019-07-03,Rd No 3621,Riots/Protests|Violence Against Civilians


In [47]:
# Count the distinct dates on which predictions were served.
# Uses `pred_df`: the bare name `df` is not defined by any cell above, so the
# original line could only run thanks to leftover kernel state.
print('Number of dates:', len(pred_df['date'].unique()))

Number of dates: 191


In [48]:
# Sanity check: list the distinct per-day row counts. A single value of 10
# means exactly ten predictions were served on every date.
pred_df.groupby('date').size().unique()

array([10])

In [49]:
# Tally how many times each (lat, lon, address) combination was served as a
# prediction, then show the five most frequently served locations.
pred_locations = (
    pred_df
    .groupby(['lat', 'lon', 'address'])
    .size()
    .reset_index(name='counts')
)
pred_locations.sort_values('counts', ascending=False).head()

Unnamed: 0,lat,lon,address,counts
11,26.217,50.5062,Rd No 7121,163
12,26.217,50.533,Rd No 2579,160
13,26.2183,50.4704,Rd No 3621,151
2,26.1336,50.6091,Rd No 3401,142
17,26.2291,50.5092,88 Karrana Ave,115


In [50]:
# Locations of riots/protests
# Read the recorded events, keep the columns of interest, rename them to match
# `pred_df` (date / lat / lon), and parse `date` into datetimes.
true_df = (
    pd.read_csv('bahrain-riots.csv')
    .loc[:, ['event_date', 'event_type', 'location', 'latitude', 'longitude', 'source']]
    .rename(columns={'event_date': 'date', 'latitude': 'lat', 'longitude': 'lon'})
    .assign(date=lambda events: pd.to_datetime(events['date']))
)
true_df.head()
# - merge together to give three types of points: lat, lon, type (=predictions, riots, protests), count
# - visualize/plot this

Unnamed: 0,date,event_type,location,lat,lon,source
0,2019-08-30,Riots,Al Maamir,26.1333,50.6086,14 February Revolution Youth Coalition
1,2019-08-29,Riots,Karzakkan,26.1156,50.4819,14 February Revolution Youth Coalition
2,2019-08-26,Riots,Bu Quwa,26.2047,50.52,Al Wafa
3,2019-08-24,Protests,Karbabad,26.2303,50.5294,Al Wafa
4,2019-08-24,Riots,Bu Quwa,26.2047,50.52,Al Wafa


In [51]:
# Count recorded events per (lat, lon, event_type) combination.
actual_locations = (
    true_df
    .groupby(['lat', 'lon', 'event_type'])
    .size()
    .reset_index(name='counts')
)

# Give the prediction tallies their own category label so they can be stacked
# alongside the recorded event types.
pred_locations['event_type'] = 'Prediction'

In [61]:
# Merging together the prediction locations, and the data from the actual riots.
# `address` is dropped so both frames share the columns lat/lon/event_type/counts.
# ignore_index=True gives the stacked frame a fresh 0..n-1 index; without it the
# row labels of the two inputs repeat, which makes label-based lookups ambiguous.
locations = pd.concat(
    [actual_locations, pred_locations.drop(columns=['address'])],
    ignore_index=True,
)
locations.head()

Unnamed: 0,lat,lon,event_type,counts
0,26.0686,50.5039,Riots,1
1,26.0753,50.5014,Protests,1
2,26.0753,50.5014,Riots,1
3,26.098,50.4866,Protests,3
4,26.098,50.4866,Riots,4


In [60]:
import os

import plotly.express as px
import plotly.graph_objects as go

# The Mapbox access token is read from the environment rather than being
# committed in the notebook: credentials stored in source are leaked to anyone
# who can read the file or its history.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_token:
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable to a valid Mapbox "
        "access token before running this cell."
    )
px.set_mapbox_access_token(mapbox_token)

# Plot every location as a bubble: size (and hover label) is the number of
# occurrences, colour is the event type. `category_orders` fixes the legend
# and colour assignment order so that predictions come first.
fig = px.scatter_mapbox(
    locations,
    lat = "lat",
    lon = "lon",
    size = "counts",
    hover_name = "counts",
    color = "event_type",
    zoom = 11,
    width = 1000,
    height = 1000,
    opacity = 0.8,
    mapbox_style = "satellite",
    category_orders = {'event_type': ['Prediction', 'Riots', 'Protests', 'Strategic developments']}
)

fig.show(renderer='notebook_connected')

Note that the locations where predictions are served are similar to the locations of the actual riots.

In [88]:
# Understanding matches in location
# Reshape to one row per (lat, lon) with one count column per event type.
# pivot_table(aggfunc='sum') is used instead of pivot(): the prediction tallies
# were counted per (lat, lon, address), so two addresses sharing the same
# coordinates yield duplicate (lat, lon, 'Prediction') rows, on which pivot()
# raises ValueError. Summing merges such rows and leaves all other results
# unchanged.
locations_wider = locations.pivot_table(
    index = ['lat', 'lon'],
    columns = 'event_type',
    values = 'counts',
    aggfunc = 'sum'
).reset_index()

# 'Strategic developments' is left out of this comparison. A missing
# (location, event type) combination means no occurrences, hence the 0 fill.
locations_wider = locations_wider.drop(columns = ['Strategic developments']).fillna(0)
locations_wider.head()

event_type,lat,lon,Prediction,Protests,Riots
0,26.0686,50.5039,0.0,0.0,1.0
1,26.0753,50.5014,0.0,1.0,1.0
2,26.098,50.4866,0.0,3.0,4.0
3,26.0986,50.4868,96.0,0.0,0.0
4,26.1156,50.4819,0.0,2.0,18.0


In [93]:
# Group points that lie close to one another: round lat/lon to 2 decimal
# places and sum the counts of everything that lands on the same rounded
# coordinate (0.01 degrees of latitude is roughly 1.1km). A combined
# protests + riots total is added for each group.
import numpy as np

grouped_points = (
    locations_wider
    .assign(
        lat=lambda wide: np.round(wide['lat'], 2),
        lon=lambda wide: np.round(wide['lon'], 2),
    )
    .groupby(['lat', 'lon'])
    .sum()
    .reset_index()
    .assign(protests_riots=lambda wide: wide['Protests'] + wide['Riots'])
)

grouped_points.head()

event_type,lat,lon,Prediction,Protests,Riots,protests_riots
0,26.07,50.5,0.0,0.0,1.0,1.0
1,26.08,50.5,0.0,1.0,1.0,2.0
2,26.1,50.49,96.0,3.0,4.0,7.0
3,26.12,50.48,57.0,2.0,18.0,20.0
4,26.13,50.48,0.0,0.0,4.0,4.0


In [99]:
# Scatter of prediction frequency against the combined protest/riot count for
# each grouped location; lat/lon are shown on hover to identify the point.
# The title spelling is corrected ("occured" -> "occurred").
fig = px.scatter(
    grouped_points,
    x = "Prediction",
    y = "protests_riots",
    width = 800,
    height = 800,
    hover_data = ['lat', 'lon'],
    labels = {
        'Prediction': 'Number of times location appeared in predictions',
        'protests_riots': 'Total number of protests and riots in location'
    },
    title = "Number of times protests/riots occurred/predicted within 1.1km of each other"
)

fig.show()

In [101]:
# Understanding temporal patterns
# Re-display the first rows of the predictions as a reminder of the available
# columns (including `date`) before looking at day-of-week patterns.
pred_df.head()

Unnamed: 0,lat,lon,date,address,incident_types
0,26.143,50.4599,2019-07-03,Rd No 1030,Riots/Protests|Violence Against Civilians
1,26.1336,50.6091,2019-07-03,Rd No 3401,Riots/Protests|Violence Against Civilians
2,26.1349,50.5972,2019-07-03,Lane No 4322,Riots/Protests|Violence Against Civilians
3,26.143,50.6106,2019-07-03,Rd No 2339,Riots/Protests|Violence Against Civilians
4,26.2183,50.4704,2019-07-03,Rd No 3621,Riots/Protests|Violence Against Civilians


In [105]:
# Note that protests tend to spike around Friday
# (makes sense as they usually occur after Friday prayers).
# The count column is named explicitly so the result does not end up with a
# column labelled 0, matching the `counts` naming used by the other tallies.
true_df.assign(
    day = lambda x: x.date.dt.day_name()
).groupby(['day']).size().reset_index(name = 'counts')

Unnamed: 0,day,0
0,Friday,226
1,Monday,88
2,Saturday,112
3,Sunday,66
4,Thursday,131
5,Tuesday,83
6,Wednesday,87


In [108]:
# Do predicted locations show up equally often on each day of the week?
# Count predictions per (day of week, address) and stack them by address.
# `dayofweek` numbers the days with Monday=0 through Sunday=6.
# No discernible pattern emerges from this view.
pred_day_counts = (
    pred_df
    .assign(day=lambda preds: preds['date'].dt.dayofweek)
    .groupby(['day', 'address'])
    .size()
    .reset_index(name='counts')
)

fig = px.bar(pred_day_counts, x='day', y='counts', color='address')

fig.show()

In [110]:
# Same view for the actual riots/protests: count events per
# (day of week, location) and stack them by location. Here there are real
# temporal trends, with protests occurring more frequently in particular
# places on particular days. `dayofweek` runs from Monday=0 to Sunday=6.
true_day_counts = (
    true_df
    .assign(day=lambda events: events['date'].dt.dayofweek)
    .groupby(['day', 'location'])
    .size()
    .reset_index(name='counts')
)

fig = px.bar(true_day_counts, x='day', y='counts', color='location')

fig.show()