In [1]:
import IPython.core.display as di

# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# # This line will add a button to toggle visibility of code blocks, for use with the HTML export version
# di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)

In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
import xlrd
import plotly.offline as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots
import plotly.figure_factory as ff


## Timeline and Analysis of Racial-Justice Events
Looking in-depth at the timeline of events that were catalogued as either protests or riots, as well as estimating the numbers of individuals present at events that were specifically called out for violence and looting. We are only looking at events that involved groups affiliated with the Black Lives Matter (BLM) movement. 

In [3]:
df = pd.read_excel('USA_2020_Oct10update.xlsx')

In [4]:
df['Date'] = pd.to_datetime(df['EVENT_DATE'])

In [5]:
blm_list = []

for ix, row in df.iterrows():
    if 'BLM' in str(row['ASSOC_ACTOR_1']):
        blm = 1
    else:
        blm = 0
    blm_list.append(blm)
    
df['BLM'] = blm_list

In [6]:
df_blm = df[df['BLM'] == 1].copy()

In [7]:
df_blm['date'] = df_blm['Date'].dt.date

In [16]:
sizes = []
provocs = []
lootings = []

for ix, row in df_blm.iterrows():
    notes = str(row['NOTES'])
    if '[size' in notes:
        size = notes.split('[size')[1].split(']')[0].split('=')[1]
        sizes.append(size)
    else:
        sizes.append('null')
#     if len(notes.split('[')) > 2:
#         provoc = notes.split('[')[-1].split(']')[0]
#         provocs.append(provoc)
#     else:
#         provocs.append('null')
    if 'loot' in notes:
        lootings.append('Looting')
    elif 'vandal' in notes:
        lootings.append('Looting')
    else:
        lootings.append('No Looting')
    
df_blm['size_est'] = sizes
# df_blm['extra'] = provocs
df_blm['Looting'] = lootings

In [8]:
antifa_list = []

for ix, row in df_blm.iterrows():
    if ('Antifa' in str(row['ACTOR1'])) | ('Antifa' in str(row['ACTOR2'])) | ('Antifa' in str(row['ASSOC_ACTOR_1'])) | ('Antifa' in str(row['ASSOC_ACTOR_2'])):
        ant = 'Antifa'
    else:
        ant = 'No Antifa'
    antifa_list.append(ant)
    
df_blm['Antifa'] = antifa_list

In [17]:
df_blm['Count'] = 1

In [18]:
df_blm_grp = df_blm.groupby(['Date', 'EVENT_TYPE'])['Count'].sum().reset_index()

In [19]:
df_blm_grp2 = df_blm.groupby(['Date', 'EVENT_TYPE', 'Antifa'])['Count'].sum().reset_index()

First an overall timeline of the number and type of events recorded following the death of George Floyd, which specifically involved BLM. 

In [20]:
fig = px.line(df_blm_grp[df_blm_grp['EVENT_TYPE'].isin(['Riots', 'Protests'])].sort_values(by=['Date', 'EVENT_TYPE']),
       x='Date',
       y='Count',
       color='EVENT_TYPE',
#        color_discrete_map={
#            "Riots": "red",
#            "Protests": "blue"},
#        opacity=.75,
       title = 'Number and Type of Events Recorded',
#        log_y = True
          )

fig.update_layout(
    legend=dict(
        orientation="h"
    ),
    legend_title_text=''
)

fig.show()

Here are some overall summary statistics, for the overall timeline as well as specific portions within it:

In [21]:
blm_protests = len(df_blm[df_blm['EVENT_TYPE'] == 'Protests'])
blm_riots = len(df_blm[df_blm['EVENT_TYPE'] == 'Riots'])
pct_riots = 100 * blm_riots / (blm_protests + blm_riots)
print("For the duration, {}% of all protest-events involving BLM are classified as 'Riots'.".format(round(pct_riots, 2)))

blm_protests = len(df_blm[(df_blm['EVENT_TYPE'] == 'Protests') & (df_blm['Date'] >= '2020-6-1')])
blm_riots = len(df_blm[(df_blm['EVENT_TYPE'] == 'Riots') & (df_blm['Date'] >= '2020-6-1')])
pct_riots = 100 * blm_riots / (blm_protests + blm_riots)
print("For June onwards, {}% of all protest-events involving BLM are classified as 'Riots'.".format(round(pct_riots, 2)))

For the duration, 6.46% of all protest-events involving BLM are classified as 'Riots'.
For June onwards, 4.39% of all protest-events involving BLM are classified as 'Riots'.


Here is a summary chart of the number of events per either classification, as well as looking at whether there was reports of Looting and/or Antifa at each event. 

In [22]:
summary = df_blm.groupby(['EVENT_TYPE', 'Looting', 'Antifa'])['Count'].sum().reset_index()

In [23]:
total_events = sum(summary['Count'])
pct_events = []

for ix, row in summary.iterrows():
    events = row['Count']
    pct = 100 * events/total_events
    pct_events.append(pct)
    
summary['% of Total'] = pct_events

In [24]:
summary[summary['EVENT_TYPE'] != 'Strategic developments']

Unnamed: 0,EVENT_TYPE,Looting,Antifa,Count,% of Total
0,Protests,Looting,No Antifa,27,0.301508
1,Protests,No Looting,Antifa,2,0.022334
2,Protests,No Looting,No Antifa,8343,93.165829
3,Riots,Looting,Antifa,2,0.022334
4,Riots,Looting,No Antifa,160,1.786711
5,Riots,No Looting,Antifa,7,0.078169
6,Riots,No Looting,No Antifa,409,4.567281


### How many people were at these events?
Next we'll look at estimating the number of people involved across these events. 

In [25]:
all_numbers = []

for ix, row in df_blm.iterrows():
    desc = str(row['size_est'])
    row_numbers = []
    for word in desc.split():
        if word.isdigit():
            row_numbers.append(int(word))
        else:
            if 'thousands' in desc:
                row_numbers.append(2000)
            elif 'several thousand' in desc:
                row_numbers.append(2000)
            elif 'thousand' in desc:
                row_numbers.append(1000)
            elif 'hundreds' in desc:
                row_numbers.append(200)
            elif 'several hundred' in desc:
                row_numbers.append(200)
            elif 'hundred' in desc:
                row_numbers.append(100)
            elif 'dozens' in desc:
                row_numbers.append(24)
            else:
                row_numbers.append(10)
    high = max(row_numbers)
    all_numbers.append(high)
            
df_blm['Number of People'] = all_numbers

In [27]:
df_blm_grp_ppl = df_blm.groupby(['Date', 'EVENT_TYPE'])['Number of People'].sum().reset_index()

This chart shows the number of estimated people at the events from earlier. We are also marking for each day the number of individuals at events that specifically mentioned the word "looting", out of the overall sample of events from that day. 

In [33]:
df_blm_grp_ppl2 =  df_blm.groupby(['Date', 'EVENT_TYPE', 'Looting', 'Antifa'])['Number of People'].sum().reset_index()

In [34]:
fig = px.line(df_blm_grp_ppl[df_blm_grp_ppl['EVENT_TYPE'].isin(['Riots', 'Protests'])].sort_values(by=['Date', 'EVENT_TYPE']),
       x='Date',
       y='Number of People',
       color='EVENT_TYPE',
#              text='Number of People',
#        color_discrete_map={
#            "Riots": "red",
#            "Protests": "blue"},
#        opacity=.75,
#        log_y=True,
       title = 'Estimated Number of People at BLM-related Events'
          )

fig.add_trace(go.Scatter(x=df_blm_grp_ppl2['Date'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Protests') & (df_blm_grp_ppl2['Looting'] == 'Looting')],
                         y=df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Protests') & (df_blm_grp_ppl2['Looting'] == 'Looting')],
                         mode='markers',
                         name='Protest w/Looting',
                         marker = dict(
                             color='blue', opacity=.6
                         )
                        )
             )

fig.add_trace(go.Scatter(x=df_blm_grp_ppl2['Date'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Riots') & (df_blm_grp_ppl2['Looting'] == 'Looting')],
                         y=df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Riots') & (df_blm_grp_ppl2['Looting'] == 'Looting')],
                         mode='markers',
                         name='Riot w/Looting',
                         marker = dict(
                             color='red', opacity=.6
                         )
                        )
             )

fig.update_layout(
#     legend=dict(
#         orientation="h"
#     ),
    legend_title_text='Number of People at:'
)

fig.show()

Here are some statistics to go along with these new, extended figures:

In [35]:
# All Events - Looting?
blm_all_ppl_l_y = sum(df_blm_grp_ppl2['Number of People'][df_blm_grp_ppl2['Looting'] == 'Looting'])
blm_all_ppl_l_n = sum(df_blm_grp_ppl2['Number of People'][df_blm_grp_ppl2['Looting'] == 'No Looting'])
print("Of all estimated people involved in all BLM events, {}% were in events with reported Looting.".format(round(100*blm_all_ppl_l_y/(blm_all_ppl_l_y+blm_all_ppl_l_n), 3)))

blm_protests_ppl_l_y = sum(df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Protests') & (df_blm_grp_ppl2['Looting'] == 'Looting')])
blm_protests_ppl_l_n = sum(df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Protests') & (df_blm_grp_ppl2['Looting'] == 'No Looting')])
print("Of the estimated people involved in Protests, {}% were also involved in events with reported Looting.".format(round(100*blm_protests_ppl_l_y/(blm_protests_ppl_l_y+blm_protests_ppl_l_n), 3)))

blm_riots_ppl_l_y = sum(df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Riots') & (df_blm_grp_ppl2['Looting'] == 'Looting')])
blm_riots_ppl_l_n = sum(df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Riots') & (df_blm_grp_ppl2['Looting'] == 'No Looting')])
print("Of the estimated people involved in Riots, {}% were also involved in events with reported Looting.".format(round(100*blm_riots_ppl_l_y/(blm_riots_ppl_l_y+blm_riots_ppl_l_n), 3)))

protests_ant_y = sum(df_blm_grp_ppl2['Number of People'][(df_blm_grp_ppl2['EVENT_TYPE'] == 'Protests') & (df_blm_grp_ppl2['Looting'] == 'Looting')])
# blm_riots_ppl = sum(df_blm_grp_ppl['Numbers'][df_blm_grp_ppl['EVENT_TYPE'] == 'Riots'])
# pct_riots_ppl = 100 * blm_riots_ppl / (blm_protests_ppl + blm_riots_ppl)
# print("For the duration, {}% of all estimated individual BLM-related protesters are involved in events classified as 'Riots'.".format(round(pct_riots_ppl, 2)))

Of all estimated people involved in all BLM events, 3.533% were in events with reported Looting.
Of the estimated people involved in Protests, 0.319% were also involved in events with reported Looting.
Of the estimated people involved in Riots, 30.171% were also involved in events with reported Looting.


Finally, here is that same summary chart, showing the number of people at events broken down across these same categories. 

In [36]:
summary2 = df_blm_grp_ppl2.groupby(['EVENT_TYPE', 'Looting', 'Antifa'])['Number of People'].sum().reset_index()

In [37]:
total_people = sum(summary2['Number of People'])
pct_ppl = []

for ix, row in summary2.iterrows():
    ppl = row['Number of People']
    pct = 100 * ppl/total_people
    pct_ppl.append(pct)
    
summary2['% of Total'] = pct_ppl

In [38]:
summary2[summary2['EVENT_TYPE'] != 'Strategic developments']

Unnamed: 0,EVENT_TYPE,Looting,Antifa,Number of People,% of Total
0,Protests,Looting,No Antifa,5414,0.285031
1,Protests,No Looting,Antifa,280,0.014741
2,Protests,No Looting,No Antifa,1689191,88.930907
3,Riots,Looting,Antifa,2010,0.105821
4,Riots,Looting,No Antifa,59692,3.142607
5,Riots,No Looting,Antifa,790,0.041591
6,Riots,No Looting,No Antifa,142015,7.476669


### This concludes some examples of using ACLED to chart the characteristics of US protests over time. 
There is lots more we could do by geography, or conducting predictive analyses along other variables. Stay tuned!