In [2]:
import numpy as np
import pandas as pd
# Raise pandas' 'display.max_rows' option to 500 for frames displayed in this notebook.
pd.set_option('display.max_rows', 500)

In [3]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
# Set up plotly's offline notebook mode (called with connected=True) before any figure is shown.
init_notebook_mode(connected=True)

# Scrape Data From the GitHub Site (Links Below)

- GitHub data API: https://raw.githubusercontent.com/2020PB/police-brutality/data_build/all-locations.json
- GitHub repository of police brutality incidents: https://github.com/2020PB/police-brutality/blob/master/README.md

In [5]:
# Load the published all-locations JSON feed straight from GitHub into a DataFrame.
pb_source_url = 'https://raw.githubusercontent.com/2020PB/police-brutality/data_build/all-locations.json'
pb_data_raw = pd.read_json(pb_source_url)

# Tests on Data

In [6]:
# Sanity check: number of rows (records) loaded from the repository feed
pb_data_raw.shape[0]

282

# Set data up into proper matrix format 

In [7]:
# Build one DataFrame per incident record, keyed "pb_df_<n>" where <n> is the
# record's position in pb_data_raw['data'].
#
# pd.DataFrame(record) can expand a record into several rows (presumably one
# per video link -- TODO confirm against the feed's schema). Only the first row
# is kept so every incident contributes exactly one row downstream.
# .iloc[:1] also copes with a frame that has no rows at all; .copy() detaches
# the slice so later edits to a frame do not trigger chained-assignment warnings.
dict_of_pb_dfs = {
    "pb_df_{}".format(record_position): pd.DataFrame(incident_record).iloc[:1].copy()
    for record_position, incident_record in enumerate(pb_data_raw['data'])
}

# Backward compatibility: the frames are still published as module-level
# variables pb_df_0, pb_df_1, ... in case other cells refer to them by name.
# New code should look frames up in dict_of_pb_dfs instead.
globals().update(dict_of_pb_dfs)

# Concatenate the single-row frames into one frame holding all incidents
# (one row per incident); sort=True orders the columns alphabetically.
list_of_values = list(dict_of_pb_dfs.values())
pd_consolidated = pd.concat(list_of_values, sort = True)

# Constant 1 per incident so that summing this column counts incidents.
pd_consolidated['incident_value'] = 1



# Graphing

In [23]:
# Per-date aggregation. numeric_only=True is passed explicitly so text columns
# (city, name, ...) are never summed/concatenated, whatever the pandas default is.
by_date = pd_consolidated.groupby('date')
by_date_sum_incidents = by_date.sum(numeric_only=True)

# Date range shown in the chart title. Missing/blank dates are filtered out
# explicitly instead of skipping the first sorted key by position, which would
# silently drop the real earliest date whenever no blank date is present.
# NOTE(review): assumes dates are ISO-style strings so min/max sort chronologically -- confirm.
incident_dates = pd_consolidated['date'].dropna()
incident_dates = incident_dates[incident_dates.astype(str).str.strip() != '']
if incident_dates.empty:
    first_date = last_date = 'unknown'
else:
    first_date = incident_dates.min()
    last_date = incident_dates.max()

# One bar entry per incident (height 1), placed on the x axis by state; the hover
# box shows the incident's city, description and date.
incident_bars = go.Bar(
    x=pd_consolidated['state'],
    y=pd_consolidated['incident_value'],
    marker_color='black',
    name='Police Brutality Incident Captured on Video',
    text=pd_consolidated['city'],
    hovertext=pd_consolidated['name'],
    customdata=pd_consolidated['date'],
    hovertemplate="City: %{text}<br>Incident Description: %{hovertext}<br>Date of Incident: %{customdata}<extra></extra>",
    width=.8,
    showlegend=True,
)
fig = go.Figure(data=[incident_bars])

fig.update_layout(
    title_text='2020 Police Brutality: {} Incidents <br>Data from {} to {}'.format(
        len(pb_data_raw), first_date, last_date
    ),
    barmode='group',
    xaxis_tickangle=-45,
    autosize=True,
    legend=dict(orientation="h", x=.875, y=0.99),
)

# Written relative to the notebook's working directory rather than to a
# user-specific absolute path, so the cell runs on any machine.
fig.write_html("PB_BarChart.html")
fig.show()