In [28]:
import plotly.express as px
import pandas as pd
from utils.putils import reader
from mbmutils import mu

In [98]:
# Hex colours used consistently for the two casualty categories in every chart.
DEAD = "#221f1f"
INJURED = "#b20710"

# Maps the values of the "type" column to their chart colour.
COLOR_MAP_TYPE = {
    "dead": DEAD,
    "injured": INJURED,
}

# Multiple Data Sets

There are a few data sets here. Each DataFrame is named after the data set it holds. The data sets likely overlap to a large extent.

In [2]:
# Load the mass-shooting data set through the project's `reader` helper.
mass_df = reader.read_mass_shooting()
# Display the frame as the cell output.
mass_df

Unnamed: 0,date,ymd,ym,y,dead,injured,total,state
0,2018-12-31,2018-12-31,2018-12,2018,3,2,5,Ohio
1,2018-12-29,2018-12-29,2018-12,2018,1,3,4,Ohio
2,2018-12-28,2018-12-28,2018-12,2018,4,1,5,Missouri
3,2018-12-24,2018-12-24,2018-12,2018,2,2,4,Louisiana
4,2018-12-23,2018-12-23,2018-12,2018,0,7,7,Missouri
...,...,...,...,...,...,...,...,...
317,2018-07-01,2018-07-01,2018-07,2018,1,5,6,Alabama
318,2018-07-01,2018-07-01,2018-07,2018,0,4,4,Florida
319,2018-05-01,2018-05-01,2018-05,2018,0,6,6,Mississippi
320,2018-04-01,2018-04-01,2018-04,2018,1,3,4,Arkansas


In [3]:
# Load the gun-violence data set through the project's `reader` helper.
gun_df = reader.read_gun_violence()
# Display the frame as the cell output.
gun_df

Unnamed: 0,date,ymd,ym,y,dead,injured,total,state
0,2021-12-31,2021-12-31,2021-12,2021,0,4,4,Maryland
1,2021-12-31,2021-12-31,2021-12,2021,3,4,7,Mississippi
2,2021-12-31,2021-12-31,2021-12,2021,0,6,6,California
3,2021-12-30,2021-12-30,2021-12,2021,0,6,6,Pennsylvania
4,2021-12-30,2021-12-30,2021-12,2021,2,2,4,Missouri
...,...,...,...,...,...,...,...,...
3386,2014-01-12,2014-01-12,2014-01,2014,0,5,5,Alabama
3387,2014-01-11,2014-01-11,2014-01,2014,0,4,4,Mississippi
3388,2014-01-05,2014-01-05,2014-01,2014,1,3,4,Pennsylvania
3389,2014-01-03,2014-01-03,2014-01,2014,1,3,4,New York


In [74]:
# Keep only the 2018 rows of the gun-violence data; "y" is compared against
# the string "2018", not an integer.
is_2018 = gun_df["y"] == "2018"
y2018_df = gun_df[is_2018]

# Summary statistics of the numeric columns for that year.
y2018_df.describe()


Unnamed: 0,dead,injured,total
count,336.0,336.0,336.0
mean,1.107143,3.958333,5.065476
std,1.852906,1.992086,2.555976
min,0.0,0.0,4.0
25%,0.0,3.0,4.0
50%,1.0,4.0,4.0
75%,1.0,5.0,5.0
max,17.0,19.0,34.0


In [24]:
# Summary statistics of the mass-shooting data, for comparison with the
# describe() output of the 2018 gun-violence subset.
mass_df.describe()

Unnamed: 0,dead,injured,total
count,322.0,322.0,322.0
mean,1.189441,4.009317,5.195652
std,1.916841,2.300597,2.983169
min,0.0,0.0,4.0
25%,0.0,3.0,4.0
50%,1.0,4.0,4.0
75%,2.0,5.0,5.0
max,17.0,22.0,34.0


In [35]:
# Sum of "total" for every (dead, injured) combination.
# "total" is selected explicitly so the reduction never touches the date or
# string columns: with pandas >= 2.0 a bare .sum() no longer drops
# non-numeric columns and would concatenate strings or raise.
mass_df.groupby(["dead", "injured"])["total"].sum().reset_index()

Unnamed: 0,dead,injured,total
0,0,4,368
1,0,5,165
2,0,6,114
3,0,7,42
4,0,8,16
5,1,3,176
6,1,4,120
7,1,5,60
8,1,6,35
9,1,7,24


In [55]:
# Number of incidents (rows with a non-null "date") for each value of "total".
mass_df.groupby("total")["date"].count().reset_index(name="occurrences")

Unnamed: 0,total,occurrences
0,4,166
1,5,81
2,6,42
3,7,17
4,8,8
5,10,1
6,14,1
7,17,1
8,18,1
9,23,1


In [66]:
def plot_affect_count(df, title):
    """Show a bar chart of how many incidents exist per affected count.

    Args:
        df: DataFrame with a "total" column (grouping key) and a "date"
            column (its non-null values are counted per group).
        title: Title displayed above the chart.

    Returns:
        None. The figure is rendered immediately via ``show()``.
    """
    grouped = df.groupby("total").agg(occurrences=("date", "count"))
    occurrences = grouped["occurrences"].reset_index()

    fig = px.bar(
        occurrences,
        x="total",
        y="occurrences",
        title=title,
        labels={"total": "affected count"},
    )
    fig.show()

In [71]:
# Distribution of affected counts in the mass-shooting data set.
plot_affect_count(
    df=mass_df,
    title="Mass Shootings (2018): Number of Affected",
)

In [75]:
# Same chart for the 2018 subset of the gun-violence data set.
plot_affect_count(
    df=y2018_df,
    title="Gun Violence (2018): Number of Affected",
)

In [91]:
# Total number of dead and injured in 2018, as a two-row frame with one row
# per casualty type ("type") and its summed value ("count").
data = (
    y2018_df[["dead", "injured"]]
    .sum()
    .rename_axis("type")
    .reset_index(name="count")
)
data

Unnamed: 0,type,count
0,dead,372
1,injured,1330


In [117]:
# Pie chart of the dead vs. injured share; slice colours come from
# COLOR_MAP_TYPE, keyed by the values of the "type" column.
fig = px.pie(
    data,
    values="count",
    names="type",
    color="type",
    color_discrete_map=COLOR_MAP_TYPE,
)

# Put the percentage and the label inside each slice and separate the slices
# with a thick white outline.
fig.update_traces(
    textposition="inside",
    textinfo="percent+label",
    marker=dict(line=dict(color="white", width=6)),
    rotation=180,
)

# The slices are labelled directly, so the legend is switched off.
fig.update_layout(
    showlegend=False,
    font_size=19,
)
fig.show()

In [132]:
# Long-format frame with one row per (casualty type, affected total) holding
# the summed number of people, ready to be used as stacked bar input.
# Only "dead" and "injured" are summed: with pandas >= 2.0 a bare .sum() no
# longer drops the date/string columns and would concatenate them or raise.
data = (
    y2018_df.groupby("total")[["dead", "injured"]]
    .sum()
    .reset_index()
    .melt(id_vars="total", var_name="type", value_name="count")
    .loc[:, ["type", "total", "count"]]
)
data

Unnamed: 0,type,total,count
0,dead,4,110
1,dead,5,120
2,dead,6,49
3,dead,7,28
4,dead,8,5
5,dead,10,3
6,dead,12,3
7,dead,15,13
8,dead,16,2
9,dead,18,11


In [134]:
# Bar chart of the number of people per affected total, with bars coloured by
# casualty type using COLOR_MAP_TYPE.
fig = px.bar(
    data,
    x="total",
    y="count",
    color="type",
    color_discrete_map=COLOR_MAP_TYPE,
    labels={"total": "affected count"},
    title="Gun Violence (2018): Number of Affected",
)
fig.show()