In [72]:
import pandas as pd
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import re
from utils import *

In [73]:
# Date constants (ISO strings). START_DATE / END_DATE are not referenced by the
# cells visible in this part of the notebook.
START_DATE = '2023-03-01'
END_DATE = '2023-07-02'
# Day the intervention starts. Defined in the configuration cell because later
# cells filter and annotate by it; without this a fresh "Restart & Run All"
# hits a NameError before the value is first assigned further down.
INTERVENTION_DATE = '2023-05-29'

# CSV with the per-day, per-state records, located relative to this notebook.
intervention_filename = Path('../..') / 'data' / 'my_intervention' / 'interventions.csv'

# Index by `day`; each day has one row per state, so the index is not unique.
df = pd.read_csv(intervention_filename).set_index('day')

df


Unnamed: 0_level_0,estado,vitimas,id_ocorrencia,populacao_cidade,id_ocorrencia_7d,user,replies,weekend,intervention,treatment,intervention_treatment
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-03-01,BA,6,3,972875,2.14,FogoCruzadoBA,3,0,0,0,0
2023-03-01,RJ,14,5,1747148,3.43,FogoCruzadoRJ,24,0,0,1,0
2023-03-02,BA,2,2,1419552,2.00,FogoCruzadoBA,4,0,0,0,0
2023-03-02,RJ,17,6,1469213,3.71,FogoCruzadoRJ,13,0,0,1,0
2023-03-03,BA,18,1,2675656,2.00,FogoCruzadoBA,3,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2023-06-30,RJ,2,3,2311823,2.29,FogoCruzadoRJ,33,0,1,1,1
2023-07-01,BA,4,2,1396851,2.14,FogoCruzadoBA,2,1,1,0,0
2023-07-01,RJ,11,5,1824291,2.29,FogoCruzadoRJ,11,1,1,1,1
2023-07-02,BA,6,2,1357919,2.14,FogoCruzadoBA,0,1,1,0,0


# Check assumption

In [74]:
# Build the plotting frame in one chain:
#   1. keep only the reply counts and the state code,
#   2. add the running total of replies within each state (row order preserved),
#   3. move the `day` index back into a regular column,
#   4. rename day -> Date and estado -> State for display.
interactions_per_day = (
    df[['replies', 'estado']]
    .assign(total_sum=lambda frame: frame.groupby('estado')['replies'].cumsum())
    .reset_index()
    .rename(columns={'day': 'Date', 'estado': 'State'})
)

interactions_per_day

Unnamed: 0,Date,replies,State,total_sum
0,2023-03-01,3,BA,3
1,2023-03-01,24,RJ,24
2,2023-03-02,4,BA,7
3,2023-03-02,13,RJ,37
4,2023-03-03,3,BA,10
...,...,...,...,...
240,2023-06-30,33,RJ,2349
241,2023-07-01,2,BA,658
242,2023-07-01,11,RJ,2360
243,2023-07-02,0,BA,658


In [75]:
# Two-letter state codes -> full state names (used as the `color` grouping in the plots).
state_names = dict(BA='Bahia', RJ='Rio de Janeiro')

# `replace` leaves any value that is not a key of the mapping untouched.
interactions_per_day = interactions_per_day.assign(
    State=interactions_per_day['State'].replace(state_names)
)

In [76]:
# Convert the Date column from strings to datetime64 so it can be compared
# and plotted as real dates, then print the resulting dtypes.
interactions_per_day = interactions_per_day.assign(
    Date=pd.to_datetime(interactions_per_day['Date'])
)
interactions_per_day.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       245 non-null    datetime64[ns]
 1   replies    245 non-null    int64         
 2   State      245 non-null    object        
 3   total_sum  245 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 7.8+ KB


In [77]:
# Show the frame after the state codes were replaced by full names and Date was parsed.
interactions_per_day

Unnamed: 0,Date,replies,State,total_sum
0,2023-03-01,3,Bahia,3
1,2023-03-01,24,Rio de Janeiro,24
2,2023-03-02,4,Bahia,7
3,2023-03-02,13,Rio de Janeiro,37
4,2023-03-03,3,Bahia,10
...,...,...,...,...
240,2023-06-30,33,Rio de Janeiro,2349
241,2023-07-01,2,Bahia,658
242,2023-07-01,11,Rio de Janeiro,2360
243,2023-07-02,0,Bahia,658


# Plot

In [78]:
# Cumulative replies per state over time, linear y-axis, one line per state.
fig = px.line(interactions_per_day, x='Date', y='total_sum', color='State', title="Interactions")

fig.update_layout(template='presentation',
                  showlegend=False,
                  xaxis_title='',
                  # total_sum is the per-state running total of replies
                  yaxis_title='Cumulative sum')
fig.show()


In [79]:
# Same cumulative series on a log y-axis, with the legend shown so the two
# states can be told apart.
fig = px.line(interactions_per_day, x='Date', y='total_sum', color='State', title="Parallel trends assumption check",log_y=True)

fig.update_layout(template='presentation',
                  showlegend=True,
                  xaxis_title='',
                  # total_sum is the per-state running total of replies
                  yaxis_title='Cumulative sum')
fig.show()


In [80]:
# Re-display the plotting frame (unchanged by the plotting cells above).
interactions_per_day

Unnamed: 0,Date,replies,State,total_sum
0,2023-03-01,3,Bahia,3
1,2023-03-01,24,Rio de Janeiro,24
2,2023-03-02,4,Bahia,7
3,2023-03-02,13,Rio de Janeiro,37
4,2023-03-03,3,Bahia,10
...,...,...,...,...
240,2023-06-30,33,Rio de Janeiro,2349
241,2023-07-01,2,Bahia,658
242,2023-07-01,11,Rio de Janeiro,2360
243,2023-07-02,0,Bahia,658


# Plot trendline before

In [81]:
# Rows dated strictly before the intervention day.
# INTERVENTION_DATE must already be defined when this cell runs.
is_pre_intervention = interactions_per_day['Date'] < INTERVENTION_DATE
interactions_per_day.loc[is_pre_intervention]

Unnamed: 0,Date,replies,State,total_sum
0,2023-03-01,3,Bahia,3
1,2023-03-01,24,Rio de Janeiro,24
2,2023-03-02,4,Bahia,7
3,2023-03-02,13,Rio de Janeiro,37
4,2023-03-03,3,Bahia,10
...,...,...,...,...
170,2023-05-26,7,Rio de Janeiro,1479
171,2023-05-27,4,Bahia,521
172,2023-05-27,26,Rio de Janeiro,1505
173,2023-05-28,0,Bahia,521


In [82]:
# Daily replies before the intervention, coloured by state, with one OLS
# trendline per colour group.
pre_intervention = interactions_per_day[interactions_per_day.Date < INTERVENTION_DATE]

fig_pre = px.scatter(
    pre_intervention,
    x='Date',
    y='replies',
    color='State',
    facet_col_wrap=3,
    template='presentation',
    trendline='ols',
    title='',
    labels={'total': 'replies', 'Date': 'Date'},
)

# Shared axis frame: thin black axis line mirrored on the opposite side.
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)

# One tick per month, labelled with the month abbreviation and year on two lines.
fig_pre.update_xaxes(dtick="M1", tickformat="%b\n%Y", **axis_frame)

fig_pre.update_yaxes(
    title='Interactions with gun violence reports',
    zeroline=False,
    **axis_frame,
)

# Fixed x-range with grid lines enabled.
fig_pre.update_layout(
    xaxis=dict(
        range=['2023-02-27', '2023-05-30'],
        showgrid=True,
    )
)

fig_pre.show()

In [83]:
# Export the pre-intervention trendline figure as a 1000x600 PNG rendered at 2x scale.
# NOTE(review): img_folder is not defined in the visible cells - presumably it
# comes from `from utils import *`; confirm.
fig_pre.write_image(img_folder / "trendline.png", width=1000, height=600, scale=2)

# Plot trendline after

In [84]:
# Daily replies over the whole period, coloured by state, with one OLS
# trendline per colour group and a marker at the intervention day.
fig_pos = px.scatter(
    interactions_per_day,
    x='Date',
    y='replies',
    color='State',
    facet_col_wrap=3,
    template='presentation',
    trendline='ols',
    title='',
    labels={'total': 'replies', 'Date': 'Date'},
)

# Shared axis frame: thin black axis line mirrored on the opposite side.
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)

# One tick per month, labelled with the month abbreviation and year on two lines.
fig_pos.update_xaxes(dtick="M1", tickformat="%b\n%Y", **axis_frame)

fig_pos.update_yaxes(
    title='Interactions with gun violence reports',
    zeroline=False,
    **axis_frame,
)

# Fixed x-range with grid lines enabled.
fig_pos.update_layout(
    xaxis=dict(
        range=['2023-02-27', '2023-06-30'],
        showgrid=True,
    )
)

# Dashed green line at the intervention day; its label is a separate
# annotation shifted 60 px to the right of the line.
fig_pos.add_vline(x=INTERVENTION_DATE, line_width=3, line_dash="dash", line_color="green")
fig_pos.add_annotation(
    x=INTERVENTION_DATE,
    y=55,
    text='Intervention',
    xshift=60,
    showarrow=False,
)

fig_pos.show()

In [85]:
# Export the full-period trendline figure as a 1000x600 PNG rendered at 2x scale.
# NOTE(review): img_folder is not defined in the visible cells - presumably it
# comes from `from utils import *`; confirm.
fig_pos.write_image(img_folder / "trendline-after.png", width=1000, height=600, scale=2)

# Cumulative sum

In [86]:
# Day the intervention starts; assigned before its first use in this cell.
INTERVENTION_DATE = '2023-05-29'

# Split once by treatment flag. In the data preview, treatment == 0 rows are BA
# and treatment == 1 rows are RJ, matching the subplot titles below.
bahia = df[df.treatment == 0]
rio = df[df.treatment == 1]

# Side-by-side cumulative reply counts, one panel per state.
fig2 = make_subplots(rows=1, cols=2, subplot_titles=("a) Bahia", "b) Rio de Janeiro"))

fig2.add_trace(
    go.Scatter(x=bahia.index, y=bahia.replies.cumsum()),
    row=1, col=1
)

# Pair Rio's values with Rio's own dates. The x-values were previously taken
# from the Bahia rows, which misaligns the curve whenever the two states do
# not cover exactly the same days.
fig2.add_trace(
    go.Scatter(x=rio.index, y=rio.replies.cumsum()),
    row=1, col=2
)

# Dashed green line at the intervention day (no row/col given, so it is added to every panel).
fig2.add_vline(x=INTERVENTION_DATE, line_width=3, line_dash="dash", line_color="green")

fig2.update_layout(showlegend=False)


# Label for the intervention line, shifted up 100 px and left 60 px from (date, 0).
fig2.add_annotation(
    x=INTERVENTION_DATE, y=0,
    text="Intervention",
    showarrow=False,
    yshift=100,
    xshift=-60,

)

# Monthly ticks on every x-axis.
fig2.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y",
)
# NOTE(review): `xaxis` refers to the first subplot's x-axis only (the second
# one is `xaxis2`), so these explicit tick labels apply to the Bahia panel
# alone - confirm whether both panels should share them.
fig2.update_layout(
    # show x-axis as date but only month
    xaxis=dict(
        tickformat='%b',
        tickmode='array',
        tickvals=['2023-03-01', '2023-04-01', '2023-05-01','2023-06-01','2023-07-01'],
        ticktext=['Mar 2023', 'Apr', 'May','Jun','Jul'],
        title='',
    ),
)
# make background transparent
fig2.update_layout(
    template="presentation",
    plot_bgcolor="rgba(0,0,0,0)",
    paper_bgcolor="rgba(0,0,0,0)",
)
# Only the left panel carries the y-axis title.
fig2.update_yaxes(title='Interactions with gun violence reports', row=1, col=1)

# Thin black axis lines mirrored on the opposite side, on both axes of every panel.
fig2.update_xaxes(showline=True,
         linewidth=1,
         linecolor='black',
         mirror=True)

fig2.update_yaxes(showline=True,
         linewidth=1,
         linecolor='black',
         mirror=True)

fig2.update_yaxes(zeroline=False)

In [87]:
# Export the two-panel cumulative-sum figure as a PNG at the default size.
# NOTE(review): img_folder is not defined in the visible cells - presumably it
# comes from `from utils import *`; confirm.
fig2.write_image(img_folder / "rioba-cumsum.png")