In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import re

In [2]:
# Analysis window as ISO date strings; the filters further down apply both
# bounds inclusively (>= START_DATE and <= END_DATE).
START_DATE = '2023-01-01'
END_DATE = '2023-05-29'
# Same day as END_DATE; not referenced by the cells visible in this notebook view.
INTERVENTION_DATE = '2023-05-29'


# Input files live under <repo root>/data, two levels above this notebook.
_data_dir = Path('../..') / 'data'
allcases_filename = _data_dir.joinpath('fogocruzado', 'cases_1jul22_3jul23.csv')
interactions_file = _data_dir.joinpath('my_intervention', 'interactions_per_day_rjba.csv')

# Folder that receives the exported figures.
img_folder = Path('../..').joinpath('figs/')

# Load cases

In [3]:
# Numeric state ids used in the cases file -> two-letter state abbreviations.
estados = {
    19: 'RJ',
    17: 'PE',
    5: 'BA',
}

# Load the cases file, derive/clean three columns, then keep only the rows
# whose occurrence date falls inside [START_DATE, END_DATE] (both inclusive).
allcases = (
    pd.read_csv(
        allcases_filename,
        encoding='ISO-8859-1',
        sep=';',
        parse_dates=['data_ocorrencia'],
        dayfirst=True,
        on_bad_lines='warn',  # malformed rows are reported and skipped, not fatal
    )
    .assign(
        estado=lambda df: df['estado_id'].map(estados),
        nome_cidade=lambda df: df['nome_cidade'].fillna('N. identificado'),
        populacao_cidade=lambda df: df['populacao_cidade'].fillna(0),
    )
    .loc[lambda df: df['data_ocorrencia'].between(START_DATE, END_DATE)]
)

# Count of non-null event ids per occurrence date for the city of Rio de Janeiro.
# Dates without any matching row do not appear in the result.
rank_rio = (
    allcases
    .loc[lambda df: df['nome_cidade'] == 'Rio de Janeiro']
    .groupby('data_ocorrencia')['id_ocorrencia']
    .count()
)

rank_rio

data_ocorrencia
2023-01-01    12
2023-01-02     4
2023-01-03     4
2023-01-04     4
2023-01-05     7
              ..
2023-05-25    16
2023-05-26     6
2023-05-27     2
2023-05-28     4
2023-05-29     9
Name: id_ocorrencia, Length: 149, dtype: int64

# Load interactions

In [4]:
# Read the per-day interaction counts with pandas' default CSV settings,
# then print the column/dtype summary of the loaded frame.
interactions = pd.read_csv(filepath_or_buffer=interactions_file)
interactions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 552 entries, 0 to 551
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    552 non-null    object
 1   State   552 non-null    object
 2   total   552 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 13.1+ KB


In [5]:

# Keep rows whose 'Date' lies in [START_DATE, END_DATE], both ends included.
# 'Date' is an object (string) column, so this is a string comparison against
# the ISO-formatted bounds.
in_window = interactions['Date'].between(START_DATE, END_DATE)
interactions = interactions.loc[in_window]

interactions.info()

<class 'pandas.core.frame.DataFrame'>
Index: 447 entries, 0 to 446
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    447 non-null    object
 1   State   447 non-null    object
 2   total   447 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 14.0+ KB


In [6]:
# Independent copy of the Rio de Janeiro rows, so columns can be added later
# without touching `interactions`.
is_rj = interactions['State'].eq('Rio de Janeiro')
interactions_per_day_rj = interactions.loc[is_rj].copy()

interactions_per_day_rj.shape

(149, 3)

In [7]:
# Number of distinct occurrence dates in the Rio de Janeiro event series;
# displayed so it can be compared with the row count of interactions_per_day_rj.
rank_rio.shape

(149,)

# Check

In [8]:
# Bar chart of the daily event counts held in rank_rio (index -> x, values -> y).
fig = px.bar(
    rank_rio,
    title='<b>a) Gun violence events recorded in Rio de Janeiro</b>',
    template='presentation',
)

# Axis captions, and no legend (there is only a single series).
fig.update_layout(
    xaxis_title='Days',
    yaxis_title='Daily event count',
    showlegend=False,
)

# Draw a thin black frame around the plot area: axis lines on, mirrored to the
# opposite side, identical for both axes.
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_xaxes(**axis_frame)
fig.update_yaxes(**axis_frame)

fig.show()

In [9]:
# Bar chart of the daily interaction totals for Rio de Janeiro.
fig2 = px.bar(
    interactions_per_day_rj,
    x='Date',
    y='total',
    title='<b> b) @FogoCruzadoRJ interactions with gun violence reports on Twitter</b>',
    template='presentation',
)

# Axis captions, and no legend (there is only a single series).
fig2.update_layout(
    xaxis_title='Days',
    yaxis_title='Replies to gun violence reports',
    showlegend=False,
)

# Draw a thin black frame around the plot area: axis lines on, mirrored to the
# opposite side, identical for both axes.
axis_frame = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig2.update_xaxes(**axis_frame)
fig2.update_yaxes(**axis_frame)

fig2.show()

In [10]:
FONT_SIZE = 25
# Apply one shared global font size to both bar charts, using plotly's
# underscore shorthand for the nested layout.font.size property.
fig.update_layout(font_size=FONT_SIZE)

# Kept as the last expression of the cell: update_layout returns the figure,
# so fig2 is what the cell displays.
fig2.update_layout(font_size=FONT_SIZE)

In [11]:
fig1_name = 'events.png'
fig2_name = 'interactions.png'
width = 1200
height = 800
scale = 2

fig.write_image(str(img_folder / fig1_name), width=width, height=height, scale=scale)
fig2.write_image(str(img_folder / fig2_name), width=width, height=height, scale=scale)

!convert +append {img_folder / fig1_name} {img_folder / fig2_name} {img_folder / 'events_interactions.png'}

# Scatterplot

In [12]:
# Pair the two daily series by calendar date instead of by row position.
# rank_rio only contains dates that have at least one event, so pairing by
# position would silently shift (or break) as soon as one day is missing on
# either side.
interactions_by_day = pd.Series(
    interactions_per_day_rj['total'].to_numpy(),
    index=pd.to_datetime(interactions_per_day_rj['Date']),
)
# Days with interactions but no recorded event count as 0 events.
# NOTE(review): assumes rank_rio is indexed by datetime values (the column was
# requested via parse_dates) - confirm if the date parsing ever changes.
events_by_day = rank_rio.reindex(interactions_by_day.index, fill_value=0)

y = events_by_day.values
x = interactions_by_day.values


# Scatter of interactions (x) against events (y) with an OLS trend line.
px.scatter(x=x, y=y, title='c) Correlation between events registered by Fogo Cruzado Institute and interactions on Twitter', template='presentation',trendline='ols')


# Cumulative sum

In [13]:
# Display the Rio de Janeiro interactions frame (Date, State, total) as it
# stands before the running-total column is added.
interactions_per_day_rj

Unnamed: 0,Date,State,total
1,2023-01-01,Rio de Janeiro,74
4,2023-01-02,Rio de Janeiro,15
7,2023-01-03,Rio de Janeiro,13
10,2023-01-04,Rio de Janeiro,7
13,2023-01-05,Rio de Janeiro,12
...,...,...,...
433,2023-05-25,Rio de Janeiro,25
436,2023-05-26,Rio de Janeiro,9
439,2023-05-27,Rio de Janeiro,27
442,2023-05-28,Rio de Janeiro,19


In [14]:
# Running total of the daily interaction counts, accumulated in row order.
interactions_per_day_rj['total_sum'] = interactions_per_day_rj['total'].cumsum()

# Line chart of the running total over time.
# NOTE(review): this rebinds `fig`, which earlier held the events bar chart;
# re-running the font-size or export cells after this one will act on the line
# chart instead.
fig = px.line(interactions_per_day_rj, x='Date', y='total_sum', title="")

# Fixed the misspelled y-axis label ("Cummulative" -> "Cumulative").
fig.update_layout(template='presentation',
                  showlegend=False,
                  xaxis_title='',
                  yaxis_title='Cumulative sum')

fig.show()
