In [7]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import re
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
from pmdarima.arima import auto_arima
from prophet import Prophet

In [8]:

df = pd.read_csv("datasets/covid_19_data.csv")
df


Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
26708,26709,05/19/2020,Wyoming,US,2020-05-20 02:32:19,776.0,10.0,0.0
26709,26710,05/19/2020,Xinjiang,Mainland China,2020-05-20 02:32:19,76.0,3.0,73.0
26710,26711,05/19/2020,Yukon,Canada,2020-05-20 02:32:19,11.0,0.0,11.0
26711,26712,05/19/2020,Yunnan,Mainland China,2020-05-20 02:32:19,185.0,2.0,183.0


In [9]:
df.dtypes

SNo                  int64
ObservationDate     object
Province/State      object
Country/Region      object
Last Update         object
Confirmed          float64
Deaths             float64
Recovered          float64
dtype: object

In [10]:
#Nomes de colunas não devem ter letras maiúsculas e nem caracteres especiais. Vamos utilizar uma função para corrigir os nomes das colunas.
def corrige_colunas(col_name):
    return re.sub(r"[/| ]", "", col_name).lower()

In [11]:
df.columns = [corrige_colunas(col) for col in df.columns]

In [12]:
df

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
26708,26709,05/19/2020,Wyoming,US,2020-05-20 02:32:19,776.0,10.0,0.0
26709,26710,05/19/2020,Xinjiang,Mainland China,2020-05-20 02:32:19,76.0,3.0,73.0
26710,26711,05/19/2020,Yukon,Canada,2020-05-20 02:32:19,11.0,0.0,11.0
26711,26712,05/19/2020,Yunnan,Mainland China,2020-05-20 02:32:19,185.0,2.0,183.0


In [13]:
#Análises

In [14]:
df.loc[df.countryregion == 'Brazil']

Unnamed: 0,sno,observationdate,provincestate,countryregion,lastupdate,confirmed,deaths,recovered
82,83,01/23/2020,,Brazil,1/23/20 17:00,0.0,0.0,0.0
2455,2456,02/26/2020,,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0
2559,2560,02/27/2020,,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0
2668,2669,02/28/2020,,Brazil,2020-02-26T23:53:02,1.0,0.0,0.0
2776,2777,02/29/2020,,Brazil,2020-02-29T21:03:05,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...
24850,24851,05/15/2020,,Brazil,2020-05-16 02:32:19,220291.0,14962.0,84970.0
25227,25228,05/16/2020,,Brazil,2020-05-17 02:32:32,233511.0,15662.0,89672.0
25604,25605,05/17/2020,,Brazil,2020-05-18 02:32:21,241080.0,16118.0,94122.0
25981,25982,05/18/2020,,Brazil,2020-05-19 02:32:18,255368.0,16853.0,100459.0


In [16]:
# Casos confirmados

In [17]:
brasil = df.loc[(df.countryregion == 'Brazil') & (df.confirmed > 0)]

In [18]:
px.line?

[1;31mSignature:[0m
[0mpx[0m[1;33m.[0m[0mline[0m[1;33m([0m[1;33m
[0m    [0mdata_frame[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mx[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0my[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mline_group[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcolor[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mline_dash[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0msymbol[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhover_name[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhover_data[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcustom_data[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mtext[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfacet_row[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mfacet_col[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [

In [19]:
px.line(brasil, 'observationdate', 'confirmed', 
        labels={'observationdate':'Data', 'confirmed':'Número de casos confirmados'},
       title='Casos confirmados no Brasil')

In [20]:
# Vamos implementar uma função para fazer a contagem de novos casos
brasil['novoscasos'] = list(map(
    lambda x: 0 if (x==0) else brasil['confirmed'].iloc[x] - brasil['confirmed'].iloc[x-1],
    np.arange(brasil.shape[0])
))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [21]:
# Visualizando
px.line(brasil, x='observationdate', y='novoscasos', title='Novos casos por dia',
       labels={'observationdate': 'Data', 'novoscasos': 'Novos casos'})

In [22]:
#Mortes

In [24]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(x=brasil.observationdate, y=brasil.deaths, name='Mortes', mode='lines+markers',
              line=dict(color='red'))
)
#Edita o layout
fig.update_layout(title='Mortes por COVID-19 no Brasil',
                   xaxis_title='Data',
                   yaxis_title='Número de mortes')
fig.show()