# Projeto 1 Uniasselvi

## Dados da COVID-19

## Fonte: https://brasil.io/dataset/covid19/caso/

### Importando bibliotecas necessárias

In [None]:
!conda install pandas -y

In [1]:
# Standard library: filesystem paths
from pathlib import Path

# Biblioteca de dataframes do Python
import pandas as pd

# Biblioteca para análise científica
import numpy as np

In [None]:
# Remove the row limit when rendering DataFrames (None = show every row).
pd.options.display.max_rows = None

In [2]:
# Load the gzip-compressed CSV into a DataFrame. Malformed rows are skipped
# with a warning rather than aborting the load: `on_bad_lines='warn'` is the
# replacement for `error_bad_lines=False`, which was deprecated in pandas 1.3
# and removed in pandas 2.0.
data = pd.read_csv('caso_full.csv.gz', compression='gzip', on_bad_lines='warn')

### Verificando apenas as 5 primeiras linhas

In [3]:
# Preview the first five records of the dataset.
data.head(n=5)

Unnamed: 0,city,city_ibge_code,date,epidemiological_week,estimated_population,estimated_population_2019,is_last,is_repeated,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,last_available_date,last_available_death_rate,last_available_deaths,order_for_place,place_type,state,new_confirmed,new_deaths
0,Rio Branco,1200401.0,2020-03-17,202012,413418.0,407319.0,False,False,3,0.72566,2020-03-17,0.0,0,1,city,AC,3,0
1,,12.0,2020-03-17,202012,894470.0,881935.0,False,False,3,0.33539,2020-03-17,0.0,0,1,state,AC,3,0
2,Rio Branco,1200401.0,2020-03-18,202012,413418.0,407319.0,False,False,3,0.72566,2020-03-18,0.0,0,2,city,AC,0,0
3,,12.0,2020-03-18,202012,894470.0,881935.0,False,False,3,0.33539,2020-03-18,0.0,0,2,state,AC,0,0
4,Rio Branco,1200401.0,2020-03-19,202012,413418.0,407319.0,False,False,4,0.96754,2020-03-19,0.0,0,3,city,AC,1,0


## Verificando número de linhas (registros) e colunas (atributos)

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
data.shape

In [4]:
# Unpack the shape so the sentence reads naturally: rows are records,
# columns are attributes.
n_rows, n_cols = data.shape
f"O dataframe possui {n_rows} registros e {n_cols} colunas"

'O dataframe possui 2135152 registros e 18 colunas'

### Verificando todos os atributos (colunas) da tabela

In [None]:
# Column labels as a plain Python list.
data.columns.tolist()

## Verificando os tipos de dados de cada coluna

In [5]:
# Data type pandas inferred for each column when reading the CSV.
data.dtypes

city                                              object
city_ibge_code                                   float64
date                                              object
epidemiological_week                               int64
estimated_population                             float64
estimated_population_2019                        float64
is_last                                             bool
is_repeated                                         bool
last_available_confirmed                           int64
last_available_confirmed_per_100k_inhabitants    float64
last_available_date                               object
last_available_death_rate                        float64
last_available_deaths                              int64
order_for_place                                    int64
place_type                                        object
state                                             object
new_confirmed                                      int64
new_deaths                     

In [14]:
# Remove the column limit when rendering DataFrames (None = show every column).
pd.options.display.max_columns = None

In [15]:
# Keep only the columns stored with the generic `object` dtype.
data.select_dtypes(include=['object'])

Unnamed: 0,city,date,last_available_date,place_type,state
0,Rio Branco,2020-03-17,2020-03-17,city,AC
1,,2020-03-17,2020-03-17,state,AC
2,Rio Branco,2020-03-18,2020-03-18,city,AC
3,,2020-03-18,2020-03-18,state,AC
4,Rio Branco,2020-03-19,2020-03-19,city,AC
...,...,...,...,...,...
2135147,Tupirama,2021-05-25,2021-05-23,city,TO
2135148,Tupiratins,2021-05-25,2021-05-23,city,TO
2135149,Wanderlândia,2021-05-25,2021-05-23,city,TO
2135150,Xambioá,2021-05-25,2021-05-23,city,TO


## Convertendo o tipo de dado da coluna 'date' para datetime

In [4]:
# Parse the 'date' column into datetime64 values, overwriting the column
# in place on `data`.
data['date'] = pd.to_datetime( data['date'] )

In [5]:
# Re-inspect the column types to confirm 'date' is now datetime64[ns].
data.dtypes

city                                                     object
city_ibge_code                                          float64
date                                             datetime64[ns]
epidemiological_week                                      int64
estimated_population                                    float64
estimated_population_2019                               float64
is_last                                                    bool
is_repeated                                                bool
last_available_confirmed                                  int64
last_available_confirmed_per_100k_inhabitants           float64
last_available_date                                      object
last_available_death_rate                               float64
last_available_deaths                                     int64
order_for_place                                           int64
place_type                                               object
state                                   

In [None]:
# Preview the first five records after the date conversion.
data.head(n=5)

## 1 - Qual a data mais antiga dos casos?

In [None]:
# The oldest case date is the minimum of the 'date' column. Sorting the whole
# frame in descending order listed the newest dates first and rendered every
# row instead of answering the question.
data['date'].min()

## 2 - Quantos casos houve no estado de São Paulo?

In [None]:
# Distinct state codes present in the dataset.
pd.unique(data['state'])

In [None]:
# Confirmed cases in the state of São Paulo. Counting rows with `.shape[0]`
# gives the number of records (state-level and city-level rows for every
# day), not the number of cases. Instead, take the cumulative confirmed count
# from the state-level row flagged as the latest one.
# NOTE(review): assumes `is_last` marks the most recent row per place and that
# `last_available_confirmed` is cumulative — confirm against the dataset docs.
sp_state_latest = data[
    (data['state'] == 'SP')
    & (data['place_type'] == 'state')
    & data['is_last']
]
sp_state_latest['last_available_confirmed'].sum()

## 3 - Quantos casos houve apenas na cidade de São Paulo?

In [None]:
# First find out which cities are recorded for the state of São Paulo:
# build a boolean mask for the state and select the 'city' column via .loc.
is_sp_state = data['state'] == "SP"
data.loc[is_sp_state, 'city'].unique()

In [None]:
# Confirmed cases in the city of São Paulo only. Counting rows with
# `.shape[0]` gives the number of daily records for the city, not the number
# of cases. Instead, take the cumulative confirmed count from the city-level
# row flagged as the latest one (restricted to state SP to avoid homonyms).
# NOTE(review): assumes `is_last` marks the most recent row per place and that
# `last_available_confirmed` is cumulative — confirm against the dataset docs.
sp_city_latest = data[
    (data['city'] == "São Paulo")
    & (data['state'] == 'SP')
    & (data['place_type'] == 'city')
    & data['is_last']
]
sp_city_latest['last_available_confirmed'].sum()

## 4 - Quantidade de cidades que estão com os campos nulos

In [None]:
# Number of records whose 'city' field is missing.
len(data[data['city'].isna()])

## 5 - Gerar um relatório em CSV com estado, população estimada, últimos casos confirmados e novas mortes

In [None]:
# Column labels as a plain Python list, to pick the report fields from.
data.columns.tolist()

In [6]:
# Report fields: state, estimated population, latest confirmed cases, new deaths.
report_columns = ['state', 'estimated_population', 'last_available_confirmed', 'new_deaths']

# Keep the 10000 records with the largest estimated population.
col = (
    data[report_columns]
    .sort_values('estimated_population', ascending=False)
    .head(10000)
)
report = col

In [7]:
# Display the report DataFrame built in the previous cell.
report

Unnamed: 0,state,estimated_population,last_available_confirmed,new_deaths
1837165,SP,46289333.0,24041,224
2069281,SP,46289333.0,2956210,689
1887522,SP,46289333.0,500301,383
1888165,SP,46289333.0,514197,330
2004581,SP,46289333.0,1702294,54
...,...,...,...,...
316558,DF,3055149.0,164861,11
316056,DF,3055149.0,556,1
316054,DF,3055149.0,518,1
316053,DF,3055149.0,527,1


## Exportar para um arquivo CSV o dataframe

In [8]:
# Export the report to CSV. The target folder is created first so the cell
# also works on a fresh checkout; `to_csv` raises an error when the parent
# directory does not exist.
report_path = Path('./covid-19/report_covid-19.csv')
report_path.parent.mkdir(parents=True, exist_ok=True)
report.to_csv(report_path)

## Ingestão dos dados do relatório para o PostgreSQL e o MySQL

In [None]:
!conda install sqlalchemy -y

In [None]:
!conda install psycopg2 -y

In [9]:
from sqlalchemy import create_engine

In [10]:
# SQLAlchemy engine for the local PostgreSQL database (psycopg2 driver).
# NOTE(review): the user and password are hardcoded in this URL — consider
# loading them from environment variables before sharing the notebook.
postgres_url = 'postgresql+psycopg2://postgres:root@localhost/projeto1_uniasselvi'
engine = create_engine(postgres_url)

In [11]:
# Ingestion: write the report into the 'covid_19' table without the index.
# With if_exists='append' the rows are inserted into the table when it already
# exists, so re-running this cell inserts the same rows again.
report.to_sql(name='covid_19', con=engine, if_exists='append', index=False)

In [14]:
!pip install PyMySQL



In [15]:
# SQLAlchemy engine for the local MySQL database (PyMySQL driver). This
# rebinds `engine`, so later cells write to MySQL.
# NOTE(review): the user and password are hardcoded in this URL — consider
# loading them from environment variables before sharing the notebook.
mysql_url = 'mysql+pymysql://root:root@localhost/projeto1_uniasselvi'
engine = create_engine(mysql_url)

In [16]:
# Ingestion: write the report into the 'covid_19' table without the index.
# With if_exists='append' the rows are inserted into the table when it already
# exists, so re-running this cell inserts the same rows again.
report.to_sql(name='covid_19', con=engine, if_exists='append', index=False)