<div align="center">

  <img alt="Coronavírus" width="85%" src="img/covid19.png">
  <br>

</div>

## **Projeto 02 - Análise dos Dados do COVID-19 no Brasil**

###### **Data Science na Prática 4.0** [sigmoidal.ai](https://sigmoidal.ai)

*by [Jhefferson Z. Andreatta](https://www.linkedin.com/in/e-jhefferson-zardin-andreatta-8ab67723a/)*

---

In [2]:
# Install datatable quietly. The %pip magic installs into the environment of the
# running kernel, whereas !pip runs whichever pip the shell finds first.
%pip install -q datatable

In [3]:
import os
import shutil
import zipfile
import numpy as np
import pandas as pd
import datatable as dt
import matplotlib.pyplot as plt

In [4]:
# Our World in Data COVID-19 dataset, fetched directly from GitHub.
CSV_FILE = 'https://github.com/owid/covid-19-data/raw/master/public/data/owid-covid-data.csv'

# datatable parses the remote CSV; the pandas conversion happens right after.
dt_df = dt.fread(CSV_FILE)

# Convert to pandas and turn the 'date' column into datetime values in one chain.
df_word = dt_df.to_pandas().assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [5]:
# Dimensions of the loaded frame as (rows, columns).
df_word.shape

(429435, 67)

In [6]:
# Preview the first rows of the loaded frame.
df_word.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-05,0.0,0.0,,0.0,0.0,,...,,37.75,0.5,64.83,0.51,41128772,,,,
1,AFG,Asia,Afghanistan,2020-01-06,0.0,0.0,,0.0,0.0,,...,,37.75,0.5,64.83,0.51,41128772,,,,
2,AFG,Asia,Afghanistan,2020-01-07,0.0,0.0,,0.0,0.0,,...,,37.75,0.5,64.83,0.51,41128772,,,,
3,AFG,Asia,Afghanistan,2020-01-08,0.0,0.0,,0.0,0.0,,...,,37.75,0.5,64.83,0.51,41128772,,,,
4,AFG,Asia,Afghanistan,2020-01-09,0.0,0.0,,0.0,0.0,,...,,37.75,0.5,64.83,0.51,41128772,,,,


In [5]:
# List every column label available in the frame.
df_word.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [7]:
# Data types of the first 20 columns.
df_word.dtypes.head(20)

iso_code                                   object
continent                                  object
location                                   object
date                               datetime64[ns]
total_cases                               float64
new_cases                                 float64
new_cases_smoothed                        float64
total_deaths                              float64
new_deaths                                float64
new_deaths_smoothed                       float64
total_cases_per_million                   float64
new_cases_per_million                     float64
new_cases_smoothed_per_million            float64
total_deaths_per_million                  float64
new_deaths_per_million                    float64
new_deaths_smoothed_per_million           float64
reproduction_rate                         float64
icu_patients                              float64
icu_patients_per_million                  float64
hosp_patients                             float64


In [9]:
# Share of missing values per column: null count divided by the number of rows.
missing_counts = df_word.isnull().sum()
missing_ratio = missing_counts / df_word.shape[0]

# Rank from most to least missing and show positions 58 onward of that ranking
# (an iloc slice is clipped when the upper bound exceeds the number of entries).
missing_ratio.sort_values(ascending=False).iloc[58:87]

new_deaths_per_million      0.043841
total_deaths_per_million    0.041056
total_cases                 0.041056
total_deaths                0.041056
total_cases_per_million     0.041056
location                    0.000000
iso_code                    0.000000
date                        0.000000
population                  0.000000
dtype: float64

In [12]:
# Keep only the rows whose location is Brazil. Filtering first and copying the
# result avoids duplicating the whole source frame just to discard most of it,
# while still giving df_wBrasil its own independent data.
df_wBrasil = df_word.loc[df_word['location'] == 'Brazil'].copy()

# Sanity check: the per-column null counts come back as a pandas Series.
type(df_wBrasil.isnull().sum())

pandas.core.series.Series

In [None]:
# Number of columns per dtype. Only the last expression of a cell is rendered
# automatically, so this summary is printed explicitly instead of being discarded.
print(df_wBrasil.dtypes.value_counts())

# Rows of the Brazil subset whose date falls in 2024 (rendered as the cell output).
df_wBrasil.loc[df_wBrasil['date'].dt.year == 2024]

In [None]:
# Single-day lookup: rows with ISO code 'BRA' dated 2023-03-03.
is_bra = df_wBrasil['iso_code'] == 'BRA'
on_target_day = df_wBrasil['date'] == '2023-03-03'
df_wBrasil.loc[is_bra & on_target_day]

In [None]:
# Path to the .zip archive
zip_path = 'covid19/HIST_PAINEL_COVIDBR_27jul2024.zip'

# Collects one DataFrame per .csv member of the archive
dataframes = []

# Every member is read straight from the archive, so nothing is extracted to disk:
# there is no temporary directory to leak if a step fails, and no existing
# directory gets removed during clean-up.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    for file_name in zip_ref.namelist():
        if not file_name.endswith('.csv'):
            continue
        try:
            with zip_ref.open(file_name) as csv_file:
                # Fields are ';'-separated; malformed rows are skipped, not fatal
                df = pd.read_csv(csv_file, sep=';', on_bad_lines='skip')
            dataframes.append(df)
        except Exception as e:
            # Best effort: report the unreadable member and move on to the next
            print(f"Erro ao ler {file_name}: {e}")

# Fail with an explicit message instead of pd.concat's generic
# "No objects to concatenate" when nothing could be loaded.
if not dataframes:
    raise ValueError(f"No .csv file could be read from {zip_path}")

# Single DataFrame with all the years.
# NOTE(review): ignore_index=False keeps each file's own row labels, so labels
# repeat across files - confirm this is intended before using label-based lookups.
df_brasil = pd.concat(dataframes, ignore_index=False)

In [None]:
# Render the concatenated df_brasil frame as the cell output.
df_brasil

In [None]:
DATA_PATH = 'covid19/HIST_PAINEL_COVIDBR_27jul2024.zip'

# pd.read_csv cannot open a .zip that holds more than one data file, and its
# default separator is ','. This archive is parsed with sep=';' elsewhere in the
# notebook, so every .csv member is read individually with that separator and
# the pieces are concatenated.
frames = []
with zipfile.ZipFile(DATA_PATH) as archive:
    for member in archive.namelist():
        if member.endswith('.csv'):
            with archive.open(member) as csv_file:
                frames.append(pd.read_csv(csv_file, sep=';', on_bad_lines='skip'))

# Each file keeps its own row labels (ignore_index=False).
df_brasil = pd.concat(frames, ignore_index=False)

In [None]:
# https://covid.ourworldindata.org/data/owid-covid-data.json
# https://covid.ourworldindata.org/data/owid-covid-data.csv
# blob:https://covid.saude.gov.br/dd2ea195-37f5-426f-860f-8e47c9a8684d