In [10]:
%reload_ext autoreload
%autoreload 2

import warnings
# Installs a blanket 'ignore' filter: with no category or module given, every
# warning raised after this point is suppressed for the rest of the session.
# Comment this line out when debugging so deprecation notices become visible.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
# Raise pandas' display limits so wide/long frames render without truncation.
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_colwidth", 100)

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot, offline
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

import requests

from os import listdir
import unicodedata

from paths import *
from scripts.manipulation import remove_acentos
from scripts.manipulation import normalize_cols
from scripts import scrap_data
from scripts import manipulation
from scripts import io
from scripts import vis_graphs

from datetime import datetime

# Local calendar date of this run as a YYYY-MM-DD string.
today = datetime.now().date().isoformat()

import yaml


# LOAD and SAVE DATA from WORLDOMETERS

In [11]:
# load_data() yields two objects; `new_data` is the one previewed in the next cell.
# NOTE(review): the section heading says this step also saves the data — confirm in scripts.scrap_data.
new_data, df_final = scrap_data.load_data()

In [12]:
# Preview: the 30 rows with the highest 'Confirmed' count, renumbered from 0.
(
    new_data
    .sort_values(by='Confirmed', ascending=False)
    .head(30)
    .reset_index(drop=True)
)

Unnamed: 0,Date_last_updated_AEDT,date,countryname,Confirmed,Deaths,Recovered
0,8/27/2020 00:17,2020-08-27,0,24332275,829665,16873305
1,8/27/2020 00:17,2020-08-27,World,24332275,829665,16873305
2,8/27/2020 00:17,2020-08-27,US,6000365,183653,3313861
3,8/27/2020 00:17,2020-08-27,Brazil,3722004,117756,2908848
4,8/27/2020 00:17,2020-08-27,India,3307749,60629,2523443
5,8/27/2020 00:17,2020-08-27,Russia,970865,16683,786150
6,8/27/2020 00:17,2020-08-27,South Africa,615701,13502,525242
7,8/27/2020 00:17,2020-08-27,Peru,613378,28124,421877
8,8/27/2020 00:17,2020-08-27,Mexico,573888,62076,396758
9,8/27/2020 00:17,2020-08-27,Colombia,572270,18184,407121


In [13]:
# Read the totals table through scripts.io and pass it straight to
# create_all_country_total_data for the four tracked measures.
df_wordmeters = manipulation.create_all_country_total_data(
    io.load_total_table(),
    vars=['confirmed', 'deaths', 'recovered', 'active'],
)


In [14]:
# Distinct country codes in order of first appearance once rows are sorted by
# descending 'confirmed'; the row index plays no part in unique().
df_wordmeters.sort_values(by='confirmed', ascending=False)['countrycode'].unique()

array(['US', 'BR', 'IN', 'RU', 'ZA', 'PE', 'MX', 'CO', 'ES', 'CL', 'AR',
       'IR', 'UK', 'SA', 'BD', 'PK', 'IT', 'TR', 'FR', 'DE', 'IQ', 'PH',
       'ID', 'CA', 'QA', 'BO', 'EC', 'UA', 'IL', 'KZ', 'EG', 'DO', 'PA',
       'SE', 'CN', 'OM', 'BE', 'KW', 'RO', 'BY', 'GT', 'NL', 'AE', 'JP',
       'PL', 'HN', 'SG', 'PT', 'MA', 'NG', 'BH', 'ET', 'GH', 'KG', 'AM',
       'DZ', 'VE', 'CH', 'UZ', 'AF', 'CR', 'AZ', 'MD', 'NP', 'KE', 'RS',
       'IE', 'AT', 'AU', 'SV', 'PS', 'CM', 'KR', 'BA', 'CI', 'DK', 'BG',
       'PY', 'LB', 'SN', 'SD', 'ZM', 'NO', 'MY', 'GR', 'GN', 'AL', 'HR',
       'GA', 'FI', 'LU', 'MV', 'MR', 'DJ', 'HU', 'GQ', 'ME', 'NI', 'CG',
       'CU', 'SR', 'RW', 'SK', 'TH', 'SO', 'TN', 'LK', 'LT', 'SI', 'GM',
       'EE', 'BJ', 'IS', 'BS', 'JO', 'MT', 'JM', 'NZ', 'UY', 'CY', 'GE',
       'TT', 'LV', 'BF', 'TG', 'LR', 'NE', 'AD', 'GY', 'VN', 'TD', 'SM',
       'TZ', 'TW', 'MN', 'KH', 'BT', 'BB', 'MC', 'BN', 'SC', 'LI', 'AG',
       'FJ', 'LC', 'VA'], dtype=object)

## European Centre for Disease Prevention and Control

https://data.europa.eu/euodp/en/data/dataset/covid-19-coronavirus-data

In [15]:
# df_eucenter = pd.read_excel('https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide.xlsx')
# dd = manipulation.manipulate_eucenter(df_eucenter)

### Generate Graphs for Multiple Countries

In [16]:
# Work on a copy so later edits to `df` cannot touch df_wordmeters.
df = df_wordmeters.copy()

# Country codes included in the multi-country comparison.
codes = [
    'BR', 'IT', 'ES', 'US', 'CN', 'KR', 'DE',
    'FR', 'UK', 'IN', 'TR', 'MX', 'RU',
]
# Boolean row selector: True where the row belongs to one of `codes`.
mask_countrys = df['countrycode'].isin(codes)

In [17]:
from scripts import vis_layout

# Read the theme file through a context manager so the handle is closed as
# soon as parsing finishes (the bare open() previously leaked it).
# FullLoader is kept for this repo-local file; yaml.safe_load would be enough
# if the themes only use plain YAML types.
with open('../themes/sample_pages.yaml', 'r') as theme_file:
    themes = yaml.load(theme_file, Loader=yaml.FullLoader)

themes = themes['all_countrys']

# Write one standalone HTML page per (variable, axis scale) combination.
# `fig` is left bound to the last figure so the next cell can display it.
for var in themes['vars'].keys():
    for scale in themes['axis_legend']['scale'].keys():
        fig = vis_graphs.all_countrys(df[mask_countrys], var, scale, themes, save=False)

        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/multipleCountry/{var}_{scale}.html", auto_open=False)


In [18]:
# Display the figure left bound by the final (var, scale) iteration of the loop above.
fig

## By Country

In [19]:
df = df_wordmeters.copy()

codes = ['BR','IT','ES','US','CN','KR','DE','FR','UK','IN', 'TR','MX','RU']

# Each theme section is paired with the filename suffix of the pages it
# produces. The daily pass completes before the cumulative pass starts, so
# `themes` and `fig` finish bound to the cumulative theme / last cumulative
# figure, exactly as when the two loops were written out separately.
for theme_key, suffix in (('unique_country_daily', 'daily'), ('unique_country_cum', 'cum')):
    # The file is re-read for every section so each pass starts from a fresh
    # theme dict; the context manager closes the handle (previously leaked).
    with open('../themes/sample_pages.yaml', 'r') as theme_file:
        themes = yaml.load(theme_file, Loader=yaml.FullLoader)[theme_key]

    for countrycode in codes:
        # Keep only this country's rows that have at least one confirmed case.
        mask = ((df['countrycode']==countrycode) & (df['confirmed']>0))
        fig = vis_graphs.unique_country(df[mask],themes)
        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/singleCountry/{countrycode}_{suffix}.html", auto_open=False)


In [20]:
# Display the figure left bound by the last iteration of the per-country loops above.
fig

## Estados

In [21]:
from scripts.io import read_sheets
from scripts import manipulation

# Parse the project config inside a context manager so the file handle is
# closed right away (the bare open() previously leaked it).
with open('../configs/config.yaml', 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Refresh the local Ministério da Saúde CSV before reading it.
# NOTE(review): the recorded output shows a geckodriver download, so this
# call appears to drive a browser — expect it to be slow; confirm in scripts.io.
io.update_ms_data()
print('io done!')

df = pd.read_csv("../data/ministerio_da_saude/last_data_ms_covid19.csv")

# Reshape the raw table according to the 'ms_table' section of the config.
df_states = manipulation.manipule_mytable(df,config['ms_table'])

print("States table, Done!")

[WDM] - Getting latest mozilla release info for v0.27.0
[WDM] - Trying to download new driver from https://github.com/mozilla/geckodriver/releases/download/v0.27.0/geckodriver-v0.27.0-linux64.tar.gz
[WDM] - Driver has been saved in cache [/home/m/.wdm/drivers/geckodriver/linux64/v0.27.0]


downloaded
renamed
98.72476196289062
8.864017009735107
saved
io done!
States table, Done!


## Multiple

In [22]:
from scripts import vis_graphs

# Context manager closes the theme file once parsed (the bare open()
# previously leaked the handle).
with open('../themes/sample_pages.yaml', 'r') as theme_file:
    themes = yaml.load(theme_file, Loader=yaml.FullLoader)
themes = themes['brasil_vis_cumulative']

# One standalone HTML page per (variable, axis scale) combination.
for var in themes['vars'].keys():
    for scale in themes['axis_legend']['scale'].keys():
        fig = vis_graphs.brasil_vis_cum(df_states, var,scale, themes)

        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/brasil/cumulative/{var}_{scale}.html", auto_open=False)


## Diario

In [23]:
codes = df_states['state'].unique()

# Each theme section is paired with the filename suffix of the pages it
# produces; both sets are written to the brasil/daily/ folder, as before.
# The daily pass completes before the cumulative pass starts, so `themes`
# and `fig` finish bound to the cumulative theme / last cumulative figure.
for theme_key, suffix in (('brasil_vis_daily', 'daily'), ('brasil_vis_cum', 'cum')):
    # Re-read the file per section so each pass gets a fresh theme dict; the
    # context manager closes the handle (the bare open() previously leaked it).
    with open('../themes/sample_pages.yaml', 'r') as theme_file:
        themes = yaml.load(theme_file, Loader=yaml.FullLoader)[theme_key]

    for state in codes:
        # Keep only this state's rows that have at least one confirmed case.
        mask = ((df_states['state']==state) & (df_states['confirmed']>0))
        fig = vis_graphs.unique_country(df_states[mask], themes)

        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/brasil/daily/{state}_{suffix}.html", auto_open=False)

## Cidades

In [24]:
from scripts import io

!rm '../data/brIO/caso_full.csv'

!cd '../data/brIO/'
!wget 'https://data.brasil.io/dataset/covid19/caso_full.csv.gz' -P ../data/brIO/
!gunzip '../data/brIO/caso_full.csv.gz'


brio, brio_raw = io.load_brasilIO()

print("Br.IO table, Done!")

--2020-08-27 00:20:52--  https://data.brasil.io/dataset/covid19/caso_full.csv.gz
Resolving data.brasil.io (data.brasil.io)... 172.67.71.45, 104.26.8.175, 104.26.9.175, ...
Connecting to data.brasil.io (data.brasil.io)|172.67.71.45|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16085068 (15M) [application/gzip]
Saving to: ‘../data/brIO/caso_full.csv.gz’


2020-08-27 00:20:54 (7,41 MB/s) - ‘../data/brIO/caso_full.csv.gz’ saved [16085068/16085068]

Br.IO table, Done!


In [25]:
# Build the `cities` table from the raw Brasil.IO frame returned by io.load_brasilIO().
cities = io.load_cities(brio_raw)

In [26]:
# Sanity check: show the final five rows of the city table.
cities.tail(5)

Unnamed: 0,date,city,confirmed,new_confirmed,deaths,new_deaths
444,2020-06-24,São José dos Campos,2462.0,97.0,84.0,2.0
445,2020-06-25,São José dos Campos,2561.0,99.0,86.0,2.0
446,2020-06-26,São José dos Campos,2750.0,189.0,87.0,1.0
447,2020-06-27,São José dos Campos,2763.0,13.0,87.0,0.0
448,2020-06-28,São José dos Campos,2763.0,0.0,87.0,0.0


## Multiple Cities

In [27]:
from scripts import vis_graphs

# Context manager closes the theme file once parsed (the bare open()
# previously leaked the handle).
with open('../themes/sample_pages.yaml', 'r') as theme_file:
    themes = yaml.load(theme_file, Loader=yaml.FullLoader)
themes = themes['cities']

# One standalone HTML page per (variable, axis scale) combination.
for var in themes['vars'].keys():
    for scale in themes['axis_legend']['scale'].keys():
        fig = vis_graphs.brasil_vis_cum(cities, var,scale, themes)

        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/vale/cumulative/{var}_{scale}.html", auto_open=False)


## Diario


In [28]:
import unidecode

codes = ['São Paulo', 'Taubaté', 'São José dos Campos']

# Each theme section is paired with the filename suffix of the pages it
# produces; both sets are written to the vale/daily/ folder, as before.
# The daily pass completes before the cumulative pass starts, so `themes`
# and `fig` finish bound to the cumulative theme / last cumulative figure.
for theme_key, suffix in (('cities_vis_daily', 'daily'), ('cities_vis_cum', 'cum')):
    # Re-read the file per section so each pass gets a fresh theme dict; the
    # context manager closes the handle (the bare open() previously leaked it).
    with open('../themes/sample_pages.yaml', 'r') as theme_file:
        themes = yaml.load(theme_file, Loader=yaml.FullLoader)[theme_key]

    for city_name in codes:
        # Keep only this city's rows that have at least one confirmed case.
        mask = ((cities['city']==city_name) & (cities['confirmed']>0))
        fig = vis_graphs.unique_country(cities[mask], themes)

        # ASCII, lower-case, underscore-separated file stem, e.g. "sao_paulo".
        slug = unidecode.unidecode(city_name.replace(' ','_').lower())
        plot(fig, filename=f"../../sample_pages/pages/covid-19/images/vale/daily/{slug}_{suffix}.html", auto_open=False)

        # Progress output is emitted on the cumulative pass only, matching
        # the output this cell has always produced.
        if suffix == 'cum':
            print(slug)

sao_paulo
taubate
sao_jose_dos_campos


### Figures to upload

In [25]:
# from scripts import io

# f1_name = 'total_lin.html'
# f2_name = 'mortes_lin.html'
# f3_name = 'brasil_lin_total.html'
# f4_name = 'brasil_lin_mortes.html'
# # f5_name = "brasil_por_estado_confirmed.html"
# # f6_name = "brasil_por_estado_deaths.html"

# f1_path = f"../images/multipleCountry/{f1_name}"
# f2_path = f"../images/multipleCountry/{f2_name}"
# f3_path = f"../images/singleCountry/{f3_name}"
# f4_path = f"../images/singleCountry/{f4_name}"
# # f5_path = f"../images/brasil/{f5_name}"
# # f6_path = f"../images/brasil/{f6_name}"

# names_paths = {f1_name:f1_path,
#          f2_name:f2_path,
#          f3_name:f3_path,
#          f4_name:f4_path
#               }
# #          f5_name:f5_path,
# #          f6_name:f6_path}

# for name in names_paths.keys():
    
#     io.to_storage(bucket='sv-covid19',
#               bucket_folder='brasil',
#               file_name=name,
#               path_to_file=names_paths[name])