# Data about the Zika virus outbreak.

### 1. Get content

In [1]:
import pandas as pd
import requests
import os
zika_data = 'data/zika.csv'

if not os.path.exists(zika_data):
    content = "https://raw.githubusercontent.com/BuzzFeedNews/zika-data/master/data/parsed/brazil/brazil-microcephaly-2016-01-23-table-1.csv"
    data = pd.read_csv(content)
    data.to_csv(zika_data)
else:
    data = pd.read_csv(zika_data)
data.head()

Unnamed: 0.1,Unnamed: 0,no,state,cases_under_investigation,cases_confirmed,cases_discarded,cases_reported_total
0,0,1,Alagoas,158,0,0,158
1,1,2,Bahia,471,35,27,533
2,2,3,Ceará,218,4,7,229
3,3,4,Maranhão,119,0,15,134
4,4,5,Paraíba,497,31,181,709


### 2. Remove unnamed row

In [2]:
column_names={'no':'no', 'state':'State', 'cases_under_investigation':'Under Investigation', 'cases_confirmed' :'Confirmed','cases_discarded':'Discarded','cases_reported_total':'Total'}
data = (data
    .rename(columns=column_names)
    .dropna(thresh=4)
    [['no', 'State', 'Under Investigation', 'Confirmed', 'Discarded', 'Total']]
    .set_index('no')
    .sort_index())
data.head()

Unnamed: 0_level_0,State,Under Investigation,Confirmed,Discarded,Total
no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Alagoas,158,0,0,158
2,Bahia,471,35,27,533
3,Ceará,218,4,7,229
4,Maranhão,119,0,15,134
5,Paraíba,497,31,181,709


### 3. Tidy data 

In [3]:
tidy=pd.melt(data.reset_index(),id_vars=['no','State'], value_vars=list(data.columns)[2:],var_name="Case Type", value_name='No of cases')
tidy.sort_values(by='State')
tidy=tidy.set_index('no')
tidy.head()

Unnamed: 0_level_0,State,Case Type,No of cases
no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Alagoas,Confirmed,0
2,Bahia,Confirmed,35
3,Ceará,Confirmed,4
4,Maranhão,Confirmed,0
5,Paraíba,Confirmed,31
