# Zika Map 

In [0]:
### MUST INCLUDE THIS IN FIRST CELL
def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''))
  
import plotly.plotly as py
import numpy as np
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import Contours, Histogram2dContour, Marker, Scatter
import pandas as pd
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
#import wget

In [0]:
py.sign_in('hannah101101', '9qyvhEf5VpOG5cdufpLb')

In [0]:
py.sign_in('hannah101101', '9qyvhEf5VpOG5cdufpLb')

url = "https://raw.githubusercontent.com/hutch-gwc/science_fair/master/data/infectious_diseases/zika_southAmerica.csv" # insert your text file URL here


df = pd.read_csv(url)


#### Clean col names

As you can see, the column names did not come through exactly as we would want them to. 
We are going to rename the first three columns and then drop the first row. 

In [55]:
df = df.rename(index=str, columns={"susp/con ZIKV cases": "year", "Unnamed: 1": "date", "Unnamed: 2": "Epi Week"})
df = df.iloc[1:]
df.head()

Unnamed: 0,year,date,Epi Week,Argentina,Bolivia,Brazil,Colombia,Ecuador,French Guiana,Guyana,Paraguay,Peru,Suriname,Venezuela
1,2016.0,3-Jan,1,0.0,2.0,7619.0,2883.0,1.0,41.0,0.0,22.0,0.0,245.0,241.0
2,,10-Jan,2,0.0,0.0,7785.0,3374.0,5.0,75.0,0.0,37.0,0.0,228.0,535.0
3,,17-Jan,3,0.0,3.0,8861.0,4558.0,9.0,106.0,0.0,27.0,0.0,394.0,1203.0
4,,24-Jan,4,0.0,0.0,9772.0,5804.0,4.0,186.0,0.0,31.0,0.0,408.0,2713.0
5,,31-Jan,5,0.0,2.0,12091.0,6358.0,2.0,199.0,3.0,46.0,0.0,352.0,5011.0


#### Clean year col
Next, we need to fill in the missing values for the year column. 
The first 52 rows should all say 2016 and the next 52 rows should all say 2017. 

In [56]:
df.loc[0:52,'year'] = 2016
df.loc[52:,'year'] = 2017
df.head()

Unnamed: 0,year,date,Epi Week,Argentina,Bolivia,Brazil,Colombia,Ecuador,French Guiana,Guyana,Paraguay,Peru,Suriname,Venezuela
1,2016,3-Jan,1,0.0,2.0,7619.0,2883.0,1.0,41.0,0.0,22.0,0.0,245.0,241.0
2,2016,10-Jan,2,0.0,0.0,7785.0,3374.0,5.0,75.0,0.0,37.0,0.0,228.0,535.0
3,2016,17-Jan,3,0.0,3.0,8861.0,4558.0,9.0,106.0,0.0,27.0,0.0,394.0,1203.0
4,2016,24-Jan,4,0.0,0.0,9772.0,5804.0,4.0,186.0,0.0,31.0,0.0,408.0,2713.0
5,2016,31-Jan,5,0.0,2.0,12091.0,6358.0,2.0,199.0,3.0,46.0,0.0,352.0,5011.0


#### Melt dataframe

We want the there to be a column called `country` and a column called `cases` rather than each country being its own column. 

In [57]:
df = pd.melt(df, id_vars=["year", "date", "Epi Week"], var_name='Country', value_name='Cases')
df.head()

Unnamed: 0,year,date,Epi Week,Country,Cases
0,2016,3-Jan,1,Argentina,0.0
1,2016,10-Jan,2,Argentina,0.0
2,2016,17-Jan,3,Argentina,0.0
3,2016,24-Jan,4,Argentina,0.0
4,2016,31-Jan,5,Argentina,0.0


#### Extract month from the data 
We want to add a new column which is the month for each of the cases. 
We can do this by pulling out the month from the data. 
Each date is in the format `D-MON` so we can "split" the date on the character `-` and only take the information from the right hand side. 

In [58]:
df["month"] = df["date"].apply(lambda x: x.split("-")[1])
df.head()

Unnamed: 0,year,date,Epi Week,Country,Cases,month
0,2016,3-Jan,1,Argentina,0.0,Jan
1,2016,10-Jan,2,Argentina,0.0,Jan
2,2016,17-Jan,3,Argentina,0.0,Jan
3,2016,24-Jan,4,Argentina,0.0,Jan
4,2016,31-Jan,5,Argentina,0.0,Jan


#### Sum cases by country/year 
"we are trying to add up the cases for each month for each country. It can count Argentina, but then gets tired and thinks Bolivia is 5. Problems" 

In [59]:
cases_by_year = []
for name, group in df.groupby(["Country", "year"]):
    cases = group["Cases"].sum()
    cases_by_year.append((name[0], name[1], cases))
cases_by_year = pd.DataFrame(cases_by_year, columns=["Country", "Year", "Cases"])
cases_by_year.head()

Unnamed: 0,Country,Year,Cases
0,Argentina,2016,26.0
1,Argentina,2017,825.0
2,Bolivia,2016,343.0
3,Bolivia,2017,1353.0
4,Brazil,2016,237234.0


#### Add in Lat/Long

In [60]:
lat_long = pd.read_csv("https://raw.githubusercontent.com/hutch-gwc/Rosalind/master/GWC%20pop%2C%20lat%2Clong%20data%20-%20lat_long%20(1).csv")
lat_long = lat_long.rename(index=str, columns={"population ": "population"})
lat_long.head()

Unnamed: 0,Country,lat,long,population
0,Argentina,-38.4,-63.6,44694198.0
1,Bolivia,-16.3,-63.6,11306341.0
2,Brazil,-14.2,-51.9,208846892.0
3,Colombia,4.6,-74.3,48168996.0
4,Ecuador,-1.8,-78.2,16498502.0


In [61]:
cases_by_year = pd.merge(cases_by_year, lat_long, on="Country")
cases_by_year["population"] = cases_by_year["population"].str.replace(',', '').astype(float)
cases_by_year["incidences"] = cases_by_year["Cases"] / (cases_by_year["population"] / 100000)
cases_by_year.head()

Unnamed: 0,Country,Year,Cases,lat,long,population,incidences
0,Argentina,2016,26.0,-38.4,-63.6,44694198.0,0.058173
1,Argentina,2017,825.0,-38.4,-63.6,44694198.0,1.845877
2,Bolivia,2016,343.0,-16.3,-63.6,11306341.0,3.033696
3,Bolivia,2017,1353.0,-16.3,-63.6,11306341.0,11.966736
4,Brazil,2016,237234.0,-14.2,-51.9,208846892.0,113.592306


In [0]:
# temp = []
# for name, group in cases_by_year.groupby("Year"):
#     catch = group["incidences"]
#     s = 0
#     for i in catch:
#         s=i+s
#     group["incidences"] = (group["incidences"] / s)*100
#     temp.append(group)
# temp = pd.concat(temp)
# cases_by_year = temp.copy()

In [63]:
temp = []
for name, group in cases_by_year.groupby("Country"):
    catch = group["incidences"]
    s = 0
    for i in catch:
        s=i+s
    group["incidences"] = (group["incidences"] / s)*100
    temp.append(group)
temp = pd.concat(temp)
cases_by_year = temp.copy()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



### Set-up plot

#### Cases

In [64]:
colors = {2016: 'rgb(189,215,231)', 2017: 'rgb(238, 105, 234)'}
cases = []  # this will hold all of the year-specific dictionaries. 

for year, group in cases_by_year.groupby(["Year"]):
    print(f"We are processing year {year}.")
    cases.append(go.Scattergeo(
        lon = group['long'], 
        lat = group['lat'],
        text = group['incidences'],
        name = year,
        marker = dict(
            size = group['incidences']/2,
            color = colors[year],
            opacity=0.5,
            line = dict(width = 0)
        ),
    ) )
print("We are done processing the years.")


# We want the name of the country to show beneath each circle. 
# We will edit the first month (Sept) to add the Country name
cases[0]['text'] = cases_by_year[ cases_by_year['Year'] == 2017 ]['incidences'].map('{:.0f}'.format).astype(str)+' '+\
    cases_by_year[ cases_by_year['Year'] == 2017 ]['Country']
cases[0]['mode'] = 'markers+text'
cases[0]['textposition'] = 'bottom center'

We are processing year 2016.
We are processing year 2017.
We are done processing the years.


In [0]:
inset = [
    go.Choropleth(
        locationmode = 'country names',
        locations = cases_by_year[ cases_by_year['Year'] == 2017 ]['Country'],
        z = cases_by_year[ cases_by_year['Year'] == 2017 ]['incidences'],  # the color/size should be case counts
        text = cases_by_year[ cases_by_year['Year'] == 2017 ]['Country'],
        colorscale = [[0,'rgb(0, 0, 0)'],[1,'rgb(0, 0, 0)']],
        autocolorscale = False,
        showscale = False,
        geo = 'geo2'
    ),
    go.Scattergeo(
        lon = [-14],
        lat = [-52],
        text = ['South America'],
        mode = 'text',  # When you hover, show information held in `cases` text
        showlegend = False,
        geo = 'geo2'
    )
]

In [66]:
configure_plotly_browser_state()

layout = go.Layout(
    title = 'Zika Cases',
    geo = dict(
        resolution = 50,
        scope = 'south america',
        showframe = False,
        showcoastlines = True,
        showland = True,
        landcolor = "rgb(229, 229, 229)",
        countrycolor = "rgb(255, 255, 255)" ,
        coastlinecolor = "rgb(255, 255, 255)",
        projection = dict(
            type = 'mercator'
        ),
        lonaxis = dict( range= [ -90.0, 50 ] ),
        lataxis = dict( range= [ -60, 15 ] ),
        domain = dict(
            x = [ 0, 1 ],
            y = [ 0, 1 ]
        )
    ),
    geo2 = dict(
        scope = 'south america',
        showframe = False,
        showland = True,
        landcolor = "rgb(229, 229, 229)",
        showcountries = False,
        domain = dict(
            x = [ 0.2, 0.8 ],
            y = [ 0.0, 0.5 ]
        ),
        bgcolor = 'rgba(255, 255, 255, 0.0)',
    ),
    legend = dict(
           traceorder = 'reversed'
    )
)

In [67]:
configure_plotly_browser_state()

fig = go.Figure(layout=layout, data=cases+inset)  # give plotly the layout we want and the data
py.iplot(fig, validate=False, filename='West Africa Ebola cases 2014')  # print it out 

scratch

In [0]:
cases_by_year
cases_by_year["raw_incidences"]=cases_by_year["Cases"]/cases_by_year["population"]

In [69]:
trace2016=go.Bar(
    x=cases_by_year[cases_by_year.Year==2016]["Country"],
    y=cases_by_year[cases_by_year.Year==2016]["raw_incidences"],
    name="2016"
)
trace2017=go.Bar(
    x=cases_by_year[cases_by_year.Year==2017]["Country"],
    y=cases_by_year[cases_by_year.Year==2017]["raw_incidences"],
    name="2017"
)
data = [trace2016, trace2017]
layout=go.Layout(
    barmode="stack"
)
fig=go.Figure(data=data, layout=layout)
py.iplot(fig, filename="stacked-bar")

# Log normalization? 

In [70]:
cases_by_year["log incidences"] = cases_by_year["incidences"].apply(lambda x: np.log(x + 1))
cases_by_year

Unnamed: 0,Country,Year,Cases,lat,long,population,incidences,raw_incidences,log incidences
0,Argentina,2016,26.0,-38.4,-63.6,44694198.0,3.055229,5.81731e-07,1.400007
1,Argentina,2017,825.0,-38.4,-63.6,44694198.0,96.944771,1.845877e-05,4.584404
2,Bolivia,2016,343.0,-16.3,-63.6,11306341.0,20.224057,3.033696e-05,3.055135
3,Bolivia,2017,1353.0,-16.3,-63.6,11306341.0,79.775943,0.0001196674,4.391679
4,Brazil,2016,237234.0,-14.2,-51.9,208846892.0,93.201435,0.001135923,4.545435
5,Brazil,2017,17305.0,-14.2,-51.9,208846892.0,6.798565,8.285974e-05,2.05394
6,Colombia,2016,91055.0,4.6,-74.3,48168996.0,98.865364,0.001890324,4.603823
7,Colombia,2017,1045.0,4.6,-74.3,48168996.0,1.134636,2.169445e-05,0.758296
8,Ecuador,2016,1884.0,-1.8,-78.2,16498502.0,49.049727,0.0001141922,3.913017
9,Ecuador,2017,1957.0,-1.8,-78.2,16498502.0,50.950273,0.0001186168,3.950287


In [71]:
colors = {2017: 'rgb(189,215,231)', 2016: 'rgb(238, 105, 234)'}
cases = []  # this will hold all of the year-specific dictionaries. 

for year, group in cases_by_year.groupby(["Year"]):
    print(f"We are processing year {year}.")
    cases.append(go.Scattergeo(
        lon = group['long'], 
        lat = group['lat'],
        text = group['log incidences'],
        name = year,
        marker = dict(
            size = group['log incidences'] * 10,
            color = colors[year],
            opacity=0.5,
            line = dict(width = 0)
        ),
    ) )
print("We are done processing the years.")


# We want the name of the country to show beneath each circle. 
# We will edit the first month (Sept) to add the Country name
cases[0]['text'] = cases_by_year[ cases_by_year['Year'] == 2017 ]['incidences'].map('{:.0f}'.format).astype(str)+' '+\
    cases_by_year[ cases_by_year['Year'] == 2017 ]['Country']
cases[0]['mode'] = 'markers+text'
cases[0]['textposition'] = 'bottom center'

inset = [
    go.Choropleth(
        locationmode = 'country names',
        locations = cases_by_year[ cases_by_year['Year'] == 2017 ]['Country'],
        z = cases_by_year[ cases_by_year['Year'] == 2017 ]['incidences'],  # the color/size should be case counts
        text = cases_by_year[ cases_by_year['Year'] == 2017 ]['Country'],
        colorscale = [[0,'rgb(0, 0, 0)'],[1,'rgb(0, 0, 0)']],
        autocolorscale = False,
        showscale = False,
        geo = 'geo2'
    ),
    go.Scattergeo(
        lon = [-14],
        lat = [-52],
        text = ['South America'],
        mode = 'text',  # When you hover, show information held in `cases` text
        showlegend = False,
        geo = 'geo2'
    )
]

layout = go.Layout(
    title = 'Zika Cases',
    geo = dict(
        resolution = 50,
        scope = 'south america',
        showframe = False,
        showcoastlines = True,
        showland = True,
        landcolor = "rgb(229, 229, 229)",
        countrycolor = "rgb(255, 255, 255)" ,
        coastlinecolor = "rgb(255, 255, 255)",
        projection = dict(
            type = 'Mercator'
        ),
        lonaxis = dict( range= [ -90.0, 50 ] ),
        lataxis = dict( range= [ -60, 15 ] ),
        domain = dict(
            x = [ 0, 1 ],
            y = [ 0, 1 ]
        )
    ),
    geo2 = dict(
        scope = 'south america',
        showframe = False,
        showland = True,
        landcolor = "rgb(229, 229, 229)",
        showcountries = False,
        domain = dict(
            x = [ 0.2, 0.8 ],
            y = [ 0.0, 0.5 ]
        ),
        bgcolor = 'rgba(255, 255, 255, 0.0)',
    ),
    legend = dict(
           traceorder = 'reversed'
    )
)

We are processing year 2016.
We are processing year 2017.
We are done processing the years.


ValueError: ignored

In [0]:
fig = go.Figure(layout=layout, data=cases+inset)  # give plotly the layout we want and the data
py.iplot(fig, validate=False, filename='West Africa Ebola cases 2014')  # print it out 