# Alcohol consumption choropleth

In [1]:
import plotly.plotly as py
import json
import urllib2
import re
from bs4 import BeautifulSoup
from IPython.display import IFrame

Get the data from the Wikipedia page on the subject.

In [2]:
# Wikipedia article that the consumption figures are scraped from
page_url = 'https://en.wikipedia.org/wiki/List_of_countries_by_alcohol_consumption_per_capita'

# Embed the live page in the notebook so the source table can be inspected
IFrame(src=page_url, width=950, height=500)

### Scrape the table

In [3]:
# Download the page and parse it into a BeautifulSoup tree.
# The parser is named explicitly so the resulting tree does not depend on
# which optional parsers (lxml, html5lib) happen to be installed, and the
# HTTP response is closed as soon as parsing is done.
response = urllib2.urlopen(page_url)
try:
    soup = BeautifulSoup(response, 'html.parser')
finally:
    response.close()

In [4]:
# N.B. the data of interest is the second table of the page:
#   the country name is the text of the first link in the first column
#   the value of interest is the first child of the second column
countries = []
values = []

data_table = soup.find_all('table')[1]

for table_row in data_table.find_all('tr'):
    cells = table_row.find_all('td')
    # rows that contain no <td> cell are skipped
    if not cells:
        continue
    country_link = cells[0].find_all('a')[0]
    countries.append(country_link.contents[0])
    values.append(cells[1].contents[0])

### Convert country name to ISO-3

Plotly uses [ISO-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) identifier codes to draw choropleths.

The JSON below will help us convert the country names found in the Wikipedia table to ISO-3 codes.

In [5]:
# Fetch the JSON lookup table: a list of {'iso3': ..., 'regex': ...} items.
# The HTTP response is closed explicitly once the JSON has been decoded.
response = urllib2.urlopen('https://raw.githubusercontent.com/etpinard/country-iso3/master/get-iso3.json')
try:
    get_iso3 = json.load(response)
finally:
    response.close()

get_iso3

[{u'iso3': u'AFG', u'regex': u'afghan'},
 {u'iso3': u'ALA', u'regex': u'\\b(a|\xe5)land'},
 {u'iso3': u'ALB', u'regex': u'albania'},
 {u'iso3': u'DZA', u'regex': u'algeria'},
 {u'iso3': u'ASM', u'regex': u'^(?=.*americ).*samoa'},
 {u'iso3': u'AND', u'regex': u'andorra'},
 {u'iso3': u'AGO', u'regex': u'angola'},
 {u'iso3': u'AIA', u'regex': u'anguill?a'},
 {u'iso3': u'ATA', u'regex': u'antarctica'},
 {u'iso3': u'ATG', u'regex': u'antigua'},
 {u'iso3': u'ARG', u'regex': u'argentin'},
 {u'iso3': u'ARM', u'regex': u'armenia'},
 {u'iso3': u'ABW', u'regex': u'^(?!.*bonaire).*\\baruba'},
 {u'iso3': u'AUS', u'regex': u'australia'},
 {u'iso3': u'AUT', u'regex': u'^(?!.*hungary).*austria|\\baust.*\\bemp'},
 {u'iso3': u'AZE', u'regex': u'azerbaijan'},
 {u'iso3': u'BHS', u'regex': u'bahamas'},
 {u'iso3': u'BHR', u'regex': u'bahrain'},
 {u'iso3': u'BGD', u'regex': u'bangladesh|^(?=.*east).*paki?stan'},
 {u'iso3': u'BRB', u'regex': u'barbados'},
 {u'iso3': u'BLR', u'regex': u'belarus|byelo'},
 {u'is

Loop through all the countries in the dataset. For each country, try each of the regular expressions of the above JSON. When a match is found, append the matching ISO-3 code and the country's value to the data lists.

In [6]:
# ISO-3 codes and their values, kept as two parallel lists
locations = []
z = []

# keep track of country names that do not match any regex
countries_no_match = []

for country, value in zip(countries, values):
    # lower-case once per country instead of once per regex
    country_lower = country.lower()
    for item in get_iso3:
        # re.search returns None (falsy) when the pattern is not found
        if re.search(item['regex'], country_lower):
            locations.append(item['iso3'])
            z.append(value)
            # the first matching regex wins
            break
    else:
        # the for/else branch runs only when no regex matched (no `break`);
        # report the name and record it in `countries_no_match`
        print(country_lower)
        countries_no_match.append(country)

In [7]:
# Sizes of: matched ISO-3 codes, scraped countries, unmatched country names.
# all countries have found a match!
tuple(len(names) for names in (locations, countries, countries_no_match))

(191, 191, 0)

### Make a choropleth

In [13]:
# Colorbar settings for the choropleth trace
choropleth_colorbar = {
    'tickfont': {'size': 14},
    'x': -0.1,
    'len': 1.1,
    'tick0': 0,
    'dtick': 2,
    'thickness': 10,
    'ticksuffix': ' L per capita per year',
    'showticksuffix': 'last',
    'ticks': 'outside',
    'ticklen': 5,
}

# The single trace: one ISO-3 location per value in `z`
choropleth_trace = {
    'type': 'choropleth',
    'locations': locations,
    'z': z,
    'zmin': 0,
    'zmax': 18,
    'colorbar': choropleth_colorbar,
}

# Figure-level settings: title, map projection and a fixed figure size
choropleth_layout = {
    'title': 'Pure alcohol consumption among adults (age 15+) in 2010',
    'titlefont': {'size': 24},
    'geo': {'projection': {'type': 'robinson'}},
    'autosize': False,
    'width': 800,
    'height': 580,
}

py.iplot(
    {'data': [choropleth_trace], 'layout': choropleth_layout},
    validate=False,
    filename='alcohol-consumption-per-capita'
)

See the full-screen version at: [plot.ly/~etpinard/4666.embed](https://plot.ly/~etpinard/4666.embed)

In [9]:
# Inject CSS styling in the NB
from IPython.display import display, HTML

# Read the stylesheet inside a `with` block so the file handle is closed
# as soon as the content has been read.
with open('../_custom.css') as css_file:
    custom_css = css_file.read()

display(HTML(custom_css))