In [89]:
import csv
f = open("world_alcohol.csv", "r")
data = list(csv.reader(f))
data[:5]

[['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value'],
 ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
 ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
 ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
 ['1986', 'Americas', 'Colombia', 'Beer', '4.27']]

In [90]:
data[0]

['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value']

Each row specifies how many liters of a type of alcohol each citizen of a country drank in a given year. The first row shows how many liters of wine an average person in Vietnam drank in 1986. We have columns for:
- year
- 'WHO' region (the region in which the country is located)
- country
- beverage types (the type of beverage the data is for - e.g. 'Wine')
- display value (the number of liters, on average, of the beverage type a citizen of the country drank in the given year)

In [91]:
years = [a[0] for a in data]
years_set = set(years)
years_set

{'1984', '1985', '1986', '1987', '1989', 'Year'}

Were looking at alcohol consumption per capita between the years 1986 and 1989.

In [92]:
who_regions = [a[1] for a in data]
who_regions_set = set(who_regions)
who_regions_set

{'Africa',
 'Americas',
 'Eastern Mediterranean',
 'Europe',
 'South-East Asia',
 'WHO region',
 'Western Pacific'}

In 6 different regions.

In [93]:
countries = [a[2] for a in data]
countries_set = set(countries)
countries_set

{'Afghanistan',
 'Albania',
 'Algeria',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Australia',
 'Austria',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia (Plurinational State of)',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo',
 'Costa Rica',
 'Country',
 'Croatia',
 "Cte d'Ivoire",
 'Cuba',
 'Cyprus',
 'Czech Republic',
 "Democratic People's Republic of Korea",
 'Democratic Republic of the Congo',
 'Denmark',
 'Djibouti',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Germany',
 'Ghana',
 'Greece',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 'Hungary',
 'Iceland',
 'India',
 'Indonesia',
 '

In 165 countries.

In [94]:
beverages = [a[3] for a in data]
beverages_set = set(beverages)
beverages_set

{'Beer', 'Beverage Types', 'Other', 'Spirits', 'Wine'}

Looking at 4 different beverage types.

In [95]:
import numpy as np
world_alcohol = np.genfromtxt("world_alcohol.csv", delimiter=",", dtype="U75", skip_header=1)
print(world_alcohol)

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1986' 'Europe' 'Switzerland' 'Spirits' '2.54']
 ['1987' 'Western Pacific' 'Papua New Guinea' 'Other' '0']
 ['1986' 'Africa' 'Swaziland' 'Other' '5.15']]


My first question is, what is the average alcohol consumption in liters? I can take the average of the 'display value' column, which specifies how many liters of a type of alcohol the average citizen, of a particular country, drank in a given year.

In [96]:
# I first need to deal with some empty values in the alcohol_consumption column by relacing '' with 0, and then converting it to a float (integer)
is_value_empty= world_alcohol[:,4] == ''
world_alcohol[is_value_empty, 4] = 0

In [97]:
# Converting alcohol_consumption column to float type. 
alcohol_comsumption = world_alcohol[:,4]
alcohol_consumption = alcohol_comsumption.astype(float)

In [115]:
average_consumption = np.mean(alcohol_consumption)
average_consumption

1.2001719373656738

In [116]:
median_consumption = np.median(alcohol_consumption)
median_consumption

0.21

In [99]:
total_consumption = alcohol_consumption.sum()
total_consumption

3908.96

In [100]:
min_consumption = alcohol_consumption.min()
max_consumption = alcohol_consumption.max()
std_consumption = alcohol_consumption.std()

In [101]:
min_consumption

0.0

In [102]:
max_consumption

13.5

In [103]:
std_consumption

1.923231141558637

How much did the USA drink in 1986?

In [104]:
is_usa_1986 = (world_alcohol[:,0] == '1986') & (world_alcohol[:,2] == 'United States of America')
usa_1986 = world_alcohol[is_usa_1986, :]
usa_alcohol = usa_1986[:,4]
missing_string = usa_alcohol == ''
usa_alcohol[missing_string] = '0'
usa_alcohol = usa_alcohol.astype(float)
total_usa_drinking = usa_alcohol.sum()
total_usa_drinking

9.73

In 1986, the average citizen of the United States of America drank 9.73 total liters of alcohol. 

Now I want to calculate the total consumption of each of the countries in a given year - starting with 1989. 

In [105]:
# totals for each country in 1989
totals_1989 = {}
is_year = world_alcohol[:,0] == "1989"
year = world_alcohol[is_year,:]

for country in countries:
    is_country = year[:,2] == country
    country_consumption = year[is_country,:]
    alcohol_column = country_consumption[:,4]
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals_1989[country] = alcohol_column.sum()
totals_1989

{'Afghanistan': 0.0,
 'Albania': 1.73,
 'Algeria': 0.4,
 'Angola': 2.28,
 'Antigua and Barbuda': 4.69,
 'Argentina': 10.82,
 'Australia': 12.09,
 'Austria': 13.9,
 'Bahamas': 12.290000000000001,
 'Bahrain': 4.89,
 'Bangladesh': 0.0,
 'Belarus': 7.9799999999999995,
 'Belgium': 11.61,
 'Belize': 0.0,
 'Benin': 1.3800000000000001,
 'Bhutan': 0.31,
 'Bolivia (Plurinational State of)': 3.05,
 'Botswana': 4.63,
 'Brazil': 5.52,
 'Brunei Darussalam': 2.7800000000000002,
 'Bulgaria': 11.43,
 'Burkina Faso': 3.99,
 'Burundi': 8.159999999999998,
 'Cabo Verde': 2.79,
 'Cambodia': 0.33,
 'Cameroon': 6.359999999999999,
 'Canada': 9.0,
 'Central African Republic': 2.5199999999999996,
 'Chad': 0.30000000000000004,
 'Chile': 8.649999999999999,
 'China': 3.33,
 'Colombia': 6.960000000000001,
 'Comoros': 0.10999999999999999,
 'Congo': 3.1500000000000004,
 'Costa Rica': 5.3999999999999995,
 'Country': 0.0,
 'Croatia': 10.92,
 "Cte d'Ivoire": 2.2,
 'Cuba': 4.37,
 'Cyprus': 9.29,
 'Czech Republic': 13.0099

In [106]:
# totals for each country in 1988
totals_1988 = {}
is_year = world_alcohol[:,0] == "1988"
year = world_alcohol[is_year,:]

for country in countries:
    is_country = year[:,2] == country
    country_consumption = year[is_country,:]
    alcohol_column = country_consumption[:,4]
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals_1988[country] = alcohol_column.sum()

In [107]:
totals_1988

{'Afghanistan': 0.0,
 'Albania': 0.0,
 'Algeria': 0.0,
 'Angola': 0.0,
 'Antigua and Barbuda': 0.0,
 'Argentina': 0.0,
 'Australia': 0.0,
 'Austria': 0.0,
 'Bahamas': 0.0,
 'Bahrain': 0.0,
 'Bangladesh': 0.0,
 'Belarus': 0.0,
 'Belgium': 0.0,
 'Belize': 0.0,
 'Benin': 0.0,
 'Bhutan': 0.0,
 'Bolivia (Plurinational State of)': 0.0,
 'Botswana': 0.0,
 'Brazil': 0.0,
 'Brunei Darussalam': 0.0,
 'Bulgaria': 0.0,
 'Burkina Faso': 0.0,
 'Burundi': 0.0,
 'Cabo Verde': 0.0,
 'Cambodia': 0.0,
 'Cameroon': 0.0,
 'Canada': 0.0,
 'Central African Republic': 0.0,
 'Chad': 0.0,
 'Chile': 0.0,
 'China': 0.0,
 'Colombia': 0.0,
 'Comoros': 0.0,
 'Congo': 0.0,
 'Costa Rica': 0.0,
 'Country': 0.0,
 'Croatia': 0.0,
 "Cte d'Ivoire": 0.0,
 'Cuba': 0.0,
 'Cyprus': 0.0,
 'Czech Republic': 0.0,
 "Democratic People's Republic of Korea": 0.0,
 'Democratic Republic of the Congo': 0.0,
 'Denmark': 0.0,
 'Djibouti': 0.0,
 'Dominican Republic': 0.0,
 'Ecuador': 0.0,
 'Egypt': 0.0,
 'El Salvador': 0.0,
 'Equatorial Gu

In [108]:
# totals for each country in 1987
totals_1987 = {}
is_year = world_alcohol[:,0] == "1987"
year = world_alcohol[is_year,:]

for country in countries:
    is_country = year[:,2] == country
    country_consumption = year[is_country,:]
    alcohol_column = country_consumption[:,4]
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals_1987[country] = alcohol_column.sum()
totals_1987

{'Afghanistan': 0.0,
 'Albania': 1.63,
 'Algeria': 0.28,
 'Angola': 1.6300000000000001,
 'Antigua and Barbuda': 3.68,
 'Argentina': 12.759999999999998,
 'Australia': 12.46,
 'Austria': 13.5,
 'Bahamas': 14.139999999999999,
 'Bahrain': 5.5600000000000005,
 'Bangladesh': 0.0,
 'Belarus': 0.0,
 'Belgium': 13.16,
 'Belize': 4.6499999999999995,
 'Benin': 1.54,
 'Bhutan': 0.46,
 'Bolivia (Plurinational State of)': 3.06,
 'Botswana': 4.26,
 'Brazil': 3.94,
 'Brunei Darussalam': 2.8899999999999997,
 'Bulgaria': 10.91,
 'Burkina Faso': 4.069999999999999,
 'Burundi': 8.22,
 'Cabo Verde': 3.36,
 'Cambodia': 0.22,
 'Cameroon': 6.500000000000001,
 'Canada': 9.120000000000001,
 'Central African Republic': 2.7800000000000002,
 'Chad': 0.30000000000000004,
 'Chile': 9.49,
 'China': 3.1599999999999997,
 'Colombia': 6.819999999999999,
 'Comoros': 0.08,
 'Congo': 4.76,
 'Costa Rica': 4.2299999999999995,
 'Country': 0.0,
 'Croatia': 13.51,
 "Cte d'Ivoire": 2.49,
 'Cuba': 4.37,
 'Cyprus': 8.870000000000001

In [109]:
# totals for each country in 1986
totals_1986 = {}
is_year = world_alcohol[:,0] == "1986"
year = world_alcohol[is_year,:]

for country in countries:
    is_country = year[:,2] == country
    country_consumption = year[is_country,:]
    alcohol_column = country_consumption[:,4]
    is_empty = alcohol_column == ''
    alcohol_column[is_empty] = "0"
    alcohol_column = alcohol_column.astype(float)
    totals_1986[country] = alcohol_column.sum()
totals_1986

{'Afghanistan': 0.0,
 'Albania': 1.38,
 'Algeria': 0.29,
 'Angola': 1.7,
 'Antigua and Barbuda': 3.55,
 'Argentina': 12.89,
 'Australia': 12.3,
 'Austria': 13.6,
 'Bahamas': 14.120000000000001,
 'Bahrain': 5.11,
 'Bangladesh': 0.0,
 'Belarus': 0.0,
 'Belgium': 12.72,
 'Belize': 5.73,
 'Benin': 1.26,
 'Bhutan': 0.46,
 'Bolivia (Plurinational State of)': 3.29,
 'Botswana': 4.4799999999999995,
 'Brazil': 4.13,
 'Brunei Darussalam': 2.5300000000000002,
 'Bulgaria': 11.47,
 'Burkina Faso': 3.71,
 'Burundi': 8.35,
 'Cabo Verde': 2.92,
 'Cambodia': 0.22,
 'Cameroon': 6.5,
 'Canada': 9.31,
 'Central African Republic': 2.94,
 'Chad': 0.32,
 'Chile': 8.93,
 'China': 2.38,
 'Colombia': 6.609999999999999,
 'Comoros': 0.16,
 'Congo': 5.4,
 'Costa Rica': 4.859999999999999,
 'Country': 0.0,
 'Croatia': 0.0,
 "Cte d'Ivoire": 2.8000000000000003,
 'Cuba': 4.04,
 'Cyprus': 8.32,
 'Czech Republic': 13.01,
 "Democratic People's Republic of Korea": 3.5100000000000002,
 'Democratic Republic of the Congo': 2.

In [110]:
totals_1987['United States of America']

9.61

In [111]:
totals_1989['United States of America']

9.16

How about the country with the greatest alcohol consumption in liters?

In [112]:
highest_value = 0
highest_key = None
for country in totals_1989:
    consumption = totals_1989[country]
    if consumption > highest_value:
        highest_value = consumption
        highest_key = country
highest_key

'Hungary'

In [113]:
highest_value

16.29

In the year 1989, the average hungarian drank 16.29 liters of alcohol.