In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load only the country and wine_servings columns, with country as the index.
alcohol = pd.read_csv(
    'drinks.csv',
    index_col='country',
    usecols=['country', 'wine_servings'],
)

In [3]:
# Collapse the one-column DataFrame (wine_servings, indexed by country) into a Series.
alcohol = alcohol.squeeze()

Series Iteration

In [4]:
# Keep the first ten entries (by position) as a small sample for the iteration demos.
mini_alc = alcohol.iloc[:10]

In [5]:
mini_alc

country
Afghanistan            NaN
Albania               54.0
Algeria               14.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [6]:
# Iterating a Series directly yields its values, not its index labels.
for servings in mini_alc:
    print(servings)

nan
54.0
14.0
312.0
45.0
45.0
221.0
11.0
212.0
191.0


In [7]:
# Iterate the index to walk the labels (country names) instead of the values.
for country in mini_alc.index:
    print(country)

Afghanistan
Albania
Algeria
Andorra
Angola
Antigua & Barbuda
Argentina
Armenia
Australia
Austria


In [8]:
# Walk the labels and look each value up by label to print label/value pairs.
for country in mini_alc.index:
    print(country, mini_alc.loc[country])

Afghanistan nan
Albania 54.0
Algeria 14.0
Andorra 312.0
Angola 45.0
Antigua & Barbuda 45.0
Argentina 221.0
Armenia 11.0
Australia 212.0
Austria 191.0


In [9]:
# items() yields one (label, value) tuple per entry.
for label_value in mini_alc.items():
    print(label_value)

('Afghanistan', nan)
('Albania', 54.0)
('Algeria', 14.0)
('Andorra', 312.0)
('Angola', 45.0)
('Antigua & Barbuda', 45.0)
('Argentina', 221.0)
('Armenia', 11.0)
('Australia', 212.0)
('Austria', 191.0)


Filtering : filter(), where(), mask()

In [10]:
alcohol

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            NaN
Zambia           4.0
Zimbabwe         4.0
Name: wine_servings, Length: 193, dtype: float64

In [12]:
alcohol.filter(regex='^V')  # filter() matches against the index labels only, never the values

# "regex" stands for regular expression; '^V' keeps labels that start with "V"

country
Vanuatu      11.0
Venezuela     3.0
Vietnam       1.0
Name: wine_servings, dtype: float64

In [13]:
alcohol.filter(like='stan')  # index-only filter: keeps labels that contain the substring 'stan'

country
Afghanistan      NaN
Kazakhstan      12.0
Kyrgyzstan       6.0
Pakistan         NaN
Tajikistan       NaN
Turkmenistan    32.0
Uzbekistan       8.0
Name: wine_servings, dtype: float64

In [14]:
alcohol[alcohol > 200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [15]:
alcohol.loc[alcohol > 200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [16]:
def gt200(x):
    """Report whether ``x`` exceeds 200.

    Works on anything that supports ``>`` against an int: a scalar gives a
    bool, while a pandas Series gives an element-wise boolean Series, which is
    how ``alcohol[gt200]`` uses it as a callable indexer.
    """
    threshold = 200
    return x > threshold

In [17]:
alcohol[gt200]

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

# The where Method
The where method is not truly meant for filtering. It efficiently replaces values where a given condition is False (with NaN unless `other` is supplied) and keeps the values where the condition is True.

In [19]:
alcohol.where(lambda x: x > 200, other='too small').head(20)

country
Afghanistan          too small
Albania              too small
Algeria              too small
Andorra                  312.0
Angola               too small
Antigua & Barbuda    too small
Argentina                221.0
Armenia              too small
Australia                212.0
Austria              too small
Azerbaijan           too small
Bahamas              too small
Bahrain              too small
Bangladesh           too small
Barbados             too small
Belarus              too small
Belgium                  212.0
Belize               too small
Benin                too small
Bhutan               too small
Name: wine_servings, dtype: object

In [20]:
alcohol.where(lambda x: x > 200).head(20)

country
Afghanistan            NaN
Albania                NaN
Algeria                NaN
Andorra              312.0
Angola                 NaN
Antigua & Barbuda      NaN
Argentina            221.0
Armenia                NaN
Australia            212.0
Austria                NaN
Azerbaijan             NaN
Bahamas                NaN
Bahrain                NaN
Bangladesh             NaN
Barbados               NaN
Belarus                NaN
Belgium              212.0
Belize                 NaN
Benin                  NaN
Bhutan                 NaN
Name: wine_servings, dtype: float64

In [22]:
alcohol.where(lambda x: x > 200).dropna()

country
Andorra              312.0
Argentina            221.0
Australia            212.0
Belgium              212.0
Croatia              254.0
Denmark              278.0
Equatorial Guinea    233.0
Greece               218.0
Italy                237.0
Luxembourg           271.0
Portugal             339.0
Slovenia             276.0
Name: wine_servings, dtype: float64

In [24]:
alcohol.where(lambda x: x < 200).head(20)

country
Afghanistan            NaN
Albania               54.0
Algeria               14.0
Andorra                NaN
Angola                45.0
Antigua & Barbuda     45.0
Argentina              NaN
Armenia               11.0
Australia              NaN
Austria              191.0
Azerbaijan             5.0
Bahamas               51.0
Bahrain                7.0
Bangladesh             NaN
Barbados              36.0
Belarus               42.0
Belgium                NaN
Belize                 8.0
Benin                 13.0
Bhutan                 NaN
Name: wine_servings, dtype: float64

In [25]:
alcohol.where(lambda x: x < 200).dropna()

country
Albania              54.0
Algeria              14.0
Angola               45.0
Antigua & Barbuda    45.0
Armenia              11.0
                     ... 
Vanuatu              11.0
Venezuela             3.0
Vietnam               1.0
Zambia                4.0
Zimbabwe              4.0
Name: wine_servings, Length: 150, dtype: float64

# The mask Method
`where` replaces values where the condition is False, whereas `mask` replaces values where the condition is True.

In [26]:
alcohol.mask(lambda x: x >200)

country
Afghanistan     NaN
Albania        54.0
Algeria        14.0
Andorra         NaN
Angola         45.0
               ... 
Venezuela       3.0
Vietnam         1.0
Yemen           NaN
Zambia          4.0
Zimbabwe        4.0
Name: wine_servings, Length: 193, dtype: float64

In [27]:
alcohol.mask(lambda x: x >200).dropna()

country
Albania              54.0
Algeria              14.0
Angola               45.0
Antigua & Barbuda    45.0
Armenia              11.0
                     ... 
Vanuatu              11.0
Venezuela             3.0
Vietnam               1.0
Zambia                4.0
Zimbabwe              4.0
Name: wine_servings, Length: 150, dtype: float64

Transforming With update(), apply() and map()

In [28]:
alcohol.head(10)

country
Afghanistan            NaN
Albania               54.0
Algeria               14.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [None]:
# Spot vs. global transforms

In [29]:
# Spot transform: overwrite a single value in place, addressed by its index label.
alcohol.loc['Algeria'] = 19

In [30]:
alcohol.head(10)

country
Afghanistan            NaN
Albania               54.0
Algeria               19.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [31]:
# Several spot transforms in a row: one label-based assignment per country.
spot_updates = {'Albania': 190, 'Afghanistan': 20, 'Andorra': 29}
for country, servings in spot_updates.items():
    alcohol.loc[country] = servings

In [33]:
alcohol.head(10)

country
Afghanistan           20.0
Albania              190.0
Algeria               19.0
Andorra               29.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [34]:
# update() aligns the other Series on index labels and overwrites matches in place.
corrections = pd.Series({'Albania': 200, 'Algeria': 20})
alcohol.update(corrections)

In [36]:
alcohol.head(10)

country
Afghanistan           20.0
Albania              200.0
Algeria               20.0
Andorra               29.0
Angola                45.0
Antigua & Barbuda     45.0
Argentina            221.0
Armenia               11.0
Australia            212.0
Austria              191.0
Name: wine_servings, dtype: float64

In [None]:
# The apply() method

In [37]:
alcohol.apply(lambda x: x**2)

country
Afghanistan      400.0
Albania        40000.0
Algeria          400.0
Andorra          841.0
Angola          2025.0
                ...   
Venezuela          9.0
Vietnam            1.0
Yemen              NaN
Zambia            16.0
Zimbabwe          16.0
Name: wine_servings, Length: 193, dtype: float64

In [38]:
alcohol.apply(np.square)

country
Afghanistan      400.0
Albania        40000.0
Algeria          400.0
Andorra          841.0
Angola          2025.0
                ...   
Venezuela          9.0
Vietnam            1.0
Yemen              NaN
Zambia            16.0
Zimbabwe          16.0
Name: wine_servings, Length: 193, dtype: float64

In [39]:
def multiply_by_self(x):
    """Return ``x`` raised to the second power."""
    squared = pow(x, 2)
    return squared

In [40]:
alcohol.apply(multiply_by_self)

country
Afghanistan      400.0
Albania        40000.0
Algeria          400.0
Andorra          841.0
Angola          2025.0
                ...   
Venezuela          9.0
Vietnam            1.0
Yemen              NaN
Zambia            16.0
Zimbabwe          16.0
Name: wine_servings, Length: 193, dtype: float64

In [41]:
# Indexing with a callable hands the Series to the callable and then uses whatever
# it returns as the indexer. gt200 returns a boolean mask, so alcohol[gt200] filters;
# multiply_by_self returns squared servings, which pandas tries to look up as index
# labels, so the lookup fails with KeyError. The error is the point of this cell, so
# it is caught and printed to keep the notebook runnable from top to bottom.
try:
    alcohol[multiply_by_self]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "None of [Index([  400.0, 40000.0,   400.0,   841.0,  2025.0,  2025.0, 48841.0,   121.0,\n       44944.0, 36481.0,\n       ...\n           1.0,  7056.0,   484.0,    64.0,   121.0,     9.0,     1.0,     nan,\n          16.0,    16.0],\n      dtype='float64', name='country', length=193)] are in the [index]"

In [42]:
def multiply_by_self_with_min(x, min_servings):
    """Square ``x`` only when it is strictly below ``min_servings``.

    Values at or above the cutoff -- and NaN, for which the comparison is
    False -- are returned unchanged.
    """
    return x ** 2 if x < min_servings else x

In [43]:
alcohol.apply(multiply_by_self_with_min, min_servings= 200)

country
Afghanistan     400.0
Albania         200.0
Algeria         400.0
Andorra         841.0
Angola         2025.0
                ...  
Venezuela         9.0
Vietnam           1.0
Yemen             NaN
Zambia           16.0
Zimbabwe         16.0
Name: wine_servings, Length: 193, dtype: float64

In [None]:
# The map() method: it's a substitution method

In [44]:
alcohol.map(lambda x: x**2)

country
Afghanistan      400.0
Albania        40000.0
Algeria          400.0
Andorra          841.0
Angola          2025.0
                ...   
Venezuela          9.0
Vietnam            1.0
Yemen              NaN
Zambia            16.0
Zimbabwe          16.0
Name: wine_servings, Length: 193, dtype: float64

In [45]:
alcohol.map(np.square)

country
Afghanistan      400.0
Albania        40000.0
Algeria          400.0
Andorra          841.0
Angola          2025.0
                ...   
Venezuela          9.0
Vietnam            1.0
Yemen              NaN
Zambia            16.0
Zimbabwe          16.0
Name: wine_servings, Length: 193, dtype: float64

In [47]:
# Unlike apply(), Series.map() here cannot forward extra arguments to the mapped
# function: its own first parameter is named `arg`, so the keyword below collides
# with the function passed positionally and raises TypeError. The error is caught
# and printed so the notebook still runs end to end.
# To supply min_servings, bind it first, e.g.
#   alcohol.map(lambda x: multiply_by_self_with_min(x, 200))
# or use alcohol.apply(multiply_by_self_with_min, min_servings=200).
try:
    alcohol.map(multiply_by_self_with_min, arg=(200,))
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: Series.map() got multiple values for argument 'arg'

In [None]:
Recap:
update(): updates series values in place using another series.
series.update(other_series)

apply(): applies a function (or ufunc) to each value in the series
series.apply(np.square)

map(): substitutes series values with others from a function, series or dict
series.map({'old_value': 'new_value'})

Q1. Read the drinks.csv dataset again, this time bringing in the beer_servings column as a new series (beers), with country again acting
as the index.

Q2. Calculate the mean, median and standard deviation of the beer servings in beers. Is the distribution right- or left-skewed?

Q3. Slice the first 10 countries from beers. Are these values large or small relative to the rest of the sample?
Bonus : To answer that, we could compare each value to the mean or median. An even better approach would be to calculate standard
scores or z-scores as they are known.

Q4. Create a new series that uses series arithmetic to calculate the z-score for each serving by subtracting the mean from each value and
dividing the difference by the standard deviation. Point the variable z_scores to this series.

Which country has the largest absolute z-score? Is this a positive or negative deviation?
