## Day 18: What do I need to know about the pandas index? (Part 2)
https://courses.dataschool.io/view/courses/pandas-in-30-days/2341096-course-videos/7555293-day-18-what-do-i-need-to-know-about-the-pandas-index-part-2

In [1]:
import pandas as pd

In [2]:
# Load the drinks-by-country dataset from the local data folder.
drinks = pd.read_csv(filepath_or_buffer='./data/drinks.csv')

In [3]:
# Preview the first five rows; at this point the index is the default integer range.
drinks.head(n=5)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [4]:
# A column selected from the DataFrame carries the DataFrame's index with it.
drinks['continent'].head()

0      Asia
1    Europe
2    Africa
3    Europe
4    Africa
Name: continent, dtype: object

In [5]:
# Promote the 'country' column to the index.
# Reassigning the result (instead of inplace=True) keeps the call chainable and
# makes the state change explicit. Note: re-running this cell without reloading
# the data raises KeyError, because 'country' is no longer a column.
drinks = drinks.set_index('country')
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [6]:
# The same column selection is now labelled by country name instead of by integer position.
drinks['continent'].head()

country
Afghanistan      Asia
Albania        Europe
Algeria        Africa
Andorra        Europe
Angola         Africa
Name: continent, dtype: object

In [7]:
# value_counts() returns a Series whose index holds the distinct continent names.
drinks['continent'].value_counts()

continent
Africa           53
Europe           45
Asia             44
North America    23
Oceania          16
South America    12
Name: count, dtype: int64

In [8]:
# Inspect the index of the value_counts() result directly.
drinks['continent'].value_counts().index

Index(['Africa', 'Europe', 'Asia', 'North America', 'Oceania',
       'South America'],
      dtype='object', name='continent')

In [19]:
# Because the index holds the continent names, a single count can be selected by label.
drinks['continent'].value_counts().loc['South America']

12

In [20]:
# Label-based selection again, this time for a different continent.
drinks['continent'].value_counts().loc['Africa']

53

In [21]:
# Order the counts by their values (ascending); the index labels travel with them.
drinks['continent'].value_counts().sort_values(ascending=True)

continent
South America    12
Oceania          16
North America    23
Asia             44
Europe           45
Africa           53
Name: count, dtype: int64

In [22]:
# Sorting the Index object on its own yields only the ordered labels, not the counts.
drinks['continent'].value_counts().index.sort_values()

Index(['Africa', 'Asia', 'Europe', 'North America', 'Oceania',
       'South America'],
      dtype='object', name='continent')

In [23]:
# Order the counts by their index labels (alphabetical) rather than by value.
drinks['continent'].value_counts().sort_index(ascending=True)

continent
Africa           53
Asia             44
Europe           45
North America    23
Oceania          16
South America    12
Name: count, dtype: int64

In [27]:
# Alignment: build a small population Series labelled by country name,
# so it can be matched against the country-indexed `drinks` data.
people = pd.Series({'Albania': 3000000, 'Andorra': 85000}, name='population')
people

Albania    3000000
Andorra      85000
Name: population, dtype: int64

In [28]:
# Goal: combine `people` with `drinks` to get the total beer servings in those countries.
# Start from the beer_servings column, which is indexed by country name.
drinks['beer_servings']

country
Afghanistan      0
Albania         89
Algeria         25
Andorra        245
Angola         217
              ... 
Venezuela      333
Vietnam        111
Yemen            6
Zambia          32
Zimbabwe        64
Name: beer_servings, Length: 193, dtype: int64

In [30]:
# Multiplication aligns the two Series on their index labels: rows whose country
# appears in both are multiplied, and every other country gets NaN.
drinks['beer_servings'] * people

Afghanistan            NaN
Albania        267000000.0
Algeria                NaN
Andorra         20825000.0
Angola                 NaN
                  ...     
Venezuela              NaN
Vietnam                NaN
Yemen                  NaN
Zambia                 NaN
Zimbabwe               NaN
Length: 193, dtype: float64

In [31]:
# Attach `people` as a new column; rows are aligned on the shared country index,
# so countries missing from `people` get NaN in 'population'.
pd.concat([drinks, people], axis='columns').head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,population
Afghanistan,0,0,0,0.0,Asia,
Albania,89,132,54,4.9,Europe,3000000.0
Algeria,25,0,14,0.7,Africa,
Andorra,245,138,312,12.4,Europe,85000.0
Angola,217,57,45,5.9,Africa,


In [15]:
# (rows, columns) of drinks. The concat result above was only displayed, never assigned,
# so drinks itself does not contain a 'population' column.
drinks.shape

(193, 5)

In [16]:
# Clear the index name so the 'country' label is no longer shown above the index.
drinks.index.name = None

In [17]:
# Preview the first five rows to confirm the index name is gone.
drinks.head(n=5)

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [18]:
# Move the index back into a 'country' column and restore the default integer index.
# rename_axis labels the index first, so reset_index names the new column 'country'
# instead of the default 'index'. The result is reassigned rather than mutated with
# inplace=True, which keeps the step chainable and explicit.
drinks = drinks.rename_axis('country').reset_index()
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [19]:
# Bonus: describe() returns a DataFrame of summary statistics,
# so it has its own index (statistic names) and columns.
drinks.describe()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
count,193.0,193.0,193.0,193.0
mean,106.160622,80.994819,49.450777,4.717098
std,101.143103,88.284312,79.697598,3.773298
min,0.0,0.0,0.0,0.0
25%,20.0,4.0,1.0,1.3
50%,76.0,56.0,8.0,4.2
75%,188.0,128.0,59.0,7.2
max,376.0,438.0,370.0,14.4


In [21]:
# The row labels of the summary are the statistic names.
drinks.describe().index

Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'], dtype='object')

In [23]:
# The column labels of the summary are the columns that describe() summarised.
drinks.describe().columns

Index(['beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol'],
      dtype='object')

In [24]:
# Select the 25th-percentile row by its index label; all columns are returned.
drinks.describe().loc['25%']

beer_servings                   20.0
spirit_servings                  4.0
wine_servings                    1.0
total_litres_of_pure_alcohol     1.3
Name: 25%, dtype: float64

In [25]:
# Select a single cell by row label and column label.
drinks.describe().loc['25%', 'beer_servings']

20.0