# What do I need to know about the pandas index? (Part 2)

In [1]:
import pandas as pd

In [3]:
url = 'http://bit.ly/drinksbycountry'
drinks = pd.read_csv(url)

In [4]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [8]:
# A Series also have an index that comes from the DatFrame
drinks['continent'].head()

0      Asia
1    Europe
2    Africa
3    Europe
4    Africa
Name: continent, dtype: object

In [11]:
# Setting the country column as index
drinks.set_index('country', inplace=True) # making changes in place
drinks.head()

KeyError: 'country'

In [14]:
# We can see the index has changed
drinks.continent.value_counts()

Africa           53
Europe           45
Asia             44
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

In [16]:
# Accessing a value by its index
drinks.continent.value_counts().loc['Europe']

45

In [18]:
# We can sort values as well as indexes
print(drinks.continent.value_counts().sort_values()) # Sorting the values
drinks.continent.value_counts().sort_index() # Sorting the index

South America    12
Oceania          16
North America    23
Asia             44
Europe           45
Africa           53
Name: continent, dtype: int64


Africa           53
Asia             44
Europe           45
North America    23
Oceania          16
South America    12
Name: continent, dtype: int64

### The index is useful for aligment

In [23]:
# Creating a new Series for demostration
people = pd.Series([3000000, 85000], index=['Albania', 'Andorra'], name='population')
people

Albania    3000000
Andorra      85000
Name: population, dtype: int64

In [28]:
# Now we are going to multiply the values from beer_servings with the population numbers.
# Thanks to the index pandas will be able to align the values that have a shared index and compute the multiplication.
drinks.beer_servings.head() * people

Afghanistan            NaN
Albania        267000000.0
Algeria                NaN
Andorra         20825000.0
Angola                 NaN
dtype: float64

In [31]:
# We can add a Series to a DataFrame and it will align the values acording to the shared index
pd.concat([drinks, people], axis=1, sort=True).head() # concatenating drinks and people along the column axis
# We explicitly pass a sort= value to silence the warning it gives about a change of behaviour in a future pandas version 

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,population
Afghanistan,0,0,0,0.0,Asia,
Albania,89,132,54,4.9,Europe,3000000.0
Algeria,25,0,14,0.7,Africa,
Andorra,245,138,312,12.4,Europe,85000.0
Angola,217,57,45,5.9,Africa,
