In [1]:
import pandas as pd

In [2]:
# Load the per-country drinks data; no index column is requested, so pandas
# assigns its default integer row index.
csv_path = '../data/drinks-by-country.csv'
drinks_df = pd.read_csv(csv_path)
drinks_df.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa


# Pandas Index usage
**Identification, Selection, Alignment**

In [3]:
# Row labels of the frame: a RangeIndex of consecutive integers, since no
# index column was chosen when the CSV was read.
drinks_df.index

RangeIndex(start=0, stop=193, step=1)

In [4]:
# Column labels are stored in an Index object as well.
drinks_df.columns

Index(['country', 'beer_servings', 'spirit_servings', 'wine_servings',
       'total_litres_of_pure_alcohol', 'continent'],
      dtype='object')

## Selection

Label-based selection with `.loc` works on any expression that returns a DataFrame or Series.

In [5]:
# .loc selects by index label; a single row label returns that row as a Series.
drinks_df.loc[52]

country                               Ecuador
beer_servings                             162
spirit_servings                            74
wine_servings                               3
total_litres_of_pure_alcohol              4.2
continent                       South America
Name: 52, dtype: object

In [6]:
# A row label plus a column label returns the single value at their intersection.
drinks_df.loc[52, 'country']

'Ecuador'

## Identification

In [7]:
# Promote the 'country' column to the row index so each row is identified by
# its country name. The returned frame is rebound instead of using
# inplace=True, which keeps the step explicit and chainable.
drinks_df = drinks_df.set_index('country')
drinks_df.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [8]:
# The index now holds the country names (and carries the name 'country')
# instead of integer row labels.
drinks_df.index

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua & Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'Tanzania', 'USA', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela',
       'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=193)

In [9]:
# With countries as the index, a row is selected by country name.
drinks_df.loc['Ecuador']

beer_servings                             162
spirit_servings                            74
wine_servings                               3
total_litres_of_pure_alcohol              4.2
continent                       South America
Name: Ecuador, dtype: object

In [10]:
# Remove the index name ('country' here) so the displayed table has no extra
# header row. rename_axis(None) returns a new frame instead of mutating the
# existing Index object.
drinks_df = drinks_df.rename_axis(None)
drinks_df.head(3)

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa


## Alignment
When concatenated with a DataFrame, a Series is aligned on its index labels; rows whose label is missing from the Series are filled with `NaN`.

In [11]:
# Population figures for two countries only, keyed by country name so the
# labels can be matched against the drinks index.
popule = pd.Series(
    {'Albania': 3000000, 'Andorra': 85000},
    name='population',
)
popule

Albania    3000000
Andorra      85000
Name: population, dtype: int64

In [12]:
# Column-wise concatenation: rows are matched on index labels, not on position.
drinks_with_population = pd.concat([drinks_df, popule], axis='columns')
drinks_with_population.head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent,population
Afghanistan,0,0,0,0.0,Asia,
Albania,89,132,54,4.9,Europe,3000000.0
Algeria,25,0,14,0.7,Africa,
Andorra,245,138,312,12.4,Europe,85000.0
Angola,217,57,45,5.9,Africa,


# Restoring Index

In [13]:
# reset_index() names the new column after the index, so the index is given
# its 'country' name back first; an unnamed index would produce a column
# called 'index'. The result is rebound rather than modified in place.
drinks_df = drinks_df.rename_axis('country').reset_index()
drinks_df.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
