MultiIndex Series and DataFrame

In [5]:
import numpy as numpy
import pandas as pd

In [6]:
# Ways to build a MultiIndex object
# 1. pd.MultiIndex.from_tuples(): one (Country, City) tuple per row
country_city_pairs = [
    ('USA', 'New York'),
    ('USA', 'California'),
    ('India', 'Delhi'),
    ('India', 'Mumbai'),
]
index = pd.MultiIndex.from_tuples(country_city_pairs, names=['Country', 'City'])

# Column values line up positionally with the tuples above.
population_by_row = [8419600, 39538223, 31870000, 20411000]
gdp_by_row = [1.5, 3.9, 0.3, 0.4]
df = pd.DataFrame({'Population': population_by_row, 'GDP': gdp_by_row}, index=index)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
Country,City,Unnamed: 2_level_1,Unnamed: 3_level_1
USA,New York,8419600,1.5
USA,California,39538223,3.9
India,Delhi,31870000,0.3
India,Mumbai,20411000,0.4


In [21]:
# Row access with .loc[]: passing only the outer 'Country' label selects
# every row under it; the result is indexed by the remaining 'City' level.
usa_rows = df.loc['USA']
usa_rows

Unnamed: 0_level_0,Population,GDP
City,Unnamed: 1_level_1,Unnamed: 2_level_1
New York,8419600,1.5
California,39538223,3.9


In [22]:
# 2. Accessing a specific city: give .loc[] the full (Country, City) tuple.
# The row comes back as a Series holding both columns, so the integer
# Population is shown as a float alongside GDP (dtype float64).
usa_new_york = ('USA', 'New York')
df.loc[usa_new_york]

Population    8419600.0
GDP                 1.5
Name: (USA, New York), dtype: float64

In [23]:
# sort_index() returns a new frame ordered by Country, then City;
# df keeps its original row order.
df_sorted = df.sort_index()
df_sorted

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
Country,City,Unnamed: 2_level_1,Unnamed: 3_level_1
India,Delhi,31870000,0.3
India,Mumbai,20411000,0.4
USA,California,39538223,3.9
USA,New York,8419600,1.5


In [7]:
# 2. pd.MultiIndex.from_product(): builds every (Country, City) combination
# of the two lists, with the first list varying slowest.
countries = ['USA', 'India']
cities = ['New York', 'Delhi']

index = pd.MultiIndex.from_product([countries, cities], names=['Country', 'City'])

# NOTE(review): the cross product includes pairs such as ('USA', 'Delhi') and
# ('India', 'New York'), so the population figures below are attached to rows
# purely by position and should be read as placeholder values. Consider
# levels that genuinely cross (e.g. country x year) for a realistic example.
df2 = pd.DataFrame({'Population': [8419600, 31870000, 39538223, 20411000]}, index=index)
df2


Unnamed: 0_level_0,Unnamed: 1_level_0,Population
Country,City,Unnamed: 2_level_1
USA,New York,8419600
USA,Delhi,31870000
India,New York,39538223
India,Delhi,20411000


In [8]:
# A Series can carry a MultiIndex too: one value per (Country, City) pair
city_keys = [
    ('USA', 'New York'),
    ('USA', 'California'),
    ('India', 'Delhi'),
    ('India', 'Mumbai'),
]
index = pd.MultiIndex.from_tuples(city_keys, names=['Country', 'City'])

# Values are matched to the index entries by position.
population = pd.Series(
    data=[8419600, 39538223, 31870000, 20411000],
    index=index,
)

population

Country  City      
USA      New York       8419600
         California    39538223
India    Delhi         31870000
         Mumbai        20411000
dtype: int64

In [12]:
# how to fetch items from such a series
# A partial key, e.g. population.loc['India'], selects every city under that
# country. A full (Country, City) tuple selects a single value in one lookup,
# which avoids chaining two separate indexing operations.
# .item() turns the NumPy scalar into a plain Python int.
population.loc[('India', 'Delhi')].item()

31870000

In [24]:
# stack(): start from a wide table with one row per student and one column
# per subject.
# Note: this rebinds `index` and `df`, replacing the country/city objects
# created earlier in the notebook.
index = ['Alice', 'Bob', 'Charlie']
data = dict(Math=[90, 85, 80], Science=[95, 88, 92])
df = pd.DataFrame(data=data, index=index)
df


Unnamed: 0,Math,Science
Alice,90,95
Bob,85,88
Charlie,80,92


In [39]:
# Applying stack(): the subject columns are pivoted into an inner index level,
# giving one value per (student, subject) pair.
stacked_df = df.stack()
# Selecting an outer label returns that student's score for each subject.
stacked_df.loc['Bob']

Math       85
Science    88
dtype: int64

In [30]:
# unstack(): start from a long Series keyed by (Student, Subject)
students = ['Alice', 'Bob', 'Charlie']
subjects = ['Math', 'Science']
# Every student has every subject, so the index is the full cross product,
# ordered student by student.
index = pd.MultiIndex.from_product([students, subjects], names=['Student', 'Subject'])

# Scores are listed in the same order as the index entries.
scores = pd.Series(data=[90, 95, 85, 88, 80, 92], index=index)
scores

Student  Subject
Alice    Math       90
         Science    95
Bob      Math       85
         Science    88
Charlie  Math       80
         Science    92
dtype: int64

In [38]:
# applying unstack(): the inner 'Subject' level is pivoted into columns,
# leaving one row per student.
unstacked_score = scores.unstack()
# Selecting one subject column gives that subject's scores indexed by Student.
unstacked_score['Math']

Student
Alice      90
Bob        85
Charlie    80
Name: Math, dtype: int64