## Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

import utils_07 as utils

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as utils_07) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## 01 Intro to the MultiIndex Module

In [6]:
# Build the BIGMAC helper from utils_07 with its default arguments, then
# preview the first rows of its `bigmac` frame with values rounded to 2 decimals.
b = utils.BIGMAC()
b.bigmac.round(2).head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2000-04-01,Argentina,2.5
1,2000-04-01,Australia,1.54
2,2000-04-01,Brazil,1.65
3,2000-04-01,Canada,1.94
4,2000-04-01,Switzerland,3.47


In [7]:
# Inspect the dtype of each column in the flat (not yet multi-indexed) frame
b.bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [8]:
# Summarise the frame: index type, per-column non-null counts, dtypes and memory usage
b.bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386 entries, 0 to 1385
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 1386 non-null   datetime64[ns]
 1   Country              1386 non-null   object        
 2   Price in US Dollars  1386 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 32.6+ KB


## 02 Create a MultiIndex

In [10]:
# Ask the BIGMAC helper to use Date and Country together as the row index,
# which yields a two-level MultiIndex (Date outermost, Country innermost).
index_col = ['Date', 'Country']
b = utils.BIGMAC(index_col=index_col)
b.bigmac.round(2).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.54
2000-04-01,Brazil,1.65
2000-04-01,Canada,1.94
2000-04-01,Switzerland,3.47


In [12]:
# (rows, columns) of the multi-indexed frame; index levels are not counted as columns
b.bigmac.shape

(1386, 1)

In [17]:
# Preview the first five entries of the MultiIndex (each entry is a
# (Date, Country) tuple) together with the total number of index entries
b.bigmac.index[:5], len(b.bigmac.index)

(MultiIndex([('2000-04-01',   'Argentina'),
             ('2000-04-01',   'Australia'),
             ('2000-04-01',      'Brazil'),
             ('2000-04-01',      'Canada'),
             ('2000-04-01', 'Switzerland')],
            names=['Date', 'Country']),
 1386)

In [16]:
# Pull a single level out of the MultiIndex by name, then show its first
# five values alongside the total number of values in that level
country_values = b.bigmac.index.get_level_values('Country')
country_values[:5], len(country_values)

(Index(['Argentina', 'Australia', 'Brazil', 'Canada', 'Switzerland'], dtype='object', name='Country'),
 1386)

## 03 Extract Rows from a MultiIndex DataFrame

In [22]:
# Rebuild the multi-indexed frame, this time passing round=True to the helper
# instead of calling .round(2) on the result.
# NOTE(review): presumably round=True makes BIGMAC round the prices itself —
# confirm against utils_07.
index_col = ['Date', 'Country']
b = utils.BIGMAC(index_col=index_col, round=True)
b.bigmac.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2000-04-01,Argentina,2.5
2000-04-01,Australia,1.54
2000-04-01,Brazil,1.65
2000-04-01,Canada,1.94
2000-04-01,Switzerland,3.47


In [None]:
# Extract rows for the date '2000-04-01' with loc.
# A single label is matched against the outermost index level (Date); the
# result is indexed by the remaining level (Country) only.
b.bigmac.loc['2000-04-01'].head()

Unnamed: 0_level_0,Price in US Dollars
Country,Unnamed: 1_level_1
Argentina,2.5
Australia,1.54
Brazil,1.65
Canada,1.94
Switzerland,3.47


In [26]:
# Extract the row for date '2000-04-01' and country 'Canada' with loc.
# The tuple is the complete (Date, Country) row key. The trailing `:` names the
# column axis explicitly so the tuple can never be read as a (row, column) pair.
b.bigmac.loc[('2000-04-01', 'Canada'), :]

Price in US Dollars    1.94
Name: (2000-04-01 00:00:00, Canada), dtype: float64

## 04 The `stack` and `unstack` methods

In [29]:
# Load data/worldstats.csv using (year, country) as a two-level row index,
# then sort the index before previewing the first rows
index_col = ['year', 'country']
world = (
    pd.read_csv('data/worldstats.csv', index_col=index_col)
    .sort_index()
)
world.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,8994793.0,537777800.0
1960,Algeria,11124892.0,2723638000.0
1960,Australia,10276477.0,18567590000.0
1960,Austria,7047539.0,6592694000.0
1960,"Bahamas, The",109526.0,169802300.0


In [30]:
# stack() moves the column labels (Population, GDP) into a new innermost
# index level, turning the frame into a Series
world.stack().head()

year  country                
1960  Afghanistan  Population    8.994793e+06
                   GDP           5.377778e+08
      Algeria      Population    1.112489e+07
                   GDP           2.723638e+09
      Australia    Population    1.027648e+07
dtype: float64

In [31]:
# unstack() reverses stack(): the innermost index level becomes columns again,
# giving back the original (year, country) x (Population, GDP) layout
world.stack().unstack().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,Afghanistan,8994793.0,537777800.0
1960,Algeria,11124892.0,2723638000.0
1960,Australia,10276477.0,18567590000.0
1960,Austria,7047539.0,6592694000.0
1960,"Bahamas, The",109526.0,169802300.0


## 05 Coding Challenge

### 01 Data Preparation

In [37]:
# Build the INVESTMENTS helper from utils_07 and preview its `investments`
# frame; the preview shows a three-level row index (Status, Funding Rounds, State).
i = utils.INVESTMENTS()
i.investments.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Name,Market
Status,Funding Rounds,State,Unnamed: 3_level_1,Unnamed: 4_level_1
Acquired,1,AB,Hallpass Media,Games
Acquired,1,AL,EnteGreat,Enterprise Software
Acquired,1,AL,Onward Behavioral Health,Biotechnology
Acquired,1,AL,Proxsys,Biotechnology
Acquired,1,AZ,Envox Group,Public Relations


### 02 Challenges

In [39]:
# 1 Extract all rows with a Status of "Closed" using .loc
# A single label is matched against the outermost level (Status); the result
# keeps only the remaining levels (Funding Rounds, State) in its index.
i.investments.loc['Closed'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Market
Funding Rounds,State,Unnamed: 2_level_1,Unnamed: 3_level_1
1,AB,Cardinal Media Technologies,Social Network Media
1,AB,Easy Bill Online,Tracking
1,AB,Globel Direct,Public Relations
1,AB,Ph03nix New Media,Games
1,AL,Naubo,News


In [43]:
# 2 Extract all rows with a Status of "Acquired" and 10 funding rounds.
# The tuple addresses the first two index levels (Status, Funding Rounds). The
# trailing `:` names the column axis explicitly so the two-element tuple cannot
# be read as a (row label, column label) pair.
i.investments.loc[('Acquired', 10), :].head()

Unnamed: 0_level_0,Name,Market
State,Unnamed: 1_level_1,Unnamed: 2_level_1
NY,Genesis Networks,Web Hosting
TX,ACTIVE Network,Software


In [44]:
# 3 Extract all rows with a Status of "Operating", six funding rounds, and a State
# of "NJ".
# The tuple is a complete (Status, Funding Rounds, State) row key. The trailing
# `:` selects every column and keeps the row/column axes explicit.
i.investments.loc[('Operating', 6, 'NJ'), :].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Name,Market
Status,Funding Rounds,State,Unnamed: 3_level_1,Unnamed: 4_level_1
Operating,6,NJ,Agile Therapeutics,Biotechnology
Operating,6,NJ,Agilence,Retail Technology
Operating,6,NJ,Edge Therapeutics,Biotechnology
Operating,6,NJ,Nistica,Web Hosting


In [45]:
# 4 Extract all rows with a Status of "Closed" and eight funding rounds. Pull out
# only the Name column.
# First .loc argument: tuple matched against the (Status, Funding Rounds) levels.
# Second .loc argument: the column label, so the result is a Series.
i.investments.loc[('Closed', 8), 'Name'].head()

State
CA             CipherMax
CA    Dilithium Networks
CA               Moblyng
CA              SolFocus
CA              Solyndra
Name: Name, dtype: object

In [None]:
# 5 Extract all rows with a State of "NJ", irrespective of the values in the Status and
# Funding Rounds levels.
# xs takes a cross-section on the level named by `level`, so the outer levels
# do not have to be specified.
i.investments.xs(key='NJ', level='State').head()

In [46]:
# 6 Reincorporate the MultiIndex levels back into the DataFrame as columns
# reset_index() with no arguments turns every index level (Status, Funding
# Rounds, State) into a regular column and installs a default integer index.
i.investments.reset_index().head()

Unnamed: 0,Status,Funding Rounds,State,Name,Market
0,Acquired,1,AB,Hallpass Media,Games
1,Acquired,1,AL,EnteGreat,Enterprise Software
2,Acquired,1,AL,Onward Behavioral Health,Biotechnology
3,Acquired,1,AL,Proxsys,Biotechnology
4,Acquired,1,AZ,Envox Group,Public Relations
