### Creating a series

In [1]:
import pandas as pd

In [2]:
# Five daily closing prices; reused as the data for every Series built below.
prices = [
    10.70,
    10.86,
    10.74,
    10.71,
    10.79,
]

In [3]:
# Wrap the raw price list in a Series; no index is supplied here.
shares = pd.Series(data=prices)

In [4]:
# Display the Series: with no index given, the labels default to 0..4.
shares

0    10.70
1    10.86
2    10.74
3    10.71
4    10.79
dtype: float64

### Creating an index

In [5]:
# One label per price, in the same order as `prices`.
days = [
    'Mon',
    'Tue',
    'Wed',
    'Thur',
    'Fri',
]

In [6]:
# Rebuild the Series, this time labelling each price with its weekday.
shares = pd.Series(data=prices, index=days)

In [8]:
# Display the Series: the weekday labels now replace the default 0..4 index.
shares

Mon     10.70
Tue     10.86
Wed     10.74
Thur    10.71
Fri     10.79
dtype: float64

### Examining an index

In [9]:
# The index is an object in its own right; the string labels get dtype 'object'.
shares.index

Index(['Mon', 'Tue', 'Wed', 'Thur', 'Fri'], dtype='object')

In [11]:
# Positional lookup on the index returns the label stored at that position.
shares.index[2]

'Wed'

In [12]:
# Slicing the index by position returns a new Index (here: the first two labels).
shares.index[:2]

Index(['Mon', 'Tue'], dtype='object')

In [13]:
# Negative positions count from the end (here: the last two labels).
shares.index[-2:]

Index(['Thur', 'Fri'], dtype='object')

In [15]:
# The index has no name yet. print() is deliberate: a bare expression that
# evaluates to None produces no cell output at all.
print(shares.index.name)

None


### Modifying index entries

In [16]:
# Index objects are immutable, so assigning to a single entry raises TypeError.
# The error is the point of this cell; catch and print it so the demonstration
# is still shown without aborting a Restart & Run All.
try:
    shares.index[2] = 'Wednesday'
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

In [17]:
# Slice assignment on an Index is rejected in the same way as single-item
# assignment. Catch and print the expected TypeError so the notebook keeps
# running past this demonstration.
try:
    shares.index[:4] = ['Monday', 'Tuesday',
                        'Wednesday', 'Thursday']
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

### Modifying all index entries

In [18]:
# Individual entries cannot be changed, but the index as a whole can be
# replaced by assigning a full list of new labels (one per row).
shares.index = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]

In [19]:
# Display the Series: same prices, now labelled with the full weekday names.
shares

Monday       10.70
Tuesday      10.86
Wednesday    10.74
Thursday     10.71
Friday       10.79
dtype: float64

### Unemployment data

In [20]:
# Column-oriented raw data: each key becomes a DataFrame column, and the
# lists are aligned by position (one entry per zip code).
unemployment = dict(
    Zip=[1001, 1002, 1003, 1005, 1007],
    unemployment=[0.06, 0.09, 0.17, 0.10, 0.05],
    participants=[13801, 24551, 11477, 4886, 11362],
)

In [21]:
# Turn the dict of columns into a DataFrame. The name `unemployment` is
# rebound from the dict to the frame here.
unemployment = pd.DataFrame(data=unemployment)

In [22]:
# Display the frame: rows carry the default 0..4 index; 'Zip' is an ordinary column.
unemployment

Unnamed: 0,Zip,participants,unemployment
0,1001,13801,0.06
1,1002,24551,0.09
2,1003,11477,0.17
3,1005,4886,0.1
4,1007,11362,0.05


In [23]:
# Summarise the frame: index type, per-column non-null counts and dtypes, memory use.
unemployment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
Zip             5 non-null int64
participants    5 non-null int64
unemployment    5 non-null float64
dtypes: float64(1), int64(2)
memory usage: 200.0 bytes


In [24]:
# Use the zip codes as row labels. Assigning the column to .index copies its
# values (and its name, 'Zip') into the index; the original 'Zip' column is
# left in place, so the values now appear twice.
unemployment.index = unemployment['Zip']

In [25]:
# Inspect the result: 'Zip' shows up both as the index and as a regular column.
unemployment.head()

Unnamed: 0_level_0,Zip,participants,unemployment
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,1001,13801,0.06
1002,1002,24551,0.09
1003,1003,11477,0.17
1005,1005,4886,0.1
1007,1007,11362,0.05


### Removing extra column

In [26]:
# First three rows before the clean-up: the duplicated 'Zip' column is still present.
unemployment.head(3)

Unnamed: 0_level_0,Zip,participants,unemployment
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,1001,13801,0.06
1002,1002,24551,0.09
1003,1003,11477,0.17


In [27]:
# Remove the now-redundant 'Zip' column; its values live on as the index.
# drop(..., errors='ignore') is used instead of `del unemployment['Zip']` so the
# cell is idempotent: re-running it once the column is gone is a no-op rather
# than a KeyError.
unemployment = unemployment.drop('Zip', axis=1, errors='ignore')

In [28]:
# First three rows after the clean-up: 'Zip' now appears only as the index.
unemployment.head(3)

Unnamed: 0_level_0,participants,unemployment
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,13801,0.06
1002,24551,0.09
1003,11477,0.17


In [29]:
# The index holds the integer zip codes and carries the name 'Zip'.
unemployment.index

Int64Index([1001, 1002, 1003, 1005, 1007], dtype='int64', name='Zip')

In [30]:
# The index name was inherited from the column that was assigned to .index.
unemployment.index.name

'Zip'

In [31]:
# Check which Index class pandas chose for the integer labels.
type(unemployment.index)

pandas.core.indexes.numeric.Int64Index

In [32]:
# The column labels are themselves stored in an Index object.
unemployment.columns

Index(['participants', 'unemployment'], dtype='object')

### read_csv() with index_col

In [None]:
# Load the same data from disk and promote 'Zip' to the index while reading,
# which avoids the separate assign-then-delete steps used above.
# Expects 'Unemployment.csv' in the current working directory.
unemployment = pd.read_csv(filepath_or_buffer='Unemployment.csv', index_col='Zip')

In [34]:
# Confirm the loaded frame is indexed by 'Zip' with the two data columns.
unemployment.head()

Unnamed: 0_level_0,participants,unemployment
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,13801,0.06
1002,24551,0.09
1003,11477,0.17
1005,4886,0.1
1007,11362,0.05


# Hierarchical Indexing

### Stock data

In [43]:
import pandas as pd

In [44]:
# Daily quotes for three tickers over three trading days. The records are
# written one per line as (Date, Close, Volume, Symbol) and then transposed
# into the column-oriented dict that pd.DataFrame expects. Within each day the
# tickers are deliberately not in a consistent order.
stocks = {
    column: list(values)
    for column, values in zip(
        ('Date', 'Close', 'Volume', 'Symbol'),
        zip(
            ('2016-10-03', 31.50, 14070500, 'CSCO'),
            ('2016-10-03', 112.52, 21701800, 'AAPL'),
            ('2016-10-03', 57.42, 19189500, 'MSFT'),
            ('2016-10-04', 113.00, 29736800, 'AAPL'),
            ('2016-10-04', 57.24, 20085900, 'MSFT'),
            ('2016-10-04', 31.35, 18460400, 'CSCO'),
            ('2016-10-05', 57.64, 16726400, 'MSFT'),
            ('2016-10-05', 31.59, 11808600, 'CSCO'),
            ('2016-10-05', 113.05, 21453100, 'AAPL'),
        ),
    )
}


In [45]:
# Turn the dict of columns into a DataFrame. The name `stocks` is rebound
# from the dict to the frame here.
stocks = pd.DataFrame(data=stocks)

### Setting index

In [46]:
# Build a two-level (hierarchical) index: 'Symbol' is the outer level and
# 'Date' the inner one. Both columns move out of the data and into the index.
stocks = stocks.set_index(keys=['Symbol', 'Date'])

In [47]:
# Display the frame: rows keep their original (unsorted) order under the new index.
stocks

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
AAPL,2016-10-03,112.52,21701800
MSFT,2016-10-03,57.42,19189500
AAPL,2016-10-04,113.0,29736800
MSFT,2016-10-04,57.24,20085900
CSCO,2016-10-04,31.35,18460400
MSFT,2016-10-05,57.64,16726400
CSCO,2016-10-05,31.59,11808600
AAPL,2016-10-05,113.05,21453100


### MultiIndex on DataFrame

In [48]:
# Show the MultiIndex internals: the unique values of each level, the integer
# codes mapping every row to those values, and the level names.
print(stocks.index)

MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],
           labels=[[1, 0, 2, 0, 2, 1, 2, 1, 0], [0, 0, 0, 1, 1, 1, 2, 2, 2]],
           names=['Symbol', 'Date'])


In [50]:
# A MultiIndex has no single .name, so this is None. print() is deliberate:
# a bare None expression would produce no cell output.
print(stocks.index.name)

None


In [51]:
# The per-level names live in the plural attribute .names instead.
print(stocks.index.names)

['Symbol', 'Date']


### Sorting index

In [52]:
# Sort rows by the index, outer level first and then inner level. Label-based
# slicing on a MultiIndex generally requires a sorted index, so this is done
# before the slicing examples below.
stocks = stocks.sort_index()

In [53]:
# Plain-text view of the sorted frame: each outer 'Symbol' label is printed
# once per group, which makes the hierarchy visible.
print(stocks)

                    Close    Volume
Symbol Date                        
AAPL   2016-10-03  112.52  21701800
       2016-10-04  113.00  29736800
       2016-10-05  113.05  21453100
CSCO   2016-10-03   31.50  14070500
       2016-10-04   31.35  18460400
       2016-10-05   31.59  11808600
MSFT   2016-10-03   57.42  19189500
       2016-10-04   57.24  20085900
       2016-10-05   57.64  16726400


### Indexing (individual row)

In [54]:
# A tuple with one label per level selects a single row, returned as a Series.
# Both values are shown as float64, so the integer Volume appears as a float.
stocks.loc[('CSCO', '2016-10-04')]

Close           31.35
Volume    18460400.00
Name: (CSCO, 2016-10-04), dtype: float64

In [56]:
# Row tuple plus a column label selects a single scalar value.
# NOTE(review): the recorded result is a float (18460400.0) although Volume is
# an integer column — presumably a side effect of the row lookup; confirm on
# the pandas version in use.
stocks.loc[('CSCO', '2016-10-04'), 'Volume']

18460400.0

### Slicing (outermost index)

In [58]:
# A single outer-level label selects every row for that symbol; the 'Symbol'
# level is dropped from the result, leaving a frame indexed by 'Date' only.
stocks.loc['AAPL']

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-10-03,112.52,21701800
2016-10-04,113.0,29736800
2016-10-05,113.05,21453100


In [59]:
# Label slice on the outer level. Unlike positional slices, both endpoints
# are included, so the MSFT rows are part of the result.
stocks.loc['CSCO':'MSFT']

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
CSCO,2016-10-04,31.35,18460400
CSCO,2016-10-05,31.59,11808600
MSFT,2016-10-03,57.42,19189500
MSFT,2016-10-04,57.24,20085900
MSFT,2016-10-05,57.64,16726400


### Fancy indexing (outermost index)

In [60]:
# A list in the outer position of the tuple picks several symbols, combined
# with one fixed date on the inner level; the trailing ':' keeps all columns.
stocks.loc[(['AAPL', 'MSFT'], '2016-10-05'),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-05,113.05,21453100
MSFT,2016-10-05,57.64,16726400


In [61]:
# Same row selection, restricted to one column: the result is a Series that
# keeps both index levels.
stocks.loc[(['AAPL', 'MSFT'], '2016-10-05'), 'Close']

Symbol  Date      
AAPL    2016-10-05    113.05
MSFT    2016-10-05     57.64
Name: Close, dtype: float64

### Fancy indexing (innermost index)

In [62]:
# A list in the inner position picks several dates for one symbol. In the
# recorded output the rows come back in index order (10-03, then 10-05), not
# in the order the dates are listed here.
stocks.loc[('CSCO', ['2016-10-05', '2016-10-03']), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
CSCO,2016-10-05,31.59,11808600


### Slicing (both indexes)

In [64]:
# Slice both levels at once: every symbol on the outer level, and an inclusive
# date range on the inner level. pd.IndexSlice allows ':' syntax inside the
# tuple and builds the same pair of slice objects as explicit slice() calls.
stocks.loc[pd.IndexSlice[:, '2016-10-03':'2016-10-04'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701800
AAPL,2016-10-04,113.0,29736800
CSCO,2016-10-03,31.5,14070500
CSCO,2016-10-04,31.35,18460400
MSFT,2016-10-03,57.42,19189500
MSFT,2016-10-04,57.24,20085900
