# Chapter 2
The index column is unmutable and homogenious. You can change it - but only all at once. i.e. you cannot change slices or individual index values - but you can replace an entire index Series, with another of the same length.

In [6]:
# data prep
import pandas as pd
ind = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
eggs = [47, 110, 221, 77, 132, 205]
salt = [12.0, 50.0, 89.0, 87.0, 'NaN', 60.0]
spam =  [17, 31, 72, 20, 52, 55]
sales = pd.DataFrame({'eggs':eggs, 'salt':salt, 'spam':spam}, index=ind)
sales.index.name = 'month'
print(sales)

       eggs salt  spam
month                 
Jan      47   12    17
Feb     110   50    31
Mar     221   89    72
Apr      77   87    20
May     132  NaN    52
Jun     205   60    55


In [7]:
# Create the list of new indexes: new_idx
new_idx = [month.upper() for month in sales.index]

# Assign new_idx to sales.index
sales.index = new_idx

# Print the sales DataFrame
print(sales)


     eggs salt  spam
JAN    47   12    17
FEB   110   50    31
MAR   221   89    72
APR    77   87    20
MAY   132  NaN    52
JUN   205   60    55


In [8]:
# Assign the string 'MONTHS' to sales.index.name
sales.index.name = 'MONTHS'

# Print the sales DataFrame
print(sales)

# Assign the string 'PRODUCTS' to sales.columns.name 
sales.columns.name = 'PRODUCTS'

# Print the sales dataframe again
print(sales)


        eggs salt  spam
MONTHS                 
JAN       47   12    17
FEB      110   50    31
MAR      221   89    72
APR       77   87    20
MAY      132  NaN    52
JUN      205   60    55
PRODUCTS  eggs salt  spam
MONTHS                   
JAN         47   12    17
FEB        110   50    31
MAR        221   89    72
APR         77   87    20
MAY        132  NaN    52
JUN        205   60    55


In [11]:
old_sales = sales
sales.index = list(range(len(sales)))
print(sales)

# Generate the list of months: months
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# Assign months to sales.index
sales.index = months

# Print the modified sales DataFrame
print(sales)


PRODUCTS  eggs salt  spam
0           47   12    17
1          110   50    31
2          221   89    72
3           77   87    20
4          132  NaN    52
5          205   60    55
PRODUCTS  eggs salt  spam
Jan         47   12    17
Feb        110   50    31
Mar        221   89    72
Apr         77   87    20
May        132  NaN    52
Jun        205   60    55


## Multi Indexes
You can create indexes of multiple columns, which will be organised heirarchically. For instance, with stock data you could create the index ('stock', 'date'). You would add this complexity for speed - index selection or slicing, is much quicker than column filtering on very large DataFrames.

The syntax for slicing multi-indexes is different to that used when slicing columns. 

In [26]:
# data prep
state = ['CA', 'CA', 'NY', 'NY', 'TX', 'TX']
month = [1,2,1,2,1,2]
eggs= [47, 110, 221, 77, 132, 205]
salt =  [12.0, 50.0, 89.0, 87.0, 'NaN', 60.0]
spam = [17, 31, 72, 20, 52, 55]

sales = pd.DataFrame({'state':state, 'month':month, 'eggs':eggs, 'salt':salt, 'spam':spam})
full_sales = sales
sales = sales.set_index([state, month])
del sales['state']
del sales['month']
print(sales)

      eggs salt  spam
CA 1    47   12    17
   2   110   50    31
NY 1   221   89    72
   2    77   87    20
TX 1   132  NaN    52
   2   205   60    55


In [23]:
# Print sales.loc[['CA', 'TX']]
print(sales.loc[['CA', 'TX']])

# Print sales['CA':'TX']
print(sales['CA':'TX'])

      eggs salt  spam
CA 1    47   12    17
   2   110   50    31
TX 1   132  NaN    52
   2   205   60    55
      eggs salt  spam
CA 1    47   12    17
   2   110   50    31
NY 1   221   89    72
   2    77   87    20
TX 1   132  NaN    52
   2   205   60    55


In [27]:
# data prep
sales = full_sales

# Set the index to be the columns ['state', 'month']: sales
sales = sales.set_index(['state', 'month'])

# Sort the MultiIndex: sales
sales = sales.sort_index()

# Print the sales DataFrame
print(sales)


             eggs salt  spam
state month                 
CA    1        47   12    17
      2       110   50    31
NY    1       221   89    72
      2        77   87    20
TX    1       132  NaN    52
      2       205   60    55


In [29]:
# Data prep
sales = full_sales

# Set the index to the column 'state': sales
sales = sales.set_index('state')

# Print the sales DataFrame
print(sales)

# Access the data from 'NY'
print(sales.loc['NY'])


       eggs  month salt  spam
state                        
CA       47      1   12    17
CA      110      2   50    31
NY      221      1   89    72
NY       77      2   87    20
TX      132      1  NaN    52
TX      205      2   60    55
       eggs  month salt  spam
state                        
NY      221      1   89    72
NY       77      2   87    20


In [43]:
# Data prep
sales = full_sales.set_index(['state', 'month'])

# Look up data for NY in month 1: NY_month1
NY_month1 = sales.loc[('NY', 1),:]

# Look up data for CA and TX in month 2: CA_TX_month2
CA_TX_month2 = sales.loc[(slice('CA', 'TX'), 2),:]

# Look up data for all states in month 2: all_month2
all_month2 = sales.loc[(slice(None), 2),:]

print(NY_month1)
print(CA_TX_month2)
print(all_month2)

eggs    221
salt     89
spam     72
Name: (NY, 1), dtype: object
             eggs salt  spam
state month                 
CA    2       110   50    31
NY    2        77   87    20
TX    2       205   60    55
             eggs salt  spam
state month                 
CA    2       110   50    31
NY    2        77   87    20
TX    2       205   60    55
