# Index Objects and Labeled Data

### pandas Data Structures

Key building blocks:

	Indexes: Sequence of labels

	Series: 1D array with Index

	DataFrames: 2D array with Series as columns

### Creating a Series

In [16]:
import pandas as pd
from random import uniform

# Simulate five prices drawn uniformly between 10 and 11, rounded to cents.
# No seed is set in this cell, so the values differ from run to run.
prices = list(
    round(uniform(10, 11), 2)
    for _ in range(5)
)

prices

[10.16, 10.73, 10.82, 10.94, 10.34]

In [18]:
# Wrap the price list in a Series. No index is given, so pandas
# labels the rows with the default integer index running from 0 to 4.
shares = pd.Series(data=prices)

shares

0    10.16
1    10.73
2    10.82
3    10.94
4    10.34
dtype: float64

### Creating an index

In [19]:
# Weekday labels, one per price.
days = [
    'Mon',
    'Tue',
    'Wed',
    'Thu',
    'Fri',
]

# Passing `index=` associates each string label with the value
# at the same position in the data.
shares = pd.Series(index=days, data=prices)

shares

Mon    10.16
Tue    10.73
Wed    10.82
Thu    10.94
Fri    10.34
dtype: float64

### Index attribute 'name'

In [22]:
# The index was built from a plain list of labels, so it has no name yet
# and this prints None.
print(shares.index.name)

None


In [23]:
# Name the index itself; the name is shown above the index labels
# when the Series is displayed.
shares.index.name = 'weekday'

shares

weekday
Mon    10.16
Tue    10.73
Wed    10.82
Thu    10.94
Fri    10.34
dtype: float64

In [24]:
# Raw values: one list per column, five rows each.
zips = [1001, 1002, 1003, 1005, 1007]
unemployments = [0.06, 0.09, 0.17, 0.10, 0.05]
participants = [13801, 24551, 11477, 4086, 11362]

# Column names and their value lists, kept in matching order.
list_label = ['zip', 'unemployments', 'participants']
list_cols = [zips, unemployments, participants]

# Pair every name with its values, then build the
# {column name: values} mapping the DataFrame is created from.
zipped = list(zip(list_label, list_cols))
data = {label: values for label, values in zipped}

df = pd.DataFrame(data=data)

df

Unnamed: 0,participants,unemployments,zip
0,13801,0.06,1001
1,24551,0.09,1002
2,11477,0.17,1003
3,4086,0.1,1005
4,11362,0.05,1007


Let's use the zip column as the index of our df

In [25]:
# Use the zip codes as row labels. The column itself stays in the
# frame, so the index picks up the column's name ('zip').
df.index = df['zip']

df

Unnamed: 0_level_0,participants,unemployments,zip
zip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,13801,0.06,1001
1002,24551,0.09,1002
1003,11477,0.17,1003
1005,4086,0.1,1005
1007,11362,0.05,1007


We can delete the redundant zip column now

In [27]:
# Remove the column in place; the zip codes are still available as the index.
# Not idempotent: after the first run the column no longer exists.
del df['zip']

df

Unnamed: 0_level_0,participants,unemployments
zip,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,13801,0.06
1002,24551,0.09
1003,11477,0.17
1005,4086,0.1
1007,11362,0.05


We can also give a name to the column labels

In [29]:
# The column labels can carry a name just like the row index; it is shown
# in the header row when the frame is displayed.
df.columns.name = 'election_parameters'

df

election_parameters,participants,unemployments
zip,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,13801,0.06
1002,24551,0.09
1003,11477,0.17
1005,4086,0.1
1007,11362,0.05


## Hierarchical indexing

In [46]:
# One list per column; position i across the four lists describes one quote.
# NOTE(review): the last two symbols look swapped (AAPL closing at 112.52 and
# then 57.64 two days later is implausible) — confirm against the source data.
date = ['2016-10-03', '2016-10-03', '2016-10-04', '2016-10-05']
close = [31.50, 112.52, 113.00, 57.64]
volume = [14070500, 21701180, 29736800, 16726400]
symbol = ['CSCO', 'AAPL', 'MSFT', 'AAPL']

# Column names and their value lists, kept in matching order.
labels = ['date', 'close', 'volume', 'symbol']
list_columns = [date, close, volume, symbol]

# Pair every name with its values and build the mapping for the DataFrame.
zipped = list(zip(labels, list_columns))
data = {label: values for label, values in zipped}

df = pd.DataFrame(data=data)
df

Unnamed: 0,close,date,symbol,volume
0,31.5,2016-10-03,CSCO,14070500
1,112.52,2016-10-03,AAPL,21701180
2,113.0,2016-10-04,MSFT,29736800
3,57.64,2016-10-05,AAPL,16726400


Dates may not be unique, so we can index on symbol and date together; each row is then identified by a (symbol, date) tuple.

In [48]:
# Move both columns into the index, which thus comprises two levels:
# every row is now identified by a (symbol, date) pair.
df = df.set_index(keys=['symbol', 'date'])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1
CSCO,2016-10-03,31.5,14070500
AAPL,2016-10-03,112.52,21701180
MSFT,2016-10-04,113.0,29736800
AAPL,2016-10-05,57.64,16726400


In [49]:
# An index built from two columns is stored as a MultiIndex with one level per column.
df.index

MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],
           labels=[[1, 0, 2, 0], [0, 0, 1, 2]],
           names=['symbol', 'date'])

In [51]:
# The singular `name` attribute is None for this two-level index.
print(df.index.name)

None


In [52]:
# The plural `names` attribute holds one name per index level.
print(df.index.names)

['symbol', 'date']


### Sorting index

In [53]:
# Order the rows by their index labels: by symbol first, then by date within each symbol.
df = df.sort_index()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701180
AAPL,2016-10-05,57.64,16726400
CSCO,2016-10-03,31.5,14070500
MSFT,2016-10-04,113.0,29736800


Sorting the index like this is useful for slicing the DataFrame: label-range slices such as `'AAPL':'CSCO'` rely on the index being sorted.

In [56]:
# Selecting one outer-level label returns its rows, indexed by the remaining level (date).
df.loc['AAPL']

Unnamed: 0_level_0,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-10-03,112.52,21701180
2016-10-05,57.64,16726400


In [60]:
# A label slice on the outer level includes both endpoints, so CSCO rows are returned too.
df.loc['AAPL':'CSCO']

Unnamed: 0_level_0,Unnamed: 1_level_0,close,volume
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,2016-10-03,112.52,21701180
AAPL,2016-10-05,57.64,16726400
CSCO,2016-10-03,31.5,14070500
