In [17]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# 5.1.3 Index Objects (索引对象)

In [2]:
# Create a three-element Series whose labels are given explicitly, then display it.
ser = pd.Series(data=range(3), index=list('abc'))
ser

a    0
b    1
c    2
dtype: int32

In [3]:
# Grab the label container of the Series and show its class followed by its contents.
index = ser.index
print(type(index), index, sep='\n')

<class 'pandas.core.indexes.base.Index'>
Index(['a', 'b', 'c'], dtype='object')


#### Index 对象是不可变的（immutable），创建之后不能修改其中的元素：

In [4]:
# Item assignment on an Index is rejected with a TypeError; the error is caught
# here so that its message can be displayed instead of aborting the cell.
try:
    index[1] = 'd'
except TypeError as err:
    print("错误信息: ", err)

错误信息:  Index does not support mutable operations


In [5]:
# Build an Index up front, hand it to a Series, and check whether the Series
# exposes that very object as its index (an identity test, not just equality).
labels = pd.Index(data=np.arange(3))
obj2 = pd.Series(data=[1.5, -2.5, 0], index=labels)
obj2.index is labels

True

Index 除了用起来像数组之外，功能上也类似一个大小固定的集合（set）；但与 set 不同的是，Index 可以包含重复的标签（labels）。

In [6]:
# Nested dict: the outer keys become the columns and the inner keys become the
# row labels. A year that is missing for a city shows up as NaN in its column.
myDict = {
    '上海': {2001: 2.4, 2002: 2.9},
    '北京': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
df = pd.DataFrame(data=myDict)
df

Unnamed: 0,上海,北京
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [7]:
# Show the class of the column labels, then the labels themselves.
column_labels = df.columns
print(type(column_labels))
column_labels

<class 'pandas.core.indexes.base.Index'>


Index(['上海', '北京'], dtype='object')

In [8]:
# Show the class of the row labels, then the labels themselves.
row_labels = df.index
print(type(row_labels))
row_labels

<class 'pandas.core.indexes.numeric.Int64Index'>


Int64Index([2000, 2001, 2002], dtype='int64')

### 综上：行索引和列索引的类型并不相同，这是由索引中元素的类型决定的（字符串标签得到 object 类型的 Index，整数标签得到 Int64Index）

In [9]:
# Membership test against the column labels of df.
'北京' in df.columns

True

In [10]:
# Membership test against the row labels: the years are stored on df.index,
# not on df.columns (which holds the city names), so a year has to be looked
# up there. 2003 is not one of the row labels, hence the result is False.
2003 in df.index

False

### 与 Python 里的 set 不同，pandas 的 Index 可以包含重复的标签（labels）：

In [11]:
# An Index keeps every occurrence of a repeated label instead of collapsing them.
dup_labels = pd.Index(data=['foo', 'foo', 'bar', 'bar'])
dup_labels

Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

In [12]:
# Column-oriented records: one list per field, all lists of the same length.
myDict2 = dict(
    city=['北京', '北京', '北京', '上海', '上海', '上海'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [13]:
# Both axes carry duplicate labels on purpose: 'pop' is requested twice as a
# column and 'five' is used twice as a row label.
# NOTE(review): 'state' is not a key of myDict2 (the dict uses 'city'), so that
# column comes out entirely NaN — confirm whether 'city' was intended here.
df2 = pd.DataFrame(
    data=myDict2,
    columns=['year', 'state', 'pop', 'pop'],
    index=['one', 'two', 'three', 'four', 'five', 'five'],
)
df2

Unnamed: 0,year,state,pop,pop.1
one,2000,,1.5,1.5
two,2001,,1.7,1.7
three,2002,,3.6,3.6
four,2001,,2.4,2.4
five,2002,,2.9,2.9
five,2003,,3.2,3.2


### 用重复的标签进行选取时，会选中所有带有该标签的行或列：

In [14]:
# Selecting by a column label that occurs twice returns every column carrying
# that label, so the result is a two-column DataFrame rather than a Series.
df2['pop']

Unnamed: 0,pop,pop.1
one,1.5,1.5
two,1.7,1.7
three,3.6,3.6
four,2.4,2.4
five,2.9,2.9
five,3.2,3.2


In [15]:
# Selecting by a row label that occurs twice returns every row carrying that label.
df2.loc['five']

Unnamed: 0,year,state,pop,pop.1
five,2002,,2.9,2.9
five,2003,,3.2,3.2


# ======================

In [None]:
# Index methods that behave like set operations

In [46]:
# Two indexes with repeated labels; index1 also mixes integers with the string '1'.
index1 = pd.Index(data=[0, '1', 2, 2, 2, 5, 5])
index2 = pd.Index(data=[1, 2, 3, 4, 4])
print(index1)
index2

Index([0, '1', 2, 2, 2, 5, 5], dtype='object')


Int64Index([1, 2, 3, 4, 4], dtype='int64')

In [47]:
# Concatenate the two indexes into a new Index (duplicates are kept).
index1.append(index2) # index1 and index2 themselves are left unchanged

Index([0, '1', 2, 2, 2, 5, 5, 1, 2, 3, 4, 4], dtype='object')

In [48]:
# Set difference: the labels of index1 that do not occur in index2.
index1.difference(index2)

Index([0, 5, '1'], dtype='object')

In [49]:
# Intersection: the labels that occur in both index1 and index2.
index1.intersection(index2) 

Index([2], dtype='object')

In [50]:
# Union of the two indexes.
# With these inputs (duplicate labels, and mixed int/str values in index1) the
# call failed with InvalidIndexError when this notebook was executed. The
# failure is the point of the demonstration, so it is caught and printed as
# "<ExceptionClass>: <message>" rather than left to abort a Restart & Run All.
try:
    print(index1.union(index2))
except Exception as err:
    # Caught broadly on purpose.
    # NOTE(review): narrow this to pandas.errors.InvalidIndexError once the
    # minimum supported pandas version is confirmed to expose it there.
    print("{}: {}".format(type(err).__name__, err))

InvalidIndexError: Reindexing only valid with uniquely valued Index objects