# <font color=steelblue>DataFrame的Selecting和Indexing</font>

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the movie metadata CSV into a DataFrame; the path is relative to the notebook's working directory.
imdb = pd.read_csv(filepath_or_buffer='../Data/movie_metadata.csv')

In [3]:
# (number of rows, number of columns) of the loaded frame
imdb.shape

(5043, 28)

In [4]:
# df[[...]] with a list of column labels selects those columns and returns a DataFrame.
tmp_df = imdb[[
    'director_name',
    'movie_title',
    'imdb_score',
]]
tmp_df.head(5)

Unnamed: 0,director_name,movie_title,imdb_score
0,James Cameron,Avatar,7.9
1,Gore Verbinski,Pirates of the Caribbean: At World's End,7.1
2,Sam Mendes,Spectre,6.8
3,Christopher Nolan,The Dark Knight Rises,8.5
4,Doug Walker,Star Wars: Episode VII - The Force Awakens ...,7.1


In [5]:
# df.loc[] selects by label, and the end label of each slice IS included:
# rows 3 through 7, columns 'director_name' through 'movie_title'.
sub_df = tmp_df.loc[
    3:7,
    'director_name':'movie_title'
]
sub_df

Unnamed: 0,director_name,movie_title
3,Christopher Nolan,The Dark Knight Rises
4,Doug Walker,Star Wars: Episode VII - The Force Awakens ...
5,Andrew Stanton,John Carter
6,Sam Raimi,Spider-Man 3
7,Nathan Greno,Tangled


In [6]:
# df.iloc[] selects by integer position (not by label), and the end position of
# each slice is NOT included: first three rows, second column only.
sub_df.iloc[:3, 1:2]

Unnamed: 0,movie_title
3,The Dark Knight Rises
4,Star Wars: Episode VII - The Force Awakens ...
5,John Carter


# <font color=steelblue>Series和DataFrame的Reindexing</font>

***
<font color=orange size=4>Series的reindex</font>

In [7]:
# Small Series with a gappy integer index (1, 3, 5) used by the reindex examples below.
s1 = pd.Series({1: 'A', 3: 'B', 5: 'C'})

In [8]:
# Conform s1 to labels 0..5; labels that s1 does not have show up as NaN.
s1.reindex(range(6))

0    NaN
1      A
2    NaN
3      B
4    NaN
5      C
dtype: object

In [9]:
# reindex does not modify the original Series; s1 still has only labels 1, 3, 5
s1

1    A
3    B
5    C
dtype: object

In [10]:
# Two ways to fill the new labels. First: a constant supplied through fill_value.
s1.reindex(range(6), fill_value='F')

0    F
1    A
2    F
3    B
4    F
5    C
dtype: object

In [11]:
# Second: forward-fill, where each new label takes the value of the nearest
# preceding existing label (label 0 has none before it, so it stays NaN).
s1.reindex(range(6), method='ffill')

0    NaN
1      A
2      A
3      B
4      B
5      C
dtype: object

***
<font color=orange size=4>DataFrame的reindex</font>

In [12]:
# Draw the demo values from a seeded generator so this frame, and every table
# derived from it in later cells, is identical on each Restart & Run All.
rng = np.random.default_rng(42)

# 5x5 frame of standard-normal values. The row labels skip 'C'; the reindex
# cell that follows asks for 'C' to show how missing labels are handled.
df1 = pd.DataFrame(
    rng.standard_normal((5, 5)),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=['c1', 'c2', 'c3', 'c4', 'c5'],
)
df1

Unnamed: 0,c1,c2,c3,c4,c5
A,1.242974,-0.267564,-0.188393,0.114098,-0.169648
B,-0.6352,-1.175575,1.861612,-0.687703,-1.480088
D,-1.395213,0.734751,0.936394,-0.307007,-0.271671
E,0.247916,-0.190745,-0.46882,0.599184,2.486614
F,1.385994,-0.323394,0.090152,0.391476,0.370319


In [13]:
# Reindex rows and columns in one call: labels that df1 lacks (row 'C' and
# column 'c6') are created and populated with fill_value.
# np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0
# and raises AttributeError there; np.nan works on every NumPy version.
df1.reindex(index=['A','B','C','D','E','F'],
           columns=['c1','c2','c3','c4','c5','c6'],
           fill_value=np.nan)

Unnamed: 0,c1,c2,c3,c4,c5,c6
A,1.242974,-0.267564,-0.188393,0.114098,-0.169648,
B,-0.6352,-1.175575,1.861612,-0.687703,-1.480088,
C,,,,,,
D,-1.395213,0.734751,0.936394,-0.307007,-0.271671,
E,0.247916,-0.190745,-0.46882,0.599184,2.486614,
F,1.385994,-0.323394,0.090152,0.391476,0.370319,


In [14]:
# Drop column 'c3'; the result is a new frame and df1 itself is left untouched.
df1.drop(columns='c3')

Unnamed: 0,c1,c2,c4,c5
A,1.242974,-0.267564,0.114098,-0.169648
B,-0.6352,-1.175575,-0.687703,-1.480088
D,-1.395213,0.734751,-0.307007,-0.271671
E,0.247916,-0.190745,0.599184,2.486614
F,1.385994,-0.323394,0.391476,0.370319
