# DAY5（数据框DataFrame行操作）

- 取一行
- 取多行
- 添加新行
- 插入新行
- 移除行

In [1]:
import datetime

import numpy as np
import pandas as pd

In [2]:
# Seed the global NumPy RNG so the sample data below is the same on every run.
np.random.seed(1)
# 10 rows x 4 columns of random normal values, labelled by 10 consecutive
# daily dates starting 2013-01-01.
df = pd.DataFrame(
    data=np.random.randn(10, 4),
    index=pd.date_range(start='20130101', periods=10),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,1.624345,-0.611756,-0.528172,-1.072969
2013-01-02,0.865408,-2.301539,1.744812,-0.761207
2013-01-03,0.319039,-0.24937,1.462108,-2.060141
2013-01-04,-0.322417,-0.384054,1.133769,-1.099891
2013-01-05,-0.172428,-0.877858,0.042214,0.582815
2013-01-06,-1.100619,1.144724,0.901591,0.502494
2013-01-07,0.900856,-0.683728,-0.12289,-0.935769
2013-01-08,-0.267888,0.530355,-0.691661,-0.396754
2013-01-09,-0.687173,-0.845206,-0.671246,-0.012665
2013-01-10,-1.11731,0.234416,1.659802,0.742044


### 1. 选择行

In [3]:
# Label-based lookup: select the single row whose index label is 2013-01-03.
# The result is a Series keyed by column name.
df.loc['2013-01-03']

A    0.319039
B   -0.249370
C    1.462108
D   -2.060141
Name: 2013-01-03 00:00:00, dtype: float64

In [4]:
# Position-based lookup: iloc[1] is the second row (positions start at 0).
df.iloc[1]

A    0.865408
B   -2.301539
C    1.744812
D   -0.761207
Name: 2013-01-02 00:00:00, dtype: float64

In [5]:
# Slicing the frame directly with integers selects rows by position;
# the stop position (4) is excluded, so this yields positions 2 and 3.
df[2:4]

Unnamed: 0,A,B,C,D
2013-01-03,0.319039,-0.24937,1.462108,-2.060141
2013-01-04,-0.322417,-0.384054,1.133769,-1.099891


In [6]:
# Explicit positional slice: rows at positions 2 and 3 (stop is exclusive).
df.iloc[2:4]

Unnamed: 0,A,B,C,D
2013-01-03,0.319039,-0.24937,1.462108,-2.060141
2013-01-04,-0.322417,-0.384054,1.133769,-1.099891


In [7]:
# Label slice with an open start: every row from the first one up to and
# including 2013-01-04 (label slices include the end label).
df.loc[:'2013-01-04']

Unnamed: 0,A,B,C,D
2013-01-01,1.624345,-0.611756,-0.528172,-1.072969
2013-01-02,0.865408,-2.301539,1.744812,-0.761207
2013-01-03,0.319039,-0.24937,1.462108,-2.060141
2013-01-04,-0.322417,-0.384054,1.133769,-1.099891


In [8]:
# Label slice between two dates; both endpoints are included in the result.
df.loc['2013-01-04':'2013-01-08']

Unnamed: 0,A,B,C,D
2013-01-04,-0.322417,-0.384054,1.133769,-1.099891
2013-01-05,-0.172428,-0.877858,0.042214,0.582815
2013-01-06,-1.100619,1.144724,0.901591,0.502494
2013-01-07,0.900856,-0.683728,-0.12289,-0.935769
2013-01-08,-0.267888,0.530355,-0.691661,-0.396754


### 2. 添加行

In [9]:
# 5x4 frame of random integers drawn from [0, 10); rows get the default
# integer labels 0..4.
df1 = pd.DataFrame(
    data=np.random.randint(0, 10, size=(5, 4)),
    columns=['A', 'B', 'C', 'D'],
)

In [10]:
# Display the freshly built frame.
df1

Unnamed: 0,A,B,C,D
0,8,7,7,1
1,1,3,0,8
2,6,4,5,6
3,2,5,7,8
4,4,4,7,7


In [11]:
# Append one row under the next unused integer label (the current row count);
# the scalar 4 is broadcast across all columns.
df1.loc[len(df1)] = 4
df1

Unnamed: 0,A,B,C,D
0,8,7,7,1
1,1,3,0,8
2,6,4,5,6
3,2,5,7,8
4,4,4,7,7
5,4,4,4,4


### 3. 插入行

In [12]:
# Rebuild df1 from fresh random integers in [0, 10) so the insertion demo
# starts from a clean 5-row frame with the default integer index.
df1 = pd.DataFrame(np.random.randint(low=0, high=10, size=(5, 4)),
                   columns=list('ABCD'))

In [13]:
# Display the rebuilt frame before inserting a row.
df1

Unnamed: 0,A,B,C,D
0,4,9,0,2
1,0,7,1,7
2,9,8,4,0
3,1,9,8,2
4,3,1,2,7


In [14]:
# "Insert" a row by assigning to a fractional label that sorts between 1 and 2.
# The row is physically stored at the end for now, and the index labels become floats.
df1.loc[1.5] = [1,2,3,4]

In [15]:
# Display the frame: the new 1.5 row sits last until the index is sorted.
df1

Unnamed: 0,A,B,C,D
0.0,4,9,0,2
1.0,0,7,1,7
2.0,9,8,4,0
3.0,1,9,8,2
4.0,3,1,2,7
1.5,1,2,3,4


In [16]:
# Sort by label so the 1.5 row lands between 1 and 2, then renumber rows 0..5.
# The result is only displayed, not assigned, so df1 itself is unchanged.
df1.sort_index().reset_index(drop=True)

Unnamed: 0,A,B,C,D
0,4,9,0,2
1,0,7,1,7
2,1,2,3,4
3,9,8,4,0
4,1,9,8,2
5,3,1,2,7


### 4. 移除行

In [17]:
# Remove the row labelled 1.5 and renumber, leaving the original five rows
# under integer labels 0..4.
df1 = df1.drop(1.5).reset_index(drop=True)

In [18]:
# drop returns a new frame without the rows labelled 1 and 2;
# df1 is not modified because the result is not assigned.
df1.drop([1,2])

Unnamed: 0,A,B,C,D
0,4,9,0,2
3,1,9,8,2
4,3,1,2,7


### 5. 习题

- 筛选出日期（日）为偶数的行（下面给出三种等价写法）
- 在数据末尾添加一行，再将其移除
- 在数据末尾连续追加 100 行，日期逐日递增

In [19]:
# Re-seed so the exercise data is reproducible, then build 1000 daily rows of
# random normal values starting on 2020-01-01.
np.random.seed(1)
df = pd.DataFrame(
    data=np.random.randn(1000, 4),
    index=pd.date_range(start='20200101', periods=1000),
    columns=['A', 'B', 'C', 'D'],
)

In [20]:
# Display the frame (for a long frame the notebook shows only the first and last rows).
df

Unnamed: 0,A,B,C,D
2020-01-01,1.624345,-0.611756,-0.528172,-1.072969
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-03,0.319039,-0.249370,1.462108,-2.060141
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-05,-0.172428,-0.877858,0.042214,0.582815
...,...,...,...,...
2022-09-22,-0.138881,2.652140,-0.656247,0.279562
2022-09-23,-0.607715,0.729814,-0.887188,0.077327
2022-09-24,0.073416,0.416026,-1.879200,0.575459
2022-09-25,0.102062,1.184304,-0.794843,-0.125903


In [21]:
# Boolean-mask row filter: wrap the index in a Series (indexed by itself) to
# reach the .dt accessor, then keep rows whose day-of-month is even.
df[pd.Series(df.index, index = df.index).dt.day % 2 == 0]

Unnamed: 0,A,B,C,D
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-06,-1.100619,1.144724,0.901591,0.502494
2020-01-08,-0.267888,0.530355,-0.691661,-0.396754
2020-01-10,-1.117310,0.234416,1.659802,0.742044
...,...,...,...,...
2022-09-18,-1.541780,-1.025194,0.325975,0.805144
2022-09-20,-0.527962,-2.071264,0.526165,-0.450283
2022-09-22,-0.138881,2.652140,-0.656247,0.279562
2022-09-24,0.073416,0.416026,-1.879200,0.575459


In [22]:
# Keep rows whose day-of-month is even; index.to_series() supplies the Series
# needed for the .dt accessor without building it by hand.
df[df.index.to_series().dt.day % 2 == 0]

Unnamed: 0,A,B,C,D
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-06,-1.100619,1.144724,0.901591,0.502494
2020-01-08,-0.267888,0.530355,-0.691661,-0.396754
2020-01-10,-1.117310,0.234416,1.659802,0.742044
...,...,...,...,...
2022-09-18,-1.541780,-1.025194,0.325975,0.805144
2022-09-20,-0.527962,-2.071264,0.526165,-0.450283
2022-09-22,-0.138881,2.652140,-0.656247,0.279562
2022-09-24,0.073416,0.416026,-1.879200,0.575459


In [23]:
# Shortest form: the datetime index exposes .day directly, so the even-day
# mask needs no Series conversion.
df[df.index.day % 2 == 0]

Unnamed: 0,A,B,C,D
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-06,-1.100619,1.144724,0.901591,0.502494
2020-01-08,-0.267888,0.530355,-0.691661,-0.396754
2020-01-10,-1.117310,0.234416,1.659802,0.742044
...,...,...,...,...
2022-09-18,-1.541780,-1.025194,0.325975,0.805144
2022-09-20,-0.527962,-2.071264,0.526165,-0.450283
2022-09-22,-0.138881,2.652140,-0.656247,0.279562
2022-09-24,0.073416,0.416026,-1.879200,0.575459


In [24]:
# Assigning to a label that is not yet in the index appends a new row;
# the scalar 1 fills every column.
df.loc['2022-09-27'] = 1

In [25]:
# Remove the row labelled 2022-09-27; drop returns a new frame, so the result
# is assigned back to df.
df = df.drop('2022-09-27')

In [26]:
# Display the frame to confirm the temporary row is gone.
df

Unnamed: 0,A,B,C,D
2020-01-01,1.624345,-0.611756,-0.528172,-1.072969
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-03,0.319039,-0.249370,1.462108,-2.060141
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-05,-0.172428,-0.877858,0.042214,0.582815
...,...,...,...,...
2022-09-22,-0.138881,2.652140,-0.656247,0.279562
2022-09-23,-0.607715,0.729814,-0.887188,0.077327
2022-09-24,0.073416,0.416026,-1.879200,0.575459
2022-09-25,0.102062,1.184304,-0.794843,-0.125903


In [27]:
# The date one day after the current last index label, i.e. the label the
# next appended row should use. `datetime` is imported in the notebook's
# top import cell rather than mid-notebook.
df.index[-1] + datetime.timedelta(days=1)

Timestamp('2022-09-27 00:00:00', freq='D')

In [28]:
# Append n daily rows after the current last date with a single concat,
# instead of enlarging the frame one row at a time inside a loop (row-by-row
# enlargement rebuilds the frame on every iteration and scales quadratically).
n = 100
# date_range starts at the existing last label, so drop that first entry to
# keep only the n new, consecutive dates.
new_dates = pd.date_range(start=df.index[-1], periods=n + 1, freq='D')[1:]
# The k-th new row (1-based) holds the value k in every column, matching the
# values the former loop assigned (i + 1 for i in range(n)).
new_rows = pd.DataFrame(
    {col: np.arange(1, n + 1, dtype=float) for col in df.columns},
    index=new_dates,
)
df = pd.concat([df, new_rows])

In [29]:
# Display the frame; its tail now holds the appended rows.
df

Unnamed: 0,A,B,C,D
2020-01-01,1.624345,-0.611756,-0.528172,-1.072969
2020-01-02,0.865408,-2.301539,1.744812,-0.761207
2020-01-03,0.319039,-0.249370,1.462108,-2.060141
2020-01-04,-0.322417,-0.384054,1.133769,-1.099891
2020-01-05,-0.172428,-0.877858,0.042214,0.582815
...,...,...,...,...
2022-12-31,96.000000,96.000000,96.000000,96.000000
2023-01-01,97.000000,97.000000,97.000000,97.000000
2023-01-02,98.000000,98.000000,98.000000,98.000000
2023-01-03,99.000000,99.000000,99.000000,99.000000
