# Deleting rows and columsn
## Axis
- Series
    There is only one dimension(axis)
- DataFrame (2 dimensions)
    - axis=0 (vertical), raw labels (column)
    - axis=1 (horizontal), columns(row)

In [5]:
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.rand(7,5), columns=['A','B','C','D','E'])
df

Unnamed: 0,A,B,C,D,E
0,0.871741,0.361125,0.968398,0.156694,0.425446
1,0.800992,0.472609,0.429634,0.002277,0.424425
2,0.644317,0.927834,0.49362,0.077457,0.788661
3,0.925472,0.958447,0.712807,0.361978,0.027291
4,0.51148,0.506466,0.509286,0.666623,0.296353
5,0.309449,0.068999,0.404454,0.314298,0.993328
6,0.902145,0.555427,0.222548,0.902405,0.613529


##### drop() returns a new object, but it does not modify the original object

In [16]:
df.drop([0,1,2])

Unnamed: 0,A,B,C,D,E
3,0.925472,0.958447,0.712807,0.361978,0.027291
4,0.51148,0.506466,0.509286,0.666623,0.296353
5,0.309449,0.068999,0.404454,0.314298,0.993328
6,0.902145,0.555427,0.222548,0.902405,0.613529


In [15]:
df.drop(['A','D'],axis=1)

Unnamed: 0,B,C,E
0,0.361125,0.968398,0.425446
1,0.472609,0.429634,0.424425
2,0.927834,0.49362,0.788661
3,0.958447,0.712807,0.027291
4,0.506466,0.509286,0.296353
5,0.068999,0.404454,0.993328
6,0.555427,0.222548,0.613529


In [18]:
df2 = df.drop([0,1,2])
df2

Unnamed: 0,A,B,C,D,E
3,0.925472,0.958447,0.712807,0.361978,0.027291
4,0.51148,0.506466,0.509286,0.666623,0.296353
5,0.309449,0.068999,0.404454,0.314298,0.993328
6,0.902145,0.555427,0.222548,0.902405,0.613529


## Selecting elements
- Slicing
- .loc() and .iloc()

In [46]:
s = pd.Series(np.random.rand(6), index=['a','b','c','d','d','e'])
s

a    0.088334
b    0.394953
c    0.493798
d    0.160724
d    0.109050
e    0.764754
dtype: float64

In [27]:
# Selecting specific entries
s[['b','c','d']]

b    0.042582
c    0.833911
d    0.842138
d    0.602145
dtype: float64

In [30]:
# Selecting a range of entries
# the range is inclusive, the last one is included
s['b':'d']

b    0.042582
c    0.833911
d    0.842138
d    0.602145
dtype: float64

In [31]:
s > 0.5

a    False
b    False
c     True
d     True
d     True
e    False
dtype: bool

In [33]:
mask = s > 0.5

In [39]:
# only return values that are "True"
s[mask]

c    0.833911
d    0.842138
d    0.602145
dtype: float64

In [40]:
# only return values that are "True"
s[s > 0.5]

c    0.833911
d    0.842138
d    0.602145
dtype: float64

### Assign a value to selected entries

In [47]:
s

a    0.088334
b    0.394953
c    0.493798
d    0.160724
d    0.109050
e    0.764754
dtype: float64

In [48]:
s['b':'c'] = -1
s

a    0.088334
b   -1.000000
c   -1.000000
d    0.160724
d    0.109050
e    0.764754
dtype: float64

In [49]:
s[ s == -1] = 1
s

a    0.088334
b    1.000000
c    1.000000
d    0.160724
d    0.109050
e    0.764754
dtype: float64

## Selecting entries in DataFrame

In [50]:
df = pd.DataFrame(np.random.rand(4,5), columns=['A','B','C','D','E'])
df

Unnamed: 0,A,B,C,D,E
0,0.174485,0.662993,0.027938,0.170503,0.572313
1,0.836235,0.139266,0.338256,0.304863,0.633423
2,0.278125,0.563993,0.701639,0.440861,0.001475
3,0.1127,0.93746,0.290115,0.687938,0.404869


In [57]:
# It does not modify the original dataframe
df[['A','C']]

Unnamed: 0,A,C
0,0.174485,0.027938
1,0.836235,0.338256
2,0.278125,0.701639
3,0.1127,0.290115


In [59]:
# only return to "True"
df [df['D']>0.5]

Unnamed: 0,A,B,C,D,E
3,0.1127,0.93746,0.290115,0.687938,0.404869


In [61]:
# Checking for boolean condition
df['D']>0.5

0    False
1    False
2    False
3     True
Name: D, dtype: bool

## Using .loc
### use axis label

In [68]:
df = pd.DataFrame(np.random.rand(4,3), columns=['A','B','C'], index=['a','b','c','d'])
df

Unnamed: 0,A,B,C
a,0.327418,0.217434,0.673961
b,0.928304,0.289233,0.230196
c,0.912113,0.25329,0.878523
d,0.178699,0.819916,0.233613


In [72]:
#loc[row range, column range]
df.loc['a':'b',:]

Unnamed: 0,A,B,C
a,0.327418,0.217434,0.673961
b,0.928304,0.289233,0.230196


In [79]:
df['a':'b']

Unnamed: 0,A,B,C
a,0.327418,0.217434,0.673961
b,0.928304,0.289233,0.230196


### Selected rows, all columns

In [82]:
df.loc[:,['B','C']]

Unnamed: 0,B,C
a,0.217434,0.673961
b,0.289233,0.230196
c,0.25329,0.878523
d,0.819916,0.233613


In [86]:
df.loc['a':'b',['A','C']]

Unnamed: 0,A,C
a,0.327418,0.673961
b,0.928304,0.230196


# Using .iloc
### integer indices

In [87]:
df = pd.DataFrame(np.random.rand(4,3), columns=['A','B','C'], index=['a','b','c','d'])
df

Unnamed: 0,A,B,C
a,0.761031,0.790583,0.623606
b,0.501803,0.001292,0.386625
c,0.138778,0.98361,0.641597
d,0.716474,0.879387,0.683912


### Selecting rows

In [104]:
# one row
df.iloc[1]

A    0.501803
B    0.001292
C    0.386625
Name: b, dtype: float64

In [106]:
# multiple rows
df.iloc[[1,2]]

Unnamed: 0,A,B,C
b,0.501803,0.001292,0.386625
c,0.138778,0.98361,0.641597


### Selecting column(s)

In [108]:
# one column
df.iloc[:,1]

a    0.790583
b    0.001292
c    0.983610
d    0.879387
Name: B, dtype: float64

In [109]:
# multiple columns
df.iloc[:,[1,2]]

Unnamed: 0,B,C
a,0.790583,0.623606
b,0.001292,0.386625
c,0.98361,0.641597
d,0.879387,0.683912


### Selecting rows and columns

In [110]:
df.iloc[[0,1],[1,2]]

Unnamed: 0,B,C
a,0.790583,0.623606
b,0.001292,0.386625


In [111]:
s = pd.Series([0.0,0.1,0.2], index=[1,2,3])
s

1    0.0
2    0.1
3    0.2
dtype: float64

### Selecting a range

In [115]:
# integer range (inclusive, exclusive)
s[1:3]

2    0.1
3    0.2
dtype: float64

In [116]:
# loc is lable based access
s.loc[1:3]

1    0.0
2    0.1
3    0.2
dtype: float64

In [117]:
# iloc index range(inclusive, exclusive)
s.iloc[1:3]

2    0.1
3    0.2
dtype: float64