# Documents

#### https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

# Etc

In [2]:
import pandas as pd
import numpy as np

In [14]:
sales = {'account' : ['Kim', 'Song', 'Noh', 'Ho'],
         'Jan' : [150, 200, 70, np.NaN],
         'Feb' : [200, 210, 90, 100],
         'Mar' : [140, np.NaN, 215, 95]}
df_orig = pd.DataFrame.from_dict(sales)
df = df_orig.copy()
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


### rename columns / index

In [4]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


In [5]:
df_new = df.rename(columns={'account':'name', 'Mar':'May'}, index={0:'zero'})
df_new

Unnamed: 0,name,Jan,Feb,May
zero,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


### rename columns/rows of original object

In [6]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


In [7]:
df2 = df.copy()
df2

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


In [8]:
df2.rename(columns={'account':'name', 'Mar':'May'}, index={0:'zero'}, inplace=True)
df2

Unnamed: 0,name,Jan,Feb,May
zero,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


### Drop Columns

#### 1. drop rows including null value

In [9]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


In [10]:
df_new2 = df.dropna(axis=0)   # drop rows which has null value
df_new2

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
2,Noh,70.0,90,215.0


#### 2. drop columns including null value

In [11]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


In [12]:
df_new3 = df.dropna(axis=1)   # drop columns which has null value
df_new3

Unnamed: 0,account,Feb
0,Kim,200
1,Song,210
2,Noh,90
3,Ho,100


## lambda

In [15]:
df

Unnamed: 0,account,Jan,Feb,Mar
0,Kim,150.0,200,140.0
1,Song,200.0,210,
2,Noh,70.0,90,215.0
3,Ho,,100,95.0


#### lambda  / True/False Tag column

In [16]:
df['new_col'] = df['account'].apply(lambda x : True if (x in ['Kim','Ho']) else False)
df

Unnamed: 0,account,Jan,Feb,Mar,new_col
0,Kim,150.0,200,140.0,True
1,Song,200.0,210,,False
2,Noh,70.0,90,215.0,False
3,Ho,,100,95.0,True


#### lambda / testing function

In [17]:
test = lambda x : True if (x > 10 and x < 20) else False
test(12)

True

### Using if else in Lambda function
lambda (arguments) : (Return value if condition is True) if (condition) else

In [24]:
user_defined_function_01 = lambda x : True if (x>10 and x<20) else False
user_defined_function_01(17)

True

### scalar 'to_replace' and 'value'

In [2]:
import pandas as pd
s = pd.Series([0, 1, 2, 3, 4])
s.replace(0, 5)

0    5
1    1
2    2
3    3
4    4
dtype: int64

In [3]:
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                   'B': [5, 6, 7, 8, 9],
                   'C': ['a', 'b', 'c', 'd', 'e']})
df.replace(0, 5)

Unnamed: 0,A,B,C
0,5,5,a
1,1,6,b
2,2,7,c
3,3,8,d
4,4,9,e


### List-like 'to_replace'

In [5]:
df.replace([0, 1, 2, 3], 4)

Unnamed: 0,A,B,C
0,4,5,a
1,4,6,b
2,4,7,c
3,4,8,d
4,4,9,e


In [6]:
df.replace([0, 1, 2, 3], [4, 3, 2, 1])

Unnamed: 0,A,B,C
0,4,5,a
1,3,6,b
2,2,7,c
3,1,8,d
4,4,9,e


In [7]:
s.replace([1, 2], method='bfill')

0    0
1    3
2    3
3    3
4    4
dtype: int64

In [9]:
df.replace({0: 10, 1: 100})

Unnamed: 0,A,B,C
0,10,5,a
1,100,6,b
2,2,7,c
3,3,8,d
4,4,9,e


### Regular expression 'to_replace'

In [10]:
df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
                   'B': ['abc', 'bar', 'xyz']})
df.replace(to_replace=r'^ba.$', value='new', regex=True)

Unnamed: 0,A,B
0,new,abc
1,foo,new
2,bait,xyz


In [11]:
df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)

Unnamed: 0,A,B
0,new,abc
1,foo,bar
2,bait,xyz


In [12]:
df.replace(regex=r'^ba.$', value='new')

Unnamed: 0,A,B
0,new,abc
1,foo,new
2,bait,xyz


In [13]:
df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})

Unnamed: 0,A,B
0,new,abc
1,xyz,new
2,bait,xyz


In [14]:
df.replace(regex=[r'^ba.$', 'foo'], value='new')

Unnamed: 0,A,B
0,new,abc
1,new,new
2,bait,xyz


In [17]:
s = pd.Series([10, 'a', 'a', 'b', 'a'])
print(s)
print('-------------------------')
s.replace({'a': None})
print(s)

0    10
1     a
2     a
3     b
4     a
dtype: object
-------------------------
0    10
1     a
2     a
3     b
4     a
dtype: object


In [19]:
s = pd.Series([10, 'a', 'a', 'b', 'a'])
print(s)
print('-------------------------')
s.replace('a', None)
print(s)

0    10
1     a
2     a
3     b
4     a
dtype: object
-------------------------
0    10
1     a
2     a
3     b
4     a
dtype: object
