In [1]:
import pandas as pd
import numpy as np

# Fixed seed so the random demo data (and every output derived from it) is the
# same on each "Restart Kernel -> Run All".
SEED = 42
rng = np.random.default_rng(SEED)

# Each column is a Series of random integers in [-10, 10) with its own row
# labels. pandas aligns the Series on the union of those labels (0..8 here) and
# puts NaN wherever a column has no value for a label; columns that receive NaN
# are stored as floats.
original = pd.DataFrame({
    'one': pd.Series(rng.integers(-10, 10, size=4), index=[2, 3, 5, 8]),
    'two': pd.Series(rng.integers(-10, 10, size=4), index=[1, 2, 3, 4]),
    'three': pd.Series(rng.integers(-10, 10, size=7), index=[2, 3, 4, 5, 6, 7, 8]),
    'four': pd.Series(rng.integers(-10, 10, size=7), index=[1, 2, 3, 4, 5, 6, 7]),
    # No explicit index: default labels 0..8, so this column has no NaN.
    'five': pd.Series(rng.integers(-10, 10, size=9)),
})

# A bare last expression is rendered with the rich DataFrame repr.
original

Unnamed: 0,one,two,three,four,five
0,,,,,4
1,,-7.0,,-7.0,6
2,-1.0,7.0,-5.0,-5.0,-1
3,-1.0,-2.0,3.0,-5.0,2
4,,-4.0,-2.0,-6.0,7
5,6.0,,9.0,-4.0,7
6,,,-2.0,1.0,-4
7,,,3.0,-8.0,-1
8,5.0,,-1.0,,2


# Deep copy
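- Plain assignment (`df = original`) only creates a second name for the same object, so any change to `df` $\to$ the same change in `original`. Use `original.copy()` to get an independent copy.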

In [2]:
# Work on an independent (deep) copy so that `original` is never modified.
df = original.copy(deep=True)

# Rows
#### Drop rows

In [3]:
# Remove the row labelled 4. `index=` states explicitly that 4 is a row label,
# and rebinding `df` to the returned frame replaces `inplace=True`.
df = df.drop(index=4)
df

Unnamed: 0,one,two,three,four,five
0,,,,,4
1,,-7.0,,-7.0,6
2,-1.0,7.0,-5.0,-5.0,-1
3,-1.0,-2.0,3.0,-5.0,2
5,6.0,,9.0,-4.0,7
6,,,-2.0,1.0,-4
7,,,3.0,-8.0,-1
8,5.0,,-1.0,,2


In [4]:
# Several rows can be removed at once by passing a list of row labels.
# Rebinding `df` to the returned frame replaces `inplace=True`.
df = df.drop(index=[2, 5, 6])
df

Unnamed: 0,one,two,three,four,five
0,,,,,4
1,,-7.0,,-7.0,6
3,-1.0,-2.0,3.0,-5.0,2
7,,,3.0,-8.0,-1
8,5.0,,-1.0,,2


#### Add rows

In [5]:
# Assigning to a label that does not exist yet appends a new row with that
# label. The list must hold one value per column, in column order.
new_label = 2
new_values = [1, 2, 3, 4, 10]
df.loc[new_label] = new_values
df

Unnamed: 0,one,two,three,four,five
0,,,,,4
1,,-7.0,,-7.0,6
3,-1.0,-2.0,3.0,-5.0,2
7,,,3.0,-8.0,-1
8,5.0,,-1.0,,2
2,1.0,2.0,3.0,4.0,10


#### Reset index

In [6]:
# Replace the current row labels with a fresh 0..n-1 index. Without
# `drop=True` the old labels are kept as a new column named 'index'.
# Rebinding `df` to the returned frame replaces `inplace=True`.
df = df.reset_index()
df

Unnamed: 0,index,one,two,three,four,five
0,0,,,,,4
1,1,,-7.0,,-7.0,6
2,3,-1.0,-2.0,3.0,-5.0,2
3,7,,,3.0,-8.0,-1
4,8,5.0,,-1.0,,2
5,2,1.0,2.0,3.0,4.0,10


# Cols
#### Drop cols

In [7]:
# Remove a single column by name. `columns=` says what is being dropped more
# clearly than `axis=1`, and rebinding `df` replaces `inplace=True`.
df = df.drop(columns='index')
df

Unnamed: 0,one,two,three,four,five
0,,,,,4
1,,-7.0,,-7.0,6
2,-1.0,-2.0,3.0,-5.0,2
3,,,3.0,-8.0,-1
4,5.0,,-1.0,,2
5,1.0,2.0,3.0,4.0,10


In [8]:
# Several columns can be removed at once by passing a list of column names.
# Rebinding `df` to the returned frame replaces `inplace=True`.
df = df.drop(columns=['two', 'five'])
df

Unnamed: 0,one,three,four
0,,,
1,,,-7.0
2,-1.0,3.0,-5.0
3,,3.0,-8.0
4,5.0,-1.0,
5,1.0,3.0,4.0


#### Add cols

In [9]:
# New column from the element-wise sum of two existing columns; a NaN in
# either operand gives NaN in the result.
df['six'] = df['three'].add(df['one'])
df

Unnamed: 0,one,three,four,six
0,,,,
1,,,-7.0,
2,-1.0,3.0,-5.0,2.0
3,,3.0,-8.0,
4,5.0,-1.0,,4.0
5,1.0,3.0,4.0,4.0


In [10]:
# Assigning a scalar broadcasts it to every row of the new column.
constant_value = -10
df['seven'] = constant_value
df

Unnamed: 0,one,three,four,six,seven
0,,,,,-10
1,,,-7.0,,-10
2,-1.0,3.0,-5.0,2.0,-10
3,,3.0,-8.0,,-10
4,5.0,-1.0,,4.0,-10
5,1.0,3.0,4.0,4.0,-10


In [11]:
def _double(value):
    """Return twice ``value``."""
    return value * 2


# Series.apply calls the function once per element of 'one'; NaN entries
# stay NaN.
df['eights'] = df['one'].apply(_double)
df

Unnamed: 0,one,three,four,six,seven,eights
0,,,,,-10,
1,,,-7.0,,-10,
2,-1.0,3.0,-5.0,2.0,-10,-2.0
3,,3.0,-8.0,,-10,
4,5.0,-1.0,,4.0,-10,10.0
5,1.0,3.0,4.0,4.0,-10,2.0


In [12]:
# insert(position, name, value) modifies `df` directly, placing the new
# column at the given position (0 = first column).
df.insert(0, 'Count', 1)
df

Unnamed: 0,Count,one,three,four,six,seven,eights
0,1,,,,,-10,
1,1,,,-7.0,,-10,
2,1,-1.0,3.0,-5.0,2.0,-10,-2.0
3,1,,3.0,-8.0,,-10,
4,1,5.0,-1.0,,4.0,-10,10.0
5,1,1.0,3.0,4.0,4.0,-10,2.0


#### Reorder columns and drop the ones not listed

In [13]:
# Selecting with a list of names returns the columns in the listed order and
# leaves out every column that is not listed.
wanted_columns = ['one', 'seven', 'six']
df = df.loc[:, wanted_columns]
df

Unnamed: 0,one,seven,six
0,,-10,
1,,-10,
2,-1.0,-10,2.0
3,,-10,
4,5.0,-10,4.0
5,1.0,-10,4.0


# Handling NaN

#### Drop all rows containing NaN

In [14]:
# Keep only the rows that have no NaN in any column, then renumber them
# 0..n-1. dropna() returns a new frame, so `original` stays untouched without
# an explicit copy and no `inplace=True` is needed.
df = original.dropna().reset_index(drop=True)
df

Unnamed: 0,one,two,three,four,five
0,-1.0,7.0,-5.0,-5.0,-1
1,-1.0,-2.0,3.0,-5.0,2


#### Drop all columns containing NaN

In [15]:
# Keep only the columns that have no NaN in any row, then renumber the rows
# 0..n-1. dropna() returns a new frame, so `original` stays untouched without
# an explicit copy and no `inplace=True` is needed.
df = original.dropna(axis='columns').reset_index(drop=True)
df

Unnamed: 0,five
0,4
1,6
2,-1
3,2
4,7
5,7
6,-4
7,-1
8,2


#### Fill NaN with a value (mean, constant, median)

In [16]:
# Start again from an independent (deep) copy of the untouched data.
df = original.copy(deep=True)

In [17]:
# One fill value per column; mean() and median() ignore NaN by default.
fill_values = {
    'one': df['one'].mean(),        # column mean
    'two': 5,                       # fixed constant
    'three': df['three'].median(),  # column median
}

# Fill through the DataFrame rather than `df[col].fillna(..., inplace=True)`:
# that form mutates a temporary Series selected from `df`, which raises a
# chained-assignment warning in pandas 2.x and leaves `df` unchanged under
# Copy-on-Write. Columns not listed in the dict ('four') keep their NaN.
df = df.fillna(value=fill_values)
df

Unnamed: 0,one,two,three,four,five
0,2.25,5.0,-1.0,,4
1,2.25,-7.0,-1.0,-7.0,6
2,-1.0,7.0,-5.0,-5.0,-1
3,-1.0,-2.0,3.0,-5.0,2
4,2.25,-4.0,-2.0,-6.0,7
5,6.0,5.0,9.0,-4.0,7
6,2.25,5.0,-2.0,1.0,-4
7,2.25,5.0,3.0,-8.0,-1
8,5.0,5.0,-1.0,,2
