## Data Manipulation in Pandas

## Importing modules

In [1]:
import pandas as pd
import numpy as np

## Adding new column

**`DataFrame.insert()`**

Syntax:
```
DataFrame.insert(loc, column, value, allow_duplicates=False)
```

In [2]:
# Build the small two-column frame that the following cells extend.
data = dict(A=[1, 2, 3], B=[4, 5, 6])
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [3]:
# Place column 'C' at position 1, i.e. between 'A' and 'B'. This mutates `df`.
# allow_duplicates=True means an already-existing 'C' column would not raise.
df.insert(loc=1, column='C', value=[7, 8, 8], allow_duplicates=True)
df

Unnamed: 0,A,C,B
0,1,7,4
1,2,8,5
2,3,8,6


**`DataFrame.assign`**

Returns a new DataFrame with the new column added; the original DataFrame is left unchanged.

In [4]:
# assign() leaves `df` untouched and hands back a copy carrying the extra column 'd'.
d_values = [10, 11, 12]
new_df = df.assign(d=d_values)
new_df

Unnamed: 0,A,C,B,d
0,1,7,4,10
1,2,8,5,11
2,3,8,6,12


In [5]:
# Derive column 'd' as the row-wise sum of 'A' and 'B'.
# The callable receives the frame that assign() is building on.
new_df = df.assign(d=lambda frame: frame['A'] + frame['B'])
new_df

Unnamed: 0,A,C,B,d
0,1,7,4,5
1,2,8,5,7
2,3,8,6,9


**using a Dictionary**

In [6]:
# set_index returns a new frame by default, so `df` keeps its integer index
# while `new_df` is indexed by the values of column 'A'.
new_df = df.set_index('A')
new_df

Unnamed: 0_level_0,C,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,7,4
2,8,5
3,8,6


In [7]:
# Mapping whose keys match the labels of the 'A' index (1, 2, 3).
col = {1: 10, 2: 20, 3: 30}

# NOTE(review): the displayed output shows each value landing on the row whose
# index label equals its key; how a dict is assigned to a column may differ
# between pandas versions — confirm against the version in use.
new_df['D'] = col
new_df

Unnamed: 0_level_0,C,B,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,7,4,10
2,8,5,20
3,8,6,30


**using a List**

In [8]:
# `df` still has only columns A, C, B: the assign()/set_index() calls above
# produced new frames bound to `new_df`.
df

Unnamed: 0,A,C,B
0,1,7,4
1,2,8,5
2,3,8,6


In [9]:
# One value per existing row, in row order.
values = [40, 50, 60]

# Plain item assignment adds column 'D' to `df` in place.
df['D'] = values
df

Unnamed: 0,A,C,B,D
0,1,7,4,40
1,2,8,5,50
2,3,8,6,60


**using `DataFrame.loc`**

In [10]:
# Rebinds `values` (previously the list used for column 'D').
values = [100, 200, 300]

# Select every row (:) and the new label 'E'; assigning creates the column in place.
df.loc[:, 'E'] = values
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300


**Adding more than one column**

In [11]:
# List the current column labels before adding several more at once.
df.columns.values

array(['A', 'C', 'B', 'D', 'E'], dtype=object)

In [12]:
# Each key becomes a new column name; unpacking the mapping passes the
# columns to assign() as keyword arguments, so several are added in one call.
new_data = dict(F=[500, 600, 700], G=[800, 900, 1000])

new_df = df.assign(**new_data)
new_df

Unnamed: 0,A,C,B,D,E,F,G
0,1,7,4,40,100,500,800
1,2,8,5,50,200,600,900
2,3,8,6,60,300,700,1000


## Adding rows

In [13]:
# Starting point for the row-adding examples: three rows, columns A, C, B, D, E.
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300


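**using the private `DataFrame._append()` method**

> Note: `_append` is an internal pandas method. The public `DataFrame.append()` was removed in pandas 2.0; `pandas.concat()` (shown further below) is the supported way to add rows.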

Syntax:
```
DataFrame._append(other, ignore_index=False, verify_integrity=False, sort=False)
```

In [14]:
# Row to add, keyed by column name (key order need not match the column order).
# NOTE(review): `_append` has a leading underscore, i.e. it is private pandas
# API; pd.concat is the public way to add rows.
new_row = {'A': 11, 'B': 22, 'C': 33, 'D': 44, 'E': 55}
new_df = df._append(new_row, ignore_index=True)
new_df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55


**using `DataFrame.loc`**

In [15]:
# With the default 0..n-1 index, len(new_df) is the first unused row label.
# The list is matched to columns by position (A, C, B, D, E), not by name.
next_label = len(new_df)
new_df.loc[next_label] = [111, 222, 333, 444, 555]
new_df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55
4,111,222,333,444,555


**using `pandas.concat()`**

In [16]:
# Fix the seed so the two random rows are reproducible.
np.random.seed(0)
random_rows = np.random.randint(0, 100, (2, 5))
new_df2 = pd.DataFrame(random_rows, columns=list('ABCDE'))

# Columns are aligned by name; ignore_index=True renumbers the rows 0..n-1.
df = pd.concat(objs=[new_df, new_df2], ignore_index=True)
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55
4,111,222,333,444,555
5,44,64,47,67,67
6,9,21,83,36,87


## Delete rows/columns

**using `drop()` method**

Syntax:
```
DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')
```

In [17]:
# Promote column 'A' to the row index so rows can be dropped by their 'A' value.
# The result is rebound instead of using inplace=True: the end state is the
# same, but the call stays chainable and no existing frame object is mutated.
df = df.set_index('A')
df

Unnamed: 0_level_0,C,B,D,E
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,7,4,40,100
2,8,5,50,200
3,8,6,60,300
11,33,22,44,55
111,222,333,444,555
44,64,47,67,67
9,21,83,36,87


In [18]:
# Remove the rows whose index label (the 'A' value) is 1, 3 or 11.
# drop() returns a new frame by default, so `df` is left intact.
new_df = df.drop(index=[1, 3, 11])
new_df

Unnamed: 0_level_0,C,B,D,E
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,8,5,50,200
111,222,333,444,555
44,64,47,67,67
9,21,83,36,87


In [19]:
# Turn the 'A' index back into an ordinary column, then remove columns 'E' and 'D'.
# A single chained expression replaces the two inplace=True mutations; the
# resulting `new_df` has the same contents (columns A, C, B with a 0..n-1 index).
new_df = new_df.reset_index().drop(columns=['E', 'D'])
new_df

Unnamed: 0,A,C,B
0,2,8,5
1,111,222,333
2,44,64,47
3,9,21,83
