## Data Manipulation in Pandas

## Importing modules

In [1]:
import pandas as pd
import numpy as np

## Adding new column

**`DataFrame.insert()`**

Syntax:
```
DataFrame.insert(loc, column, value, allow_duplicates=False)
```

In [2]:
# Starting frame for the column examples: two integer columns, three rows.
data = dict(A=[1, 2, 3], B=[4, 5, 6])
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [3]:
# Insert column 'C' at position 1 (between 'A' and 'B'); this modifies `df`
# itself.  allow_duplicates=True permits a column label that already exists.
df.insert(loc=1, column='C', value=[7, 8, 8], allow_duplicates=True)
df

Unnamed: 0,A,C,B
0,1,7,4
1,2,8,5
2,3,8,6


**`DataFrame.assign`**
Returns a new DataFrame with the new column(s) added; the original DataFrame is left unchanged.

In [4]:
# `assign` returns a new DataFrame with column 'd' appended; `df` itself is
# not modified (it is displayed unchanged in a later cell).
new_df = df.assign(d=[10, 11, 12])
new_df

Unnamed: 0,A,C,B,d
0,1,7,4,10
1,2,8,5,11
2,3,8,6,12


In [5]:
# Column 'd' is computed row-wise as A + B.  The callable receives the frame
# being assigned to, so the expression does not need to name `df` twice.
new_df = df.assign(d=lambda frame: frame['A'] + frame['B'])
new_df

Unnamed: 0,A,C,B,d
0,1,7,4,5
1,2,8,5,7
2,3,8,6,9


**using a Dictionary**

In [6]:
# Use column 'A' as the row index of a new frame; `df` keeps its own index
# because set_index is not in-place by default.
new_df = df.set_index(keys='A')
new_df

Unnamed: 0_level_0,C,B
A,Unnamed: 1_level_1,Unnamed: 2_level_1
1,7,4
2,8,5
3,8,6


In [7]:
# Keys of this dict correspond to the index labels of `new_df` (the former
# column 'A': 1, 2, 3); values are the entries for the new column.
col = {1: 10, 2: 20, 3: 30}

# Adds column 'D' to `new_df` in place.  The displayed result shows each
# value landing on the row whose index label equals its dict key.
new_df['D'] = col
new_df

Unnamed: 0_level_0,C,B,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,7,4,10
2,8,5,20
3,8,6,30


**using a List**

In [8]:
# Show `df` as it stands before the list-based column is added.
df

Unnamed: 0,A,C,B
0,1,7,4
1,2,8,5
2,3,8,6


In [9]:
# One value per existing row of `df`.
values = [40, 50, 60]

# Bracket assignment adds column 'D' to `df` itself; the displayed result
# shows the list values placed in row order.
df['D'] = values
df

Unnamed: 0,A,C,B,D
0,1,7,4,40
1,2,8,5,50
2,3,8,6,60


**using `DataFrame.loc`**

In [10]:
# Rebind `values` to the data for the next new column.
values = [100, 200, 300]

# `.loc[:, 'E']` addresses every row of a column label that does not exist
# yet, so the assignment creates column 'E' on `df` itself.
df.loc[:, 'E'] = values
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300


**Adding more than one column**

In [11]:
# Current column labels of `df`, shown as a NumPy array.
df.columns.values

array(['A', 'C', 'B', 'D', 'E'], dtype=object)

In [12]:
# Each key becomes a new column label and each list that column's values.
new_data = dict(F=[500, 600, 700], G=[800, 900, 1000])

# Unpacking the mapping passes every entry to `assign` as a keyword, so both
# columns are added in one call; the result is a new frame.
new_df = df.assign(**new_data)
new_df

Unnamed: 0,A,C,B,D,E,F,G
0,1,7,4,40,100,500,800
1,2,8,5,50,200,600,900
2,3,8,6,60,300,700,1000


## Adding rows

In [13]:
# Show `df` as it stands before any rows are added.
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300


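**using `DataFrame._append()` function** (note: `_append` is a private pandas method — the public `DataFrame.append` was removed in pandas 2.0 — so prefer `pandas.concat()` in real code)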

Syntax:
```
DataFrame._append(other, ignore_index=False, verify_integrity=False, sort=False)
```

In [14]:
# `DataFrame._append` is a private method (the public `DataFrame.append` was
# removed in pandas 2.0), so the row is added with the public `pd.concat`
# instead.  The dict is wrapped in a one-row DataFrame; concat matches columns
# by name, keeps the column order of `df`, and `ignore_index=True` renumbers
# the rows 0..n-1.  `df` itself is not modified.
new_df = pd.concat(
    [df, pd.DataFrame([{'A': 11, 'B': 22, 'C': 33, 'D': 44, 'E': 55}])],
    ignore_index=True,
)
new_df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55


**using `DataFrame.loc`**

In [15]:
# `len(new_df)` is used as the label of the new row; it lands after the last
# row only because `new_df` was built with ignore_index=True (labels 0..n-1).
# The list is assigned in column order, as the displayed output shows.
new_df.loc[len(new_df)] = [111, 222, 333, 444, 555]
new_df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55
4,111,222,333,444,555


**using `pandas.concat()`**

In [16]:
# Fixed seed keeps the two random rows reproducible.
np.random.seed(0)
new_df2 = pd.DataFrame(
    data=np.random.randint(low=0, high=100, size=(2, 5)),
    columns=list('ABCDE'),
)

# Stack the random rows under `new_df`.  Columns are matched by name (see the
# displayed output) and the result gets a fresh 0..n-1 index; note that this
# rebinds `df` to the combined frame.
df = pd.concat(objs=[new_df, new_df2], ignore_index=True)
df

Unnamed: 0,A,C,B,D,E
0,1,7,4,40,100
1,2,8,5,50,200
2,3,8,6,60,300
3,11,33,22,44,55
4,111,222,333,444,555
5,44,64,47,67,67
6,9,21,83,36,87


## Deleting rows/columns

**using `drop()` method**

Syntax:
```
DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')
```

In [17]:
# Make column 'A' the row index so rows can be dropped by their 'A' value.
# The non-inplace form is used and the result rebound to `df`, rather than
# mutating the frame object that earlier cells displayed.
df = df.set_index('A')
df

Unnamed: 0_level_0,C,B,D,E
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,7,4,40,100
2,8,5,50,200
3,8,6,60,300
11,33,22,44,55
111,222,333,444,555
44,64,47,67,67
9,21,83,36,87


In [18]:
# Remove the rows whose index label (the 'A' value) is 1, 3 or 11.  `drop`
# returns a new frame by default, so `df` keeps all of its rows.
new_df = df.drop(index=[1, 3, 11])
new_df

Unnamed: 0_level_0,C,B,D,E
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,8,5,50,200
111,222,333,444,555
44,64,47,67,67
9,21,83,36,87


In [19]:
# Move 'A' back from the index into a regular column, then remove columns
# 'E' and 'D'.  The two steps are chained in their non-inplace forms and the
# result rebound, instead of mutating `new_df` twice with inplace=True.
new_df = new_df.reset_index().drop(columns=['E', 'D'])
new_df

Unnamed: 0,A,C,B
0,2,8,5
1,111,222,333
2,44,64,47
3,9,21,83


## Truncate a DataFrame
Syntax:
```
DataFrame.truncate(before=None, after=None, axis=None, copy=True)
```

In [20]:
# Reseed so the random table is identical on every run.
np.random.seed(0)
data = np.random.randint(low=0, high=100, size=(10, 4))

# Ten consecutive calendar days starting on 2024-01-01 form the row index.
dates = pd.date_range(start='2024-01-01', periods=10)
columns = list('ABCD')

df = pd.DataFrame(data=data, index=dates, columns=columns)
df

Unnamed: 0,A,B,C,D
2024-01-01,44,47,64,67
2024-01-02,67,9,83,21
2024-01-03,36,87,70,88
2024-01-04,88,12,58,65
2024-01-05,39,87,46,88
2024-01-06,81,37,25,77
2024-01-07,72,9,20,80
2024-01-08,69,79,47,64
2024-01-09,82,99,88,49
2024-01-10,29,19,19,14


**Truncate rows**

In [21]:
# Keep only the rows dated 2024-01-03 through 2024-01-07; both end dates are
# present in the displayed result.  `copy` is left at its default.
truncated_df = df.truncate(before='2024-01-03', after='2024-01-07')
truncated_df

Unnamed: 0,A,B,C,D
2024-01-03,36,87,70,88
2024-01-04,88,12,58,65
2024-01-05,39,87,46,88
2024-01-06,81,37,25,77
2024-01-07,72,9,20,80


**Truncate columns**

In [22]:
# Same operation along the column axis: keep the columns from 'B' through
# 'C' and every row.  `copy` is left at its default.
truncated_columns_df = df.truncate(before='B', after='C', axis='columns')
truncated_columns_df

Unnamed: 0,B,C
2024-01-01,47,64
2024-01-02,9,83
2024-01-03,87,70
2024-01-04,12,58
2024-01-05,87,46
2024-01-06,37,25
2024-01-07,9,20
2024-01-08,79,47
2024-01-09,99,88
2024-01-10,19,19


**Truncate with numerical index**

In [23]:
# Same seeded random table as the date-indexed example, but with the default
# integer row index.
np.random.seed(0)
data = np.random.randint(low=0, high=100, size=(10, 4))
columns = list('ABCD')

df = pd.DataFrame(data=data, columns=columns)
df

Unnamed: 0,A,B,C,D
0,44,47,64,67
1,67,9,83,21
2,36,87,70,88
3,88,12,58,65
4,39,87,46,88
5,81,37,25,77
6,72,9,20,80
7,69,79,47,64
8,82,99,88,49
9,29,19,19,14


In [24]:
# Keep only the rows whose integer index label lies between 3 and 7; the
# displayed result includes both end labels.
truncated_df = df.truncate(before=3, after=7)
truncated_df

Unnamed: 0,A,B,C,D
3,88,12,58,65
4,39,87,46,88
5,81,37,25,77
6,72,9,20,80
7,69,79,47,64


## Truncate a Series

In [25]:
# Ten seeded random integers in [0, 100), wrapped in a named Series.
np.random.seed(0)
data = np.random.randint(low=0, high=100, size=10)

s = pd.Series(data=data, name='Random Values')
s

0    44
1    47
2    64
3    67
4    67
5     9
6    83
7    21
8    36
9    87
Name: Random Values, dtype: int64

In [26]:
# Keep the entries with index labels 3 through 7 and give the result its own
# name.  `rename` is chained in its non-inplace form, so the slice is built
# and named in one expression and `s` keeps its name 'Random Values'.
truncated_values = s.truncate(before=3, after=7).rename('Truncated Values')
truncated_values

3    67
4    67
5     9
6    83
7    21
Name: Truncated Values, dtype: int64

## Iteration

In [27]:
# Small table of five people used by the iteration examples below:
# one list per column, one entry per person.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[24, 27, 22, 32, 29],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    Score=[85, 88, 90, 78, 92],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Score
0,Alice,24,New York,85
1,Bob,27,Los Angeles,88
2,Charlie,22,Chicago,90
3,David,32,Houston,78
4,Eva,29,Phoenix,92


### Iterating over rows

**using `iterrows()`**

In [28]:
# iterrows() yields one (index label, row) pair per row; each row is printed
# as a Series under its label.
for label, record in df.iterrows():
    print(f'Index: {label}\n{record}')

Index: 0
Name        Alice
Age            24
City     New York
Score          85
Name: 0, dtype: object
Index: 1
Name             Bob
Age               27
City     Los Angeles
Score             88
Name: 1, dtype: object
Index: 2
Name     Charlie
Age           22
City     Chicago
Score         90
Name: 2, dtype: object
Index: 3
Name       David
Age           32
City     Houston
Score         78
Name: 3, dtype: object
Index: 4
Name         Eva
Age           29
City     Phoenix
Score         92
Name: 4, dtype: object


**using `itertuples()`**

In [29]:
# itertuples() yields one named tuple per row; with these (default) settings
# the tuple type is called 'Pandas' and its first field is the index label.
for record in df.itertuples(index=True, name='Pandas'):
    print(record)

Pandas(Index=0, Name='Alice', Age=24, City='New York', Score=85)
Pandas(Index=1, Name='Bob', Age=27, City='Los Angeles', Score=88)
Pandas(Index=2, Name='Charlie', Age=22, City='Chicago', Score=90)
Pandas(Index=3, Name='David', Age=32, City='Houston', Score=78)
Pandas(Index=4, Name='Eva', Age=29, City='Phoenix', Score=92)


### Iterating over Columns

In [30]:
# The column labels of the frame, as a plain Python list.
columns = df.columns.tolist()
print('columns:', columns)

# Print every column in turn; selecting by label gives that column as a Series.
for name in columns:
    print(df[name])

columns: ['Name', 'Age', 'City', 'Score']
0      Alice
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object
0    24
1    27
2    22
3    32
4    29
Name: Age, dtype: int64
0       New York
1    Los Angeles
2        Chicago
3        Houston
4        Phoenix
Name: City, dtype: object
0    85
1    88
2    90
3    78
4    92
Name: Score, dtype: int64


In [31]:
# Iterating a DataFrame yields its column labels.
columns = list(df)

for i in columns:
    # Print the first value of each column.  `.iloc[0]` is explicitly
    # positional; the chained `df[i][0]` form looks up the *label* 0 and only
    # returns the first row while the index happens to start at 0.
    print(df[i].iloc[0])

Alice
24
New York
85
