# **Manipulating Elements in a DataFrame**

## **Best practices**

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv('titanic.csv')

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Change single value with _loc_**

In [4]:
# Label-based assignment: row label 1, column label 'age'.
titanic.loc[1, 'age'] = 40

In [5]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Change single value with _iloc_**

In [6]:
# Position-based assignment: row position 1, column position 3 (the 'age'
# column in this frame). This puts the value back to 38.
titanic.iloc[1, 3] = 38

In [7]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Change multiple values with _loc_**

In [8]:
import numpy as np

# Label-based slices include both endpoints, so 1:3 blanks the age of rows
# 1, 2 and 3. Use the lowercase `np.nan`: the `np.NaN` alias was removed in
# NumPy 2.0 and raises AttributeError there.
titanic.loc[1:3, 'age'] = np.nan

In [9]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,,1,0,71.2833,C,C
2,1,3,female,,0,0,7.925,S,
3,1,1,female,,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Change multiple values with _iloc_**

In [10]:
# Position-based slices exclude the stop position: 1:4 covers row positions
# 1, 2 and 3. The scalar 99 is written to every selected cell.
titanic.iloc[1:4, 3] = 99

In [11]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,99.0,1,0,71.2833,C,C
2,1,3,female,99.0,0,0,7.925,S,
3,1,1,female,99.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [12]:
# A list on the right-hand side assigns one value per selected row, in order.
titanic.iloc[1:4, 3] = [38, 26, 35]

In [13]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Change multiple values with _boolean indexing_**

In [14]:
# Keep the index labels of all passengers younger than one year so the same
# rows can be addressed again after their age has been overwritten.
babies_idx = titanic[titanic['age'] < 1].index

In [15]:
# One-step alternative using the boolean mask directly:
#   titanic.loc[titanic.age < 1, 'age'] = 1
# Here the index labels saved in `babies_idx` are used instead, so the same
# rows can still be selected once their age no longer matches the mask.
titanic.loc[babies_idx, 'age'] = 1

In [16]:
titanic.loc[babies_idx]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


### **Changing multiple values in a row**

In [17]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [18]:
# Label slices include both endpoints: 'survived':'sex' selects the columns
# survived, pclass and sex, so the list supplies one value per column.
titanic.loc[1, 'survived':'sex'] = [0, 0, 'male']

In [19]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,0,0,male,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Changing values in multiple rows and multiple columns**

In [20]:
# replace() returns a new DataFrame. The result is only displayed here and is
# not assigned back, so `titanic` itself keeps its original values.
titanic.replace(0, 'Zero')

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,Zero,3,male,22.0,1,Zero,7.25,S,
1,Zero,Zero,male,38.0,1,Zero,71.2833,C,C
2,1,3,female,26.0,Zero,Zero,7.925,S,
3,1,1,female,35.0,1,Zero,53.1,S,C
4,Zero,3,male,35.0,Zero,Zero,8.05,S,
...,...,...,...,...,...,...,...,...,...
886,Zero,2,male,27.0,Zero,Zero,13.0,S,
887,1,1,female,19.0,Zero,Zero,30.0,S,B
888,Zero,3,female,,1,2,23.45,S,
889,1,1,male,26.0,Zero,Zero,30.0,C,C


In [21]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,0,0,male,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## **Chained Indexing: How you should NOT do it (Part 1)**

### **Example 1**

In [22]:
import pandas as pd

In [23]:
titanic = pd.read_csv('titanic.csv')

In [24]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [25]:
age = titanic.age

In [26]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [27]:
# `age` was obtained from `titanic` by attribute access, without .copy().
# In this run the write is also visible in `titanic`, and pandas warns that a
# value is being set on a copy of a slice.
# NOTE(review): whether the parent frame changes depends on the pandas
# version / Copy-on-Write setting — confirm before relying on it.
age[1] = 40

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [28]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [29]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [30]:
# Chained indexing: `titanic.age` and `[1]` are two separate indexing steps.
# In this run the value does reach `titanic`, but pandas warns about setting
# on a copy of a slice.
titanic.age[1] = 42

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic.age[1] = 42


In [31]:
titanic.age[1]

42.0

In [32]:
# Non-chained indexing
titanic.loc[1, 'age'] = 38

In [33]:
titanic.age[1]

38.0

In [34]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Example 2**

In [35]:
slice1 = titanic[['sex', 'age']]

In [36]:
slice1.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,38.0
2,female,26.0
3,female,35.0
4,male,35.0


In [37]:
# `slice1` came from titanic[['sex', 'age']]. In this run the write changes
# only `slice1`; `titanic` still shows 38.0 for this row afterwards.
slice1.iloc[1, 1] = 43

In [38]:
slice1.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,43.0
2,female,26.0
3,female,35.0
4,male,35.0


In [39]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Example 3**

In [40]:
slice2 = titanic.loc[:, ['sex', 'age']]

In [41]:
slice2.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,38.0
2,female,26.0
3,female,35.0
4,male,35.0


In [42]:
# `slice2` came from titanic.loc[:, ['sex', 'age']]. In this run the write
# changes only `slice2`; `titanic` still shows 38.0 for this row afterwards.
slice2.iloc[1, 1] = 42

In [43]:
slice2.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,42.0
2,female,26.0
3,female,35.0
4,male,35.0


In [44]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## **Chained Indexing: How you should NOT do it (Part 2)**

In [45]:
import pandas as pd

In [46]:
titanic = pd.read_csv('titanic.csv')

In [47]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


### **Example 4**

In [48]:
idx_babies = titanic[titanic.age < 1].index

In [49]:
# Anti-pattern (shown on purpose): the boolean selection yields a separate
# object and the ['age'] assignment lands on that object. In this run
# `titanic` is left unchanged and pandas warns about setting on a copy.
# The single-step form is titanic.loc[titanic.age < 1, 'age'] = 1.
titanic[titanic.age < 1]['age'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic[titanic.age < 1]['age'] = 1


In [50]:
titanic.loc[idx_babies, :]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


### **Example 5**

In [51]:
# Anti-pattern (shown on purpose): column first, then boolean mask. In this
# run the values in `titanic` do change, but pandas still warns.
# NOTE(review): whether this reaches the parent frame depends on the pandas
# version / Copy-on-Write setting — confirm before relying on it.
titanic['age'][titanic.age < 1] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic['age'][titanic.age < 1] = 1


In [52]:
titanic.loc[idx_babies, :]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


### **Example 6**

In [53]:
# Anti-pattern (shown on purpose): three chained selections before the
# assignment. In this run nothing changes in `titanic` and no warning is
# displayed, so the failure is silent.
titanic[['sex', 'age']][titanic.age == 1]['age'] = 0

In [54]:
titanic.loc[idx_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


## **View vs. Copy**

In [55]:
import pandas as pd

In [56]:
titanic = pd.read_csv('titanic.csv')

In [57]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [58]:
age = titanic.age

In [59]:
age._is_view

True

In [60]:
age._is_copy is None

True

In [61]:
age[1] = 42

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 42


In [62]:
age.head()

0    22.0
1    42.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [63]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [64]:
babies = titanic[titanic.age < 1]

In [65]:
babies

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


In [66]:
babies._is_view

False

In [67]:
babies._is_copy is None

False

In [68]:
# `_is_copy` is a private pandas attribute. Calling it here displays the
# frame that `babies` was selected from.
# NOTE(review): private API — may be absent or behave differently in other
# pandas versions.
babies._is_copy()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


In [69]:
# `babies` was produced by a boolean selection on `titanic`. In this run the
# assignment changes `babies` only (with a warning); `titanic` keeps the
# original ages.
babies.age = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  babies.age = 1


In [70]:
babies

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1,0,2,29.0,S,
305,1,1,male,1,1,2,151.55,S,C
469,1,3,female,1,2,1,19.2583,C,
644,1,3,female,1,2,1,19.2583,C,
755,1,2,male,1,1,1,14.5,S,
803,1,3,male,1,0,1,8.5167,C,
831,1,2,male,1,1,1,18.75,S,


In [71]:
titanic.loc[babies.index]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


## **Simple rules to...**

### **Manipulate the whole dataframe**

In [72]:
import pandas as pd

In [73]:
titanic = pd.read_csv('titanic.csv')

In [74]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [75]:
titanic.iloc[1, 3] = 42

In [76]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [77]:
# Remember the index labels of passengers younger than one year before their
# age is overwritten, so the same rows can be inspected afterwards.
idx_babies = titanic.loc[titanic['age'] < 1].index

In [78]:
# NOTE(review): `t` is not referenced by any of the visible cells — looks
# like a leftover; confirm and remove if nothing else uses it.
t = titanic[titanic.age < 1]

In [79]:
idx_babies

Index([78, 305, 469, 644, 755, 803, 831], dtype='int64')

In [80]:
titanic.loc[titanic.age < 1, 'age'] = 1

In [81]:
titanic.loc[idx_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


In [82]:
titanic.loc[idx_babies, 'age'] = 0.9999

In [83]:
titanic.loc[idx_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.9999,0,2,29.0,S,
305,1,1,male,0.9999,1,2,151.55,S,C
469,1,3,female,0.9999,2,1,19.2583,C,
644,1,3,female,0.9999,2,1,19.2583,C,
755,1,2,male,0.9999,1,1,14.5,S,
803,1,3,male,0.9999,0,1,8.5167,C,
831,1,2,male,0.9999,1,1,18.75,S,


### **Manipulating a Slice**

In [84]:
import pandas as pd

In [85]:
titanic = pd.read_csv('titanic.csv')

In [86]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [87]:
# An explicit .copy() makes `age` independent of `titanic`: writing to `age`
# afterwards neither changes `titanic` nor triggers a warning in this run.
age = titanic.age.copy()

In [88]:
age._is_view

False

In [89]:
age._is_copy is None

True

In [90]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [91]:
age[1] = 42

In [92]:
age.head()

0    22.0
1    42.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [93]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [94]:
# Select rows and columns in a single .loc call, then .copy() so that later
# writes go to an independent frame and leave `titanic` untouched.
babies = titanic.loc[titanic.age < 1, ['age', 'sex']].copy()

In [95]:
babies['age'] = 1

In [96]:
babies

Unnamed: 0,age,sex
78,1,male
305,1,male
469,1,female
644,1,female
755,1,male
803,1,male
831,1,male


In [97]:
titanic.loc[babies.index]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,
