# Part 1: Pandas - from Zero to Hero

## Manipulating Values in a DataFrame
A **DataFrame** is like a table in Excel, where data is organized in rows and columns. Sometimes, you might want to update or modify the values in this table. Here's how you can do it using Python with **pandas**, a popular library for data analysis.


### Best Practice (how you should do it)

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv("titanic.csv")

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing a single Value (Option 1 with loc)

In [4]:
titanic.loc[1, "age"] = 40

In [5]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing a single Value (Option 2 with iloc) 

In [6]:
titanic.iloc[1, 3] = 41

In [7]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,41.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing multiple values in a column (Option 1 with loc)

In [8]:
titanic.loc[1:3, "age"] = 42

In [9]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,42.0,0,0,7.925,S,
3,1,1,female,42.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing multiple values in a column (Option 2 with iloc)

In [10]:
titanic.iloc[1:4, 3] = [43, 44, 45]

In [11]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Changing multiple values in a column (Option 3 with boolean indexing)

In [12]:
index_babies = titanic.loc[titanic.age < 1, "age"].index

In [13]:
titanic.loc[titanic.age < 1, "age"] = 1

In [14]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


#### Changing multiple values in a row 

In [15]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [16]:
titanic.loc[0,"survived":"sex"] =[1, 1, "female"]

In [17]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,0,7.25,S,
1,1,1,female,43.0,1,0,71.2833,C,C
2,1,3,female,44.0,0,0,7.925,S,
3,1,1,female,45.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


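#### Changing multiple values in multiple rows/columns

Note: `replace()` returns a new DataFrame and leaves `titanic` itself unchanged; assign the result back (e.g. `titanic = titanic.replace(0, "Zero")`) if you want to keep the change.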

In [18]:
titanic.replace(0, "Zero")

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,1,1,female,22.0,1,Zero,7.25,S,
1,1,1,female,43.0,1,Zero,71.2833,C,C
2,1,3,female,44.0,Zero,Zero,7.925,S,
3,1,1,female,45.0,1,Zero,53.1,S,C
4,Zero,3,male,35.0,Zero,Zero,8.05,S,
...,...,...,...,...,...,...,...,...,...
886,Zero,2,male,27.0,Zero,Zero,13.0,S,
887,1,1,female,19.0,Zero,Zero,30.0,S,B
888,Zero,3,female,,1,2,23.45,S,
889,1,1,male,26.0,Zero,Zero,30.0,C,C


### How you should NOT do it (Part 1)

In [85]:
import pandas as pd

In [86]:
titanic = pd.read_csv("titanic.csv")

In [87]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [88]:
age = titanic.age

In [89]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [90]:
age[1] = 40

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [91]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [92]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [93]:
titanic.age[1] = 41 #This is Chained Indexing!!!

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic.age[1] = 41 #This is Chained Indexing!!!


In [94]:
titanic.loc[1, "age"] = 42 #This is NOT Chained Indexing and the idiomatic/best way to do it!!!

![image.png](attachment:image.png)

In [95]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [29]:
slice1 = titanic[["sex", "age"]]
slice1.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,42.0
2,female,26.0
3,female,35.0
4,male,35.0


In [30]:
slice1.iloc[1,1] = 43

In [31]:
slice1

Unnamed: 0,sex,age
0,male,22.0
1,female,43.0
2,female,26.0
3,female,35.0
4,male,35.0
...,...,...
886,male,27.0
887,female,19.0
888,female,
889,male,26.0


In [32]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


![image.png](attachment:image.png)

In [33]:
slice2 = titanic.loc[:, ["sex","age"]]
slice2

Unnamed: 0,sex,age
0,male,22.0
1,female,42.0
2,female,26.0
3,female,35.0
4,male,35.0
...,...,...
886,male,27.0
887,female,19.0
888,female,
889,male,26.0


In [34]:
slice2.iloc[1,1] = 44

In [35]:
slice2.head()

Unnamed: 0,sex,age
0,male,22.0
1,female,44.0
2,female,26.0
3,female,35.0
4,male,35.0


In [36]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,42.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


![image.png](attachment:image.png)

### How you should NOT do it (Part 2)

In [37]:
import pandas as pd

In [38]:
titanic = pd.read_csv("titanic.csv")

In [39]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [40]:
index_babies = titanic[titanic.age < 1].index

In [41]:
titanic[titanic.age < 1]["age"] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic[titanic.age < 1]["age"] = 1


In [42]:
titanic.loc[index_babies,:]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


![image.png](attachment:image.png)

In [43]:
titanic["age"][titanic.age < 1] = 1

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  titanic["age"][titanic.age < 1] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  titanic["age"][titanic.age < 1] = 1

In [44]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


![image.png](attachment:image.png)

In [45]:
titanic[["sex", "age"]][titanic.age == 1]["age"] = 0

In [46]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


![image.png](attachment:image.png)

#### SettingWithCopyWarning: You assigned new values with Chained Indexing. It is not clear whether you changed the original DataFrame, or whether this was your intention at all. So, please check!!!

![image.png](attachment:image.png)

### View vs. Copy

#### Slicing a DataFrame / creating a view on the original DataFrame

In [47]:
import pandas as pd

In [48]:
titanic = pd.read_csv("titanic.csv")

In [49]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [50]:
age = titanic.age

In [51]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [52]:
age._is_view

True

In [53]:
age._is_copy is None

True

In [54]:
age[1] = 40

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age[1] = 40


In [55]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [56]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


#### Slicing a DataFrame / creating a copy of the original DataFrame

In [57]:
df_baby = titanic[titanic.age < 1]

In [58]:
df_baby._is_view

False

In [59]:
df_baby._is_copy is None

False

In [60]:
df_baby._is_copy()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.2500,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.9250,S,
3,1,1,female,35.0,1,0,53.1000,S,C
4,0,3,male,35.0,0,0,8.0500,S,
...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,
887,1,1,female,19.0,0,0,30.0000,S,B
888,0,3,female,,1,2,23.4500,S,
889,1,1,male,26.0,0,0,30.0000,C,C


In [61]:
df_baby.age = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_baby.age = 1


In [62]:
df_baby

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1,0,2,29.0,S,
305,1,1,male,1,1,2,151.55,S,C
469,1,3,female,1,2,1,19.2583,C,
644,1,3,female,1,2,1,19.2583,C,
755,1,2,male,1,1,1,14.5,S,
803,1,3,male,1,0,1,8.5167,C,
831,1,2,male,1,1,1,18.75,S,


In [63]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


## If you want to work with and manipulate the whole DataFrame...

## ... avoid chained Indexing!!! 

In [64]:
import pandas as pd

In [65]:
titanic = pd.read_csv("titanic.csv")

In [66]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [67]:
titanic.iloc[1, 3] = 40

In [68]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,40.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [69]:
index_babies = titanic.loc[titanic.age < 1, "age"].index

In [70]:
titanic.loc[titanic.age < 1, "age"] = 1

In [71]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,1.0,0,2,29.0,S,
305,1,1,male,1.0,1,2,151.55,S,C
469,1,3,female,1.0,2,1,19.2583,C,
644,1,3,female,1.0,2,1,19.2583,C,
755,1,2,male,1.0,1,1,14.5,S,
803,1,3,male,1.0,0,1,8.5167,C,
831,1,2,male,1.0,1,1,18.75,S,


## If you want to work with and manipulate a Slice of a DataFrame...

## ...avoid chained Indexing ...and make a copy with .copy()

In [72]:
import pandas as pd

In [73]:
titanic = pd.read_csv("titanic.csv")

In [74]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [75]:
age = titanic.age.copy()

In [76]:
age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [77]:
age[1] = 40

In [78]:
age.head()

0    22.0
1    40.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [79]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [80]:
baby_ages = titanic.loc[titanic.age < 1, ["age", "sex"]].copy()

In [81]:
baby_ages

Unnamed: 0,age,sex
78,0.83,male
305,0.92,male
469,0.75,female
644,0.75,female
755,0.67,male
803,0.42,male
831,0.83,male


In [82]:
baby_ages["age"] = 1

In [83]:
baby_ages

Unnamed: 0,age,sex
78,1,male
305,1,male
469,1,female
644,1,female
755,1,male
803,1,male
831,1,male


In [84]:
titanic.loc[index_babies]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
78,1,2,male,0.83,0,2,29.0,S,
305,1,1,male,0.92,1,2,151.55,S,C
469,1,3,female,0.75,2,1,19.2583,C,
644,1,3,female,0.75,2,1,19.2583,C,
755,1,2,male,0.67,1,1,14.5,S,
803,1,3,male,0.42,0,1,8.5167,C,
831,1,2,male,0.83,1,1,18.75,S,


![image.png](attachment:image.png)