# `.copy()` method

### Import pandas

In [1]:
import pandas as pd

### Read CSV, set `name` as the index, and sort by index labels

In [2]:
# Load the cereal data with the "name" column as the index, sorted by label.
# .sort_index() is chained instead of using inplace=True so the frame is built
# in a single expression and never mutated after creation.
cereal = pd.read_csv("cereal.csv", index_col="name").sort_index()
cereal.head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


### Storing copies in memory and why

In [3]:
# create a new variable named `ratings` by selecting the "rating" column
# straight from the DataFrame (no explicit copy is made here)
ratings = cereal["rating"]
ratings.head()

name
100% Bran                    68.402973
100% Natural Bran            33.983679
All-Bran                     59.425505
All-Bran with Extra Fiber    93.704912
Almond Delight               34.384843
Name: rating, dtype: float64

In [4]:
# update the rating for All-Bran through `ratings`; pandas warns here that a
# value is being set on a copy of a slice from a DataFrame
ratings["All-Bran"] = 69.425505

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings["All-Bran"] = 69.425505


In [5]:
# read the value back from `ratings` to confirm the assignment took effect
ratings["All-Bran"]

69.425505

In [6]:
# the assignment worked: `ratings` now holds the new All-Bran value
ratings.head()

name
100% Bran                    68.402973
100% Natural Bran            33.983679
All-Bran                     69.425505
All-Bran with Extra Fiber    93.704912
Almond Delight               34.384843
Name: rating, dtype: float64

In [13]:
# ⚠️ because `ratings` was taken directly from `cereal`, the write above can
# also change the `cereal` DataFrame, which you might not expect. Whether it
# does depends on the pandas version (presumably not under Copy-on-Write —
# TODO confirm against the installed version).
# NOTE(review): the saved output below still shows 59.425505 for All-Bran and
# this cell's execution count is out of sequence — restart the kernel and run
# all cells to refresh it.
cereal.head(3)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505


### Solution? Create a copy!

In [14]:
# re-read the original dataset so `cereal` starts again from the values in the CSV;
# .sort_index() is chained instead of using inplace=True so the frame is built
# in a single expression and never mutated after creation
cereal = pd.read_csv("cereal.csv", index_col="name").sort_index()

In [15]:
# take an explicit copy of the "rating" column with .copy();
# this way, edits to `ratings` don't change the original dataset ✅
ratings = cereal["rating"].copy()
ratings.head(3)

name
100% Bran            68.402973
100% Natural Bran    33.983679
All-Bran             59.425505
Name: rating, dtype: float64

In [16]:
# same update as before, but this time `ratings` was created with .copy()
ratings["All-Bran"] = 69.425505

In [17]:
# the copy holds the new value
ratings["All-Bran"]

69.425505

In [18]:
# the original DataFrame is untouched: All-Bran still has its original rating
cereal.head(3)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
