## Day 27: How do I avoid a SettingWithCopyWarning in pandas?
https://courses.dataschool.io/view/courses/pandas-in-30-days/2341096-course-videos/7587859-day-27-how-do-i-avoid-a-settingwithcopywarning-in-pandas

In [22]:
import pandas as pd

In [23]:
# Read the IMDb movie-ratings CSV into a DataFrame
movies = pd.read_csv(filepath_or_buffer='./data/imdb_1000.csv')

In [24]:
# Preview the first five rows
movies.head(n=5)

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."


In [25]:
# Boolean Series: True wherever content_rating is missing (NaN)
movies['content_rating'].isna()

0      False
1      False
2      False
3      False
4      False
       ...  
974    False
975    False
976    False
977    False
978    False
Name: content_rating, Length: 979, dtype: bool

In [26]:
# Rows whose content_rating is missing, selected with a boolean mask through .loc
movies.loc[movies['content_rating'].isna()]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
187,8.2,Butch Cassidy and the Sundance Kid,,Biography,110,"[u'Paul Newman', u'Robert Redford', u'Katharin..."
649,7.7,Where Eagles Dare,,Action,158,"[u'Richard Burton', u'Clint Eastwood', u'Mary ..."
936,7.4,True Grit,,Adventure,128,"[u'John Wayne', u'Kim Darby', u'Glen Campbell']"


In [27]:
# Count the missing content_rating values (each True counts as 1)
movies['content_rating'].isna().sum()

3

In [28]:
# Rows with a missing content_rating, via plain bracket indexing with a boolean mask
movies[movies['content_rating'].isna()]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
187,8.2,Butch Cassidy and the Sundance Kid,,Biography,110,"[u'Paul Newman', u'Robert Redford', u'Katharin..."
649,7.7,Where Eagles Dare,,Action,158,"[u'Richard Burton', u'Clint Eastwood', u'Mary ..."
936,7.4,True Grit,,Adventure,128,"[u'John Wayne', u'Kim Darby', u'Glen Campbell']"


In [29]:
# Frequency of each content_rating label
movies['content_rating'].value_counts()

content_rating
R            460
PG-13        189
PG           123
NOT RATED     65
APPROVED      47
UNRATED       38
G             32
PASSED         7
NC-17          7
X              4
GP             3
TV-MA          1
Name: count, dtype: int64

In [30]:
# 'NOT RATED' is really a missing value and should be stored as NaN;
# list the affected rows first
movies[movies['content_rating'].eq('NOT RATED')]


Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
5,8.9,12 Angry Men,NOT RATED,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",NOT RATED,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
41,8.5,Sunset Blvd.,NOT RATED,Drama,110,"[u'William Holden', u'Gloria Swanson', u'Erich..."
63,8.4,M,NOT RATED,Crime,99,"[u'Peter Lorre', u'Ellen Widmann', u'Inge Land..."
66,8.4,Munna Bhai M.B.B.S.,NOT RATED,Comedy,156,"[u'Sunil Dutt', u'Sanjay Dutt', u'Arshad Warsi']"
...,...,...,...,...,...,...
665,7.7,Lolita,NOT RATED,Drama,152,"[u'James Mason', u'Shelley Winters', u'Sue Lyon']"
673,7.7,Blow-Up,NOT RATED,Drama,111,"[u'David Hemmings', u'Vanessa Redgrave', u'Sar..."
763,7.6,Hunger,NOT RATED,Biography,96,"[u'Stuart Graham', u'Laine Megaw', u'Brian Mil..."
827,7.5,The Wind That Shakes the Barley,NOT RATED,Drama,127,"[u'Cillian Murphy', u'Padraic Delaney', u'Liam..."


In [31]:
import numpy as np
# Deliberately problematic: chained indexing. movies[mask] is evaluated first, and the
# attribute assignment is then applied to that intermediate result rather than to
# movies. pandas reports this with the SettingWithCopyWarning shown below, and the
# following cell confirms that movies was not modified.
movies[movies.content_rating=='NOT RATED'].content_rating = np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  movies[movies.content_rating=='NOT RATED'].content_rating = np.nan


In [32]:
# Still 3 missing values: the chained assignment never reached the movies DataFrame
movies['content_rating'].isna().sum()

3

In [33]:
# Only the three originally missing rows show up
movies[movies['content_rating'].isna()]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
187,8.2,Butch Cassidy and the Sundance Kid,,Biography,110,"[u'Paul Newman', u'Robert Redford', u'Katharin..."
649,7.7,Where Eagles Dare,,Action,158,"[u'Richard Burton', u'Clint Eastwood', u'Mary ..."
936,7.4,True Grit,,Adventure,128,"[u'John Wayne', u'Kim Darby', u'Glen Campbell']"


In [34]:
# Fix: select the rows and the column in a single .loc call so the
# assignment is applied to movies itself and no warning is raised
movies.loc[movies['content_rating'].eq('NOT RATED'), 'content_rating'] = np.nan

In [36]:
# It worked: 3 original NaNs + 65 former 'NOT RATED' rows = 68
movies['content_rating'].isna().sum()

68

In [37]:
# The former 'NOT RATED' rows now appear among the missing ones
movies[movies['content_rating'].isna()]

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
5,8.9,12 Angry Men,,Drama,96,"[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals..."
6,8.9,"The Good, the Bad and the Ugly",,Western,161,"[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ..."
41,8.5,Sunset Blvd.,,Drama,110,"[u'William Holden', u'Gloria Swanson', u'Erich..."
63,8.4,M,,Crime,99,"[u'Peter Lorre', u'Ellen Widmann', u'Inge Land..."
66,8.4,Munna Bhai M.B.B.S.,,Comedy,156,"[u'Sunil Dutt', u'Sanjay Dutt', u'Arshad Warsi']"
...,...,...,...,...,...,...
673,7.7,Blow-Up,,Drama,111,"[u'David Hemmings', u'Vanessa Redgrave', u'Sar..."
763,7.6,Hunger,,Biography,96,"[u'Stuart Graham', u'Laine Megaw', u'Brian Mil..."
827,7.5,The Wind That Shakes the Barley,,Drama,127,"[u'Cillian Murphy', u'Padraic Delaney', u'Liam..."
899,7.5,In the Loop,,Comedy,106,"[u'Tom Hollander', u'Peter Capaldi', u'James G..."


## Second example

In [38]:
# Keep only the movies rated 9.0 or higher. The explicit .copy() makes top_movies an
# independent DataFrame, so later assignments to it are unambiguous: they never
# touch movies and do not trigger SettingWithCopyWarning.
top_movies = movies.loc[movies.star_rating>=9, :].copy()

In [41]:
# Show the selected subset (small enough to display in full)
top_movies

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,142,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."


In [42]:
# (rows, columns) of the subset
top_movies.shape

(4, 6)

In [45]:
# Let's change the duration of The Shawshank Redemption (row label 0) in top_movies.
# This used to generate a SettingWithCopyWarning, but it does not seem to anymore.
# The warning was related to the statement that created top_movies:
#   top_movies = movies.loc[movies.star_rating>=9, :]
# pandas was not sure whether that selection had created a view or a copy of movies.
# In fact, it was a copy: the next cells show that top_movies changes to 150
# while movies keeps its original value of 142.
top_movies.loc[0,'duration'] = 150

In [51]:

# The new duration is visible in top_movies
top_movies.at[0, "duration"]

150

In [52]:
# The source DataFrame is unaffected by the edit made to top_movies
movies.at[0, "duration"]

142

In [53]:
# Plain assignment only binds a second name to the same DataFrame; no copy is made
test_movies = movies

In [54]:
# Identity check: True means both names refer to the very same object
test_movies is movies

True

In [55]:
# Because test_movies is the same object as movies, this write changes movies as well
test_movies.loc[0,"duration"] = 250

In [56]:
# Row 0 now shows duration 250
test_movies

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,250,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
...,...,...,...,...,...,...
974,7.4,Tootsie,PG,Comedy,116,"[u'Dustin Hoffman', u'Jessica Lange', u'Teri G..."
975,7.4,Back to the Future Part III,PG,Adventure,118,"[u'Michael J. Fox', u'Christopher Lloyd', u'Ma..."
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."


In [57]:
# movies shows the same change (duration 250 in row 0), since test_movies is movies
movies

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.3,The Shawshank Redemption,R,Crime,250,"[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt..."
1,9.2,The Godfather,R,Crime,175,"[u'Marlon Brando', u'Al Pacino', u'James Caan']"
2,9.1,The Godfather: Part II,R,Crime,200,"[u'Al Pacino', u'Robert De Niro', u'Robert Duv..."
3,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
4,8.9,Pulp Fiction,R,Crime,154,"[u'John Travolta', u'Uma Thurman', u'Samuel L...."
...,...,...,...,...,...,...
974,7.4,Tootsie,PG,Comedy,116,"[u'Dustin Hoffman', u'Jessica Lange', u'Teri G..."
975,7.4,Back to the Future Part III,PG,Adventure,118,"[u'Michael J. Fox', u'Christopher Lloyd', u'Ma..."
976,7.4,Master and Commander: The Far Side of the World,PG-13,Action,138,"[u'Russell Crowe', u'Paul Bettany', u'Billy Bo..."
977,7.4,Poltergeist,PG,Horror,114,"[u'JoBeth Williams', u""Heather O'Rourke"", u'Cr..."
