# 09_pandas的SettingWithCopyWarning报警复现、原因、解决方案

## 0.读取数据

In [16]:
import pandas as pd

In [17]:
# Path to the source CSV, read into the DataFrame `df` used by every cell below.
fpath = "./datas/beijing_tianqi/beijing_tianqi_2018.csv"
df = pd.read_csv(fpath)

In [18]:
df.head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
0,2018-01-01,3℃,-6℃,晴~多云,东北风,1-2级,59,良,2
1,2018-01-02,2℃,-5℃,阴~多云,东北风,1-2级,49,优,1
2,2018-01-03,2℃,-5℃,多云,北风,1-2级,28,优,1
3,2018-01-04,0℃,-8℃,阴,东北风,1-2级,28,优,1
4,2018-01-05,3℃,-6℃,多云~晴,西北风,1-2级,50,优,1


In [19]:
# Strip the "℃" suffix from both temperature columns and convert them to int32.
# Assign with df[col] = ... so each column is replaced outright: since pandas 2.0,
# df.loc[:, col] = ... first tries to write into the existing object-dtype column
# in place, which leaves the column dtype as object instead of int32.
# regex=False makes it explicit that "℃" is a literal string, not a pattern.
df["bWendu"] = df["bWendu"].str.replace("℃", "", regex=False).astype("int32")
df["yWendu"] = df["yWendu"].str.replace("℃", "", regex=False).astype("int32")

In [20]:
df.head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
0,2018-01-01,3,-6,晴~多云,东北风,1-2级,59,良,2
1,2018-01-02,2,-5,阴~多云,东北风,1-2级,49,优,1
2,2018-01-03,2,-5,多云,北风,1-2级,28,优,1
3,2018-01-04,0,-8,阴,东北风,1-2级,28,优,1
4,2018-01-05,3,-6,多云~晴,西北风,1-2级,50,优,1


## 1. 复现

In [21]:
# Boolean mask selecting only the March 2018 rows for analysis
# (rows whose "ymd" string starts with "2018-03").
condition = df["ymd"].str.startswith("2018-03")

In [22]:
condition

0      False
1      False
2      False
3      False
4      False
       ...  
360    False
361    False
362    False
363    False
364    False
Name: ymd, Length: 365, dtype: bool

In [24]:
# Set the temperature difference. This is deliberately written as chained
# indexing to reproduce the warning: df[condition] is evaluated first (get),
# then ['wencha'] = ... is applied to that intermediate result (set), not to df.
df[condition]['wencha'] = df['bWendu'] - df['yWendu']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[condition]['wencha'] = df['bWendu'] - df['yWendu']


In [25]:
# Check whether the assignment above took effect on df
df[condition].head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel
59,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1
60,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2
61,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5
62,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3
63,2018-03-05,8,-3,晴,南风,1-2级,94,良,2


## 2.原因
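#### `df[condition]['wencha'] = ...` 是链式操作:先执行 `df[condition]`(get,布尔索引返回的是一份拷贝),再对这份临时拷贝执行 `['wencha'] = ...`(set)。赋值落在临时拷贝上,源 `df` 并没有被修改,因此 pandas 发出 SettingWithCopyWarning。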
#### 核心要诀:pandas的dataframe的修改写操作,只允许在源dataframe上进行,一步到位

## 3.解决方法1
#### 将get+set的两步操作,改成set的一步操作

In [26]:
# One-step set: a single .loc call on df itself names both the rows (condition)
# and the column ("wen_cha") and assigns to them, with no intermediate get.
df.loc[condition,"wen_cha"] = df["bWendu"] - df["yWendu"]

In [28]:
df[condition].head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wen_cha
59,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11.0
60,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10.0
61,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10.0
62,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9.0
63,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11.0


## 4.解决方法2
#### 如果需要预筛选数据做后续的处理分析,使用copy复制dataframe

In [29]:
# Take an explicit copy of the filtered rows so that later writes target this
# new DataFrame rather than an intermediate selection of df.
df_month3 = df[condition].copy()

In [30]:
df_month3.head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wen_cha
59,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11.0
60,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10.0
61,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10.0
62,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9.0
63,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11.0


In [31]:
# Compute the temperature difference from the copy's own columns: df_month3 is
# an independent DataFrame, so there is no need to reach back into df and rely
# on index alignment to pick out the matching rows.
df_month3["wen_cha"] = df_month3["bWendu"] - df_month3["yWendu"]

In [32]:
df_month3.head()

Unnamed: 0,ymd,bWendu,yWendu,tianqi,fengxiang,fengli,aqi,aqiInfo,aqiLevel,wen_cha
59,2018-03-01,8,-3,多云,西南风,1-2级,46,优,1,11
60,2018-03-02,9,-1,晴~多云,北风,1-2级,95,良,2,10
61,2018-03-03,13,3,多云~阴,北风,1-2级,214,重度污染,5,10
62,2018-03-04,7,-2,阴~多云,东南风,1-2级,144,轻度污染,3,9
63,2018-03-05,8,-3,晴,南风,1-2级,94,良,2,11
