# 簡訊內容過濾分析

In [19]:
import pandas as pd

# Read the tab-separated message file into a two-column DataFrame.
# `names` supplies the column labels; `header=None` (what pandas already
# assumes when `names` is given) makes the first line of the file a data row.
data = pd.read_csv(
    'https://raw.githubusercontent.com/Code-Gym/python-dataset/master/msg.txt',
    sep='\t',
    header=None,
    names=['status', 'msg'],
)
data

Unnamed: 0,status,msg
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


## startswith, endswith

In [20]:
# Evaluates to a boolean Series aligned with `data`'s index: True where the
# message text begins with a lowercase 'a' (the comparison is case-sensitive).
data.loc[:, 'msg'].str.startswith('a')

0       False
1       False
2       False
3       False
4       False
        ...  
5567    False
5568    False
5569    False
5570    False
5571    False
Name: msg, Length: 5572, dtype: bool

In [21]:
# Place the filter condition inside the brackets after `.loc`.
# Evaluated on its own, the condition is a boolean Series; `.loc` keeps the
# rows where it is True, and the `:` keeps every column.
data.loc[data['msg'].str.startswith('a'), :]

Unnamed: 0,status,msg
427,ham,aathi..where are you dear..
907,spam,"all the lastest from Stereophonics, Marley, Di..."
1071,ham,"alright, I'll make sure the car is back tonight"
2088,ham,"alright babe, justthought id sayhey! how u do..."
2843,ham,aathi..where are you dear..
2950,ham,at bruce b downs &amp; fletcher now
3440,ham,"awesome, how do I deal with the gate? Charles ..."
3471,ham,aathi..where are you dear..
3555,ham,am up to my eyes in philosophy
4249,spam,"accordingly. I repeat, just text the word ok o..."


In [22]:
# The same boolean condition can also go directly inside the brackets that
# follow the DataFrame, without `.loc`.
data[data.msg.str.startswith('a')]

Unnamed: 0,status,msg
427,ham,aathi..where are you dear..
907,spam,"all the lastest from Stereophonics, Marley, Di..."
1071,ham,"alright, I'll make sure the car is back tonight"
2088,ham,"alright babe, justthought id sayhey! how u do..."
2843,ham,aathi..where are you dear..
2950,ham,at bruce b downs &amp; fletcher now
3440,ham,"awesome, how do I deal with the gate? Charles ..."
3471,ham,aathi..where are you dear..
3555,ham,am up to my eyes in philosophy
4249,spam,"accordingly. I repeat, just text the word ok o..."


## 過濾不是垃圾信的訊息

In [23]:
# Keep only the rows whose `status` label is exactly 'ham' (the non-spam
# messages); the original row index values are preserved.
ham = data.loc[data['status'].eq('ham')]
ham

Unnamed: 0,status,msg
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
6,ham,Even my brother is not like to speak with me. ...
...,...,...
5565,ham,Huh y lei...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


## 過濾不是垃圾信的訊息中，包含英文單字「OK」的訊息
使用 `str.contains()`：以子字串方式比對（區分大小寫），因此含有「OK」的單字（例如 LOOK）也會被選出。

In [24]:
# When several conditions are needed, write each one as its own variable
# (mask1, mask2, ...) and combine them in the DataFrame filter when required.
# This keeps the conditions loosely coupled, so one can be changed without
# touching the others.
#
# `contains` is a case-sensitive substring test on the literal text 'OK', so
# messages with words that merely contain it (e.g. 'LOOK') also match.
mask = ham['msg'].str.contains('OK', regex=False)
mask

0       False
1       False
3       False
4       False
6       False
        ...  
5565    False
5568    False
5569    False
5570    False
5571    False
Name: msg, Length: 4825, dtype: bool

In [25]:
# Select the non-spam rows for which the 'OK' mask is True.
ham.loc[mask]

Unnamed: 0,status,msg
457,ham,"LOOK AT AMY URE A BEAUTIFUL, INTELLIGENT WOMAN..."
820,ham,BOO BABE! U ENJOYIN YOURJOB? U SEEMED 2 B GETT...
984,ham,LOOK AT THE FUCKIN TIME. WHAT THE FUCK YOU THI...
1080,ham,Me too watching surya movie only. . .after 6 p...
1303,ham,FRAN I DECIDED 2 GO N E WAY IM COMPLETELY BROK...
1908,ham,ELLO BABE U OK?
2158,ham,Sad story of a Man - Last week was my b'day. M...
2217,ham,OK i'm waliking ard now... Do u wan me 2 buy a...
2606,ham,HELLO U.CALL WEN U FINISH WRK.I FANCY MEETIN U...
2849,ham,Sad story of a Man - Last week was my b'day. M...


## 處理欄位名稱
將欄位名稱改為大寫

In [26]:
# A DataFrame exposes its column labels through the `columns` attribute.
# The `.str` accessor on those labels returns a new, upper-cased Index;
# nothing is assigned here, so `data` itself is left unchanged by this cell.
data.columns.str.upper()

Index(['STATUS', 'MSG'], dtype='object')

In [27]:
# Assign the upper-cased labels back to the DataFrame's `columns` attribute.
# This relabels `data` in place: after this cell its columns are 'STATUS' and
# 'MSG', and the lowercase names no longer resolve on `data`.
data.columns = [label.upper() for label in data.columns]
data

Unnamed: 0,STATUS,MSG
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


## 課後練習解答

In [28]:
# Exercise answer: non-spam messages that contain 'OK' and start with 'L'.
# `mask` is the 'OK' condition built in an earlier cell; `mask1` adds the
# starts-with-'L' condition, and `&` keeps rows where both are True.
mask1 = ham.msg.str.startswith('L')
ham.loc[mask1 & mask]

Unnamed: 0,status,msg
457,ham,"LOOK AT AMY URE A BEAUTIFUL, INTELLIGENT WOMAN..."
984,ham,LOOK AT THE FUCKIN TIME. WHAT THE FUCK YOU THI...
