In [1]:
import re
import math

## 1. SMS_dataset 불러오기

In [2]:
# Load the SMS corpus; every line of the file is treated as one document.
# Iterating the file object yields the same newline-terminated lines as
# readlines().
file_path = 'SMS_dataset.txt'
with open(file_path, mode='r', encoding='utf-8') as sms_file:
    doc = list(sms_file)

## 2. 문장을 document로 취급하여 TF-IDF 수행 

In [4]:
def remove_special_char(text):
    """Replace punctuation and other special characters with single spaces.

    Every character that is not an ASCII letter, an ASCII digit or a space is
    turned into a space, runs of spaces are collapsed into one, and leading and
    trailing whitespace is removed.

    Args:
        text: Raw sentence (str).

    Returns:
        The cleaned sentence (str) with no leading or trailing space.
    """
    text = re.sub('[^ a-zA-Z0-9]', ' ', text)
    # Collapse repeated spaces into a single one.
    text = re.sub(' +', ' ', text)
    # Strip only after substituting: a special character at either end of the
    # sentence has just become a space that still needs trimming.
    return text.strip()


def doc_to_word_bag(sentence):
    """Split a sentence into its bag (list) of words.

    The sentence is cleaned with remove_special_char and then split on single
    spaces.

    Args:
        sentence: Raw sentence (str).

    Returns:
        List of the non-empty tokens, in order of appearance (duplicates kept).
    """
    cleaned = remove_special_char(sentence)
    # Splitting on ' ' can yield empty strings; keep them out of the word bag.
    return [token for token in cleaned.split(' ') if token != '']


def count_term_freq(word_bag):
    """Count how many times each word occurs in a word bag.

    Args:
        word_bag: Iterable of words (tokens) of one document.

    Returns:
        Dict mapping each word to its number of occurrences; keys appear in
        order of first occurrence.
    """
    counts = {}
    for token in word_bag:
        # Start from 0 the first time a token is seen.
        counts[token] = counts.get(token, 0) + 1
    return counts

def count_doc_freq(doc_index, word_bag_list):
    """Compute the document frequency of every word of one document.

    Args:
        doc_index: Position in word_bag_list of the document whose words are
            looked up.
        word_bag_list: Sequence with the word bag (list of hashable words) of
            every document in the corpus.

    Returns:
        Dict mapping each distinct word of the selected document to the number
        of documents in word_bag_list that contain it; keys appear in order of
        first occurrence.
    """
    # Build one set per document so each membership test is a hash lookup
    # instead of a linear scan of that document's word list.
    word_sets = [set(bag) for bag in word_bag_list]
    doc_freq = {}
    for word in word_bag_list[doc_index]:
        if word in doc_freq:
            # Repeated word: its document frequency is already known.
            continue
        doc_freq[word] = sum(1 for words in word_sets if word in words)
    return doc_freq


def get_weights(term_freq, doc_freq, doc_num):
    """Compute the TF-IDF weight of every word of one document.

    The weight of a word is term_freq[word] * log(doc_num / doc_freq[word]).

    Args:
        term_freq: Dict mapping each word of the document to its count.
        doc_freq: Dict mapping each of those words to its document frequency.
        doc_num: Total number of documents in the corpus.

    Returns:
        List of (word, weight) tuples sorted by weight in descending order;
        words with equal weight keep their order from term_freq.
    """
    scored = [
        (word, freq * math.log(doc_num / doc_freq[word]))
        for word, freq in term_freq.items()
    ]
    # Sort by weight, highest first.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [5]:
# Tokenise every document; the bag of document i is stored under 'word_bag<i>'.
word_bag_dict = {
    'word_bag' + str(idx): doc_to_word_bag(line)
    for idx, line in enumerate(doc)
}

In [6]:
word_bag_dict

{'word_bag0': ['Go',
  'until',
  'jurong',
  'point',
  'crazy',
  'Available',
  'only',
  'in',
  'bugis',
  'n',
  'great',
  'world',
  'la',
  'e',
  'buffet',
  'Cine',
  'there',
  'got',
  'amore',
  'wat'],
 'word_bag1': ['Ok', 'lar', 'Joking', 'wif', 'u', 'oni'],
 'word_bag2': ['Free',
  'entry',
  'in',
  '2',
  'a',
  'wkly',
  'comp',
  'to',
  'win',
  'FA',
  'Cup',
  'final',
  'tkts',
  '21st',
  'May',
  '2005',
  'Text',
  'FA',
  'to',
  '87121',
  'to',
  'receive',
  'entry',
  'question',
  'std',
  'txt',
  'rate',
  'T',
  'C',
  's',
  'apply',
  '08452810075over18',
  's'],
 'word_bag3': ['U',
  'dun',
  'say',
  'so',
  'early',
  'hor',
  'U',
  'c',
  'already',
  'then',
  'say'],
 'word_bag4': ['Nah',
  'I',
  'don',
  't',
  'think',
  'he',
  'goes',
  'to',
  'usf',
  'he',
  'lives',
  'around',
  'here',
  'though'],
 'word_bag5': ['FreeMsg',
  'Hey',
  'there',
  'darling',
  'it',
  's',
  'been',
  '3',
  'week',
  's',
  'now',
  'and',
  'no',

In [7]:
# Term frequencies of every document; document i is stored under 'term_freq<i>'.
term_freq_dict = {
    'term_freq' + str(idx): count_term_freq(word_bag_dict['word_bag' + str(idx)])
    for idx in range(len(doc))
}

In [8]:
term_freq_dict

{'term_freq0': {'Go': 1,
  'until': 1,
  'jurong': 1,
  'point': 1,
  'crazy': 1,
  'Available': 1,
  'only': 1,
  'in': 1,
  'bugis': 1,
  'n': 1,
  'great': 1,
  'world': 1,
  'la': 1,
  'e': 1,
  'buffet': 1,
  'Cine': 1,
  'there': 1,
  'got': 1,
  'amore': 1,
  'wat': 1},
 'term_freq1': {'Ok': 1, 'lar': 1, 'Joking': 1, 'wif': 1, 'u': 1, 'oni': 1},
 'term_freq2': {'Free': 1,
  'entry': 2,
  'in': 1,
  '2': 1,
  'a': 1,
  'wkly': 1,
  'comp': 1,
  'to': 3,
  'win': 1,
  'FA': 2,
  'Cup': 1,
  'final': 1,
  'tkts': 1,
  '21st': 1,
  'May': 1,
  '2005': 1,
  'Text': 1,
  '87121': 1,
  'receive': 1,
  'question': 1,
  'std': 1,
  'txt': 1,
  'rate': 1,
  'T': 1,
  'C': 1,
  's': 2,
  'apply': 1,
  '08452810075over18': 1},
 'term_freq3': {'U': 2,
  'dun': 1,
  'say': 2,
  'so': 1,
  'early': 1,
  'hor': 1,
  'c': 1,
  'already': 1,
  'then': 1},
 'term_freq4': {'Nah': 1,
  'I': 1,
  'don': 1,
  't': 1,
  'think': 1,
  'he': 2,
  'goes': 1,
  'to': 1,
  'usf': 1,
  'lives': 1,
  'around'

In [9]:
# Document frequencies of the words of every document, stored under
# 'doc_freq<i>'.
# The list of all word bags does not change inside the loop, so build it once
# instead of re-materialising it on every iteration.
word_bag_list = list(word_bag_dict.values())

doc_freq_dict = {}
for i in range(len(doc)):
    doc_freq_dict['doc_freq' + str(i)] = count_doc_freq(i, word_bag_list)

In [10]:
doc_freq_dict

{'doc_freq0': {'Go': 14,
  'until': 26,
  'jurong': 1,
  'point': 12,
  'crazy': 11,
  'Available': 3,
  'only': 175,
  'in': 755,
  'bugis': 6,
  'n': 118,
  'great': 82,
  'world': 24,
  'la': 7,
  'e': 82,
  'buffet': 2,
  'Cine': 1,
  'there': 183,
  'got': 199,
  'amore': 1,
  'wat': 69},
 'doc_freq1': {'Ok': 150,
  'lar': 37,
  'Joking': 1,
  'wif': 25,
  'u': 578,
  'oni': 4},
 'doc_freq2': {'Free': 47,
  'entry': 20,
  'in': 755,
  '2': 421,
  'a': 1101,
  'wkly': 10,
  'comp': 12,
  'to': 1632,
  'win': 37,
  'FA': 2,
  'Cup': 3,
  'final': 13,
  'tkts': 4,
  '21st': 3,
  'May': 19,
  '2005': 3,
  'Text': 45,
  '87121': 4,
  'receive': 33,
  'question': 21,
  'std': 11,
  'txt': 83,
  'rate': 29,
  'T': 78,
  'C': 64,
  's': 469,
  'apply': 30,
  '08452810075over18': 2},
 'doc_freq3': {'U': 302,
  'dun': 44,
  'say': 80,
  'so': 312,
  'early': 30,
  'hor': 2,
  'c': 51,
  'already': 86,
  'then': 176},
 'doc_freq4': {'Nah': 10,
  'I': 1512,
  'don': 112,
  't': 314,
  'think'

In [11]:
# TF-IDF weights of every document, stored under 'term_weights<i>'.
doc_count = len(doc)
term_weights_dict = {}
# Pair each document's term frequencies with its document frequencies by
# position. Walking the dict views directly avoids rebuilding two full lists
# of values on every iteration just to index a single element.
for i, tf_of_doc, df_of_doc in zip(range(doc_count),
                                   term_freq_dict.values(),
                                   doc_freq_dict.values()):
    term_weights_dict['term_weights' + str(i)] = get_weights(tf_of_doc, df_of_doc, doc_count)

In [12]:
term_weights_dict

{'term_weights0': [('jurong', 8.625509334899697),
  ('Cine', 8.625509334899697),
  ('amore', 8.625509334899697),
  ('buffet', 7.932362154339751),
  ('Available', 7.5268970462315865),
  ('bugis', 6.833749865671641),
  ('la', 6.679599185844383),
  ('crazy', 6.227614062101326),
  ('point', 6.140602685111696),
  ('Go', 5.986452005284438),
  ('world', 5.44745550455175),
  ('until', 5.367412796878214),
  ('wat', 4.391402830302437),
  ('great', 4.218790087635443),
  ('e', 4.218790087635443),
  ('n', 3.8548247104340314),
  ('only', 3.460723360976182),
  ('there', 3.416023182058275),
  ('got', 3.332204510175204),
  ('in', 1.9987915856506717)],
 'term_weights1': [('Joking', 8.625509334899697),
  ('oni', 7.239214973779806),
  ('wif', 5.4066335100314955),
  ('lar', 5.014591422255472),
  ('Ok', 3.6148740408034405),
  ('u', 2.265935466227319)],
 'term_weights2': [('FA', 15.864724308679502),
  ('entry', 11.25955412269141),
  ('08452810075over18', 7.932362154339751),
  ('Cup', 7.5268970462315865),
  (

### 동일 단어에 대해 가중치 합하기

In [13]:
# Sum the TF-IDF weight of each word over all documents.
# Iterate the per-document (word, weight) lists directly rather than
# rebuilding the list of all documents' weights for every single lookup.
weight_totals = {}
for doc_weights in term_weights_dict.values():
    for word, weight in doc_weights:
        if word not in weight_totals:
            weight_totals[word] = weight
        else:
            weight_totals[word] += weight

# Sort by total weight in descending order; the result is a list of
# (word, total_weight) tuples.
term_weights_sum = sorted(weight_totals.items(), key=lambda x: x[1], reverse=True)

In [14]:
term_weights_sum

[('you', 2661.3834705990585),
 ('to', 2637.631873058928),
 ('I', 2630.8150095491596),
 ('a', 2161.506419169996),
 ('the', 2110.0580195681005),
 ('i', 1982.1159141897006),
 ('u', 1839.939598576596),
 ('and', 1787.4538518546806),
 ('is', 1699.2880587384889),
 ('in', 1678.9849319465698),
 ('me', 1614.5424334084173),
 ('my', 1498.6267931611997),
 ('for', 1477.3804899066822),
 ('it', 1474.0992224774166),
 ('of', 1410.4483881946594),
 ('your', 1369.8616739741706),
 ('2', 1363.7587926427411),
 ('s', 1309.2255736538661),
 ('that', 1275.8416231047916),
 ('on', 1262.6268056381728),
 ('have', 1214.9664760779226),
 ('m', 1115.0113570450028),
 ('t', 1113.057027059689),
 ('U', 1107.7312806594323),
 ('are', 1088.0594232118685),
 ('call', 1087.561923282594),
 ('now', 1074.3824886935024),
 ('can', 1044.3253790723663),
 ('or', 1043.7134206142716),
 ('be', 1033.0189049546032),
 ('not', 1027.4186752601865),
 ('at', 1017.5392940228251),
 ('with', 1006.3880996424218),
 ('gt', 997.4297715802743),
 ('lt', 991

### 가중치 상위 200개 단어를 vocabulary로 선정

In [15]:
# Number of top-weighted words kept as the vocabulary, i.e. the dimension of
# the sentence vectors built from it.
VOCAB_SIZE = 200

# Keep only the words of the highest-ranked (word, weight) pairs.
# The loop variable is deliberately not `_`: assigning to `_` in a notebook
# shadows IPython's "last output" variable.
vocabulary = [word for word, _weight in term_weights_sum[:VOCAB_SIZE]]

In [16]:
vocabulary

['you',
 'to',
 'I',
 'a',
 'the',
 'i',
 'u',
 'and',
 'is',
 'in',
 'me',
 'my',
 'for',
 'it',
 'of',
 'your',
 '2',
 's',
 'that',
 'on',
 'have',
 'm',
 't',
 'U',
 'are',
 'call',
 'now',
 'can',
 'or',
 'be',
 'not',
 'at',
 'with',
 'gt',
 'lt',
 'will',
 'get',
 'so',
 '4',
 'ur',
 'but',
 'do',
 'You',
 'up',
 'we',
 'll',
 'out',
 'from',
 'know',
 'go',
 'when',
 'this',
 'if',
 'just',
 'like',
 'all',
 'come',
 'was',
 'got',
 'there',
 'day',
 'time',
 'am',
 'No',
 'he',
 'then',
 'only',
 '1',
 'want',
 'lor',
 'about',
 'by',
 'what',
 'no',
 'home',
 'How',
 'love',
 'Call',
 'one',
 'send',
 'good',
 'n',
 'going',
 'need',
 'how',
 'Ok',
 'her',
 'as',
 'da',
 'back',
 'We',
 'its',
 'still',
 'text',
 'If',
 'd',
 'don',
 'But',
 'ok',
 'our',
 'later',
 'week',
 'FREE',
 'see',
 'today',
 'been',
 'What',
 'So',
 'free',
 'Just',
 'It',
 'think',
 'here',
 'has',
 'r',
 'any',
 'some',
 'Do',
 'Sorry',
 're',
 'phone',
 'she',
 'take',
 'an',
 'mobile',
 'him',
 

## 3. 각 문장을 vocabulary와 비교하여 200차원 벡터로 표현

In [17]:
def sent_to_vec(word_bag, vocabulary):
    """Encode a word bag as a binary presence vector over the vocabulary.

    Args:
        word_bag: Collection of the words of one sentence.
        vocabulary: Sequence of vocabulary words; its order fixes the order of
            the vector components.

    Returns:
        List with one int per vocabulary word: 1 if the word occurs in
        word_bag, otherwise 0.
    """
    return [1 if term in word_bag else 0 for term in vocabulary]

In [18]:
# Encode every document as a binary vector over the vocabulary.
# Materialise the list of word bags once; rebuilding it on every iteration
# just to index one element makes the loop quadratic in the document count.
word_bags = list(word_bag_dict.values())
vector_list = [sent_to_vec(word_bags[i], vocabulary) for i in range(len(doc))]

In [19]:
vector_list

[[0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],

## 4. KMeans 클러스터링 수행

In [20]:
from sklearn.cluster import KMeans

In [21]:
# Cluster the sentence vectors into two groups.
# n_init is set explicitly because its default changed across scikit-learn
# releases (10 in older versions, 'auto' later); pinning it together with
# random_state keeps the clustering reproducible across versions.
km = KMeans(n_clusters=2, n_init=10, random_state=0)
# fit_predict fits the model and returns the cluster label of each vector.
y_km = km.fit_predict(vector_list)

## 5. 결과값 출력하기

In [23]:
# Print every sentence next to the cluster it was assigned to.
# Lines read with readlines() keep their trailing newline; drop it so the
# sentence and its cluster label are printed on the same line.
for sentence, cluster in zip(doc, y_km):
    print('문장:', sentence.rstrip('\n'), 'Cluster:', cluster)

문장: Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...
 Cluster: 0
문장: Ok lar... Joking wif u oni...
 Cluster: 0
문장: Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's
 Cluster: 1
문장: U dun say so early hor... U c already then say...
 Cluster: 0
문장: Nah I don't think he goes to usf, he lives around here though
 Cluster: 1
문장: FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, 鶯1.50 to rcv
 Cluster: 1
문장: Even my brother is not like to speak with me. They treat me like aids patent.
 Cluster: 1
문장: As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune
 Cluster: 1
문장: WINNER!! As a valued network customer you have been selected to receive

 Cluster: 1
문장: Congrats! Nokia 3650 video camera phone is your Call 09066382422 Calls cost 150ppm Ave call 3mins vary from mobiles 16+ Close 300603 post BCM4284 Ldn WC1N3XX
 Cluster: 0
문장: Booked ticket for pongal?
 Cluster: 0
문장: You available now? I'm like right around hillsborough &amp;  &lt;#&gt; th
 Cluster: 0
문장: The message sent is askin for  &lt;#&gt; dollars. Shoul i pay  &lt;#&gt;  or  &lt;#&gt; ?
 Cluster: 0
문장: Ask g or iouri, I've told the story like ten times already
 Cluster: 0
문장: How long does applebees fucking take
 Cluster: 0
문장: Hi hope u get this txt~journey hasnt been gd,now about 50 mins late I think.
 Cluster: 0
문장: But i have to. I like to have love and arrange.
 Cluster: 1
문장: Yes..he is really great..bhaji told kallis best cricketer after sachin in world:).very tough to get out.
 Cluster: 1
문장: You were supposed to wake ME up &gt;:(
 Cluster: 1
문장: Oic... I saw him too but i tot he din c me... I found a group liao...
 Cluster: 0
문장: Sorry, I'll call later
 C

 Cluster: 0
문장: Hey what's up charles sorry about the late reply.
 Cluster: 0
문장: all the lastest from Stereophonics, Marley, Dizzee Racal, Libertines and The Strokes! Win Nookii games with Flirt!! Click TheMob WAP Bookmark or text WAP to 82468
 Cluster: 1
문장: I.ll give her once i have it. Plus she said grinule greet you whenever we speak
 Cluster: 0
문장: WHITE FUDGE OREOS ARE IN STORES
 Cluster: 0
문장: January Male Sale! Hot Gay chat now cheaper, call 08709222922. National rate from 1.5p/min cheap to 7.8p/min peak! To stop texts call 08712460324 (10p/min)
 Cluster: 1
문장: My love ! How come it took you so long to leave for Zaher's? I got your words on ym and was happy to see them but was sad you had left. I miss you
 Cluster: 1
문장: I am sorry it hurt you.
 Cluster: 0
문장: Can't. I feel nauseous. I'm so pissed. I didn't eat any sweets all week cause today I was planning to pig out. I was dieting all week. And now I'm not hungry :/
 Cluster: 1
문장: Ok lor but not too early. Me still having p

 Cluster: 0
문장: Oh ho. Is this the first time u use these type of words
 Cluster: 0
문장: Am I the only one who doesn't stalk profiles?
 Cluster: 0
문장: Ever green quote ever told by Jerry in cartoon \A Person Who Irritates u Always Is the one Who Loves u Vry Much But Fails to Express It...!..!! :-) :-) gud nyt"
 Cluster: 1
문장: Yes i thought so. Thanks.
 Cluster: 0
문장: But if she.s drinkin i'm ok.
 Cluster: 0
문장: Just wondering, the others just took off
 Cluster: 0
문장: Night has ended for another day, morning has come in a special way. May you smile like the sunny rays and leaves your worries at the blue blue bay. Gud mrng
 Cluster: 1
문장: What do you do, my dog ? Must I always wait till the end of your day to have word from you ? Did you run out of time on your cell already?
 Cluster: 1
문장: Happy new year to u too!
 Cluster: 1
문장: Hey...Great deal...Farm tour 9am to 5pm $95/pax, $50 deposit by 16 May
 Cluster: 1
문장: Eat jap done oso aft ur lect wat... 卿 got lect at 12 rite...
 Cluster: 0


문장: Ya! when are ? taking ure practical lessons? I start in june..
 Cluster: 0
문장: That's good, because I need drugs
 Cluster: 0
문장: Stupid.its not possible
 Cluster: 0
문장: Can ? all decide faster cos my sis going home liao..
 Cluster: 0
문장: Summers finally here! Fancy a chat or flirt with sexy singles in yr area? To get MATCHED up just reply SUMMER now. Free 2 Join. OptOut txt STOP Help08714742804
 Cluster: 0
문장: U sleeping now.. Or you going to take? Haha.. I got spys wat.. Me online checking n replying mails lor..
 Cluster: 1
문장: CLAIRE here am havin borin time & am now alone U wanna cum over 2nite? Chat now 09099725823 hope 2 C U Luv CLAIRE xx Calls鶯1/minmoremobsEMSPOBox45PO139WA
 Cluster: 0
문장: Fighting with the world is easy, u either win or lose bt fightng with some1 who is close to u is dificult if u lose - u lose if u win - u still lose.
 Cluster: 1
문장: Bought one ringtone and now getting texts costing 3 pound offering more tones etc
 Cluster: 0
문장: Yalru lyfu astne chikku.. B

 Cluster: 0
문장: No just send to you. Bec you in temple na.
 Cluster: 1
문장: You aren't coming home between class, right? I need to work out and shower!
 Cluster: 1
문장: Hi if ur lookin 4 saucy daytime fun wiv busty married woman Am free all next week Chat now 2 sort time 09099726429 JANINExx Calls鶯1/minMobsmoreLKPOBOX177HP51FL
 Cluster: 0
문장: S but mostly not like that.
 Cluster: 0
문장: 卿 v ma fan...
 Cluster: 0
문장: Dunno cos i was v late n when i reach they inside already... But we ate spageddies lor... It's e gals who r laughing at me lor...
 Cluster: 0
문장: Guess who spent all last night phasing in and out of the fourth dimension
 Cluster: 0
문장: So now my dad is gonna call after he gets out of work and ask all these crazy questions.
 Cluster: 0
문장: Yes..but they said its IT.,
 Cluster: 0
문장: Very hurting n meaningful lines ever: \I compromised everything for my love
 Cluster: 0
문장: Lmao!nice 1
 Cluster: 0
문장: Glad to see your reply.
 Cluster: 1
문장: URGENT! We are trying to contact U. To

문장: You call him now ok i said call him
 Cluster: 0
문장: Call to the number which is available in appointment. And ask to connect the call to waheed fathima.
 Cluster: 1
문장: Or ? go buy wif him then i meet ? later can?
 Cluster: 0
문장: Mmmm ... Fuck ... Not fair ! You know my weaknesses ! *grins* *pushes you to your knee's* *exposes my belly and pulls your head to it* Don't forget ... I know yours too *wicked smile*
 Cluster: 1
문장: Today my system sh get ready.all is well and i am also in the deep well
 Cluster: 0
문장: Mom wants to know where you at
 Cluster: 1
문장: Aight, I'll text you when I'm back
 Cluster: 0
문장: Dont know supports ass and srt i thnk. I think ps3 can play through usb too
 Cluster: 0
문장: Oh ok i didnt know what you meant. Yep i am baby jontin
 Cluster: 0
문장: You have WON a guaranteed 鶯1000 cash or a 鶯2000 prize.To claim yr prize call our customer service representative on
 Cluster: 0
문장: Would you like to see my XXX pics they are so hot they were nearly banned in the uk!

 Cluster: 0
문장: Kallis is ready for bat in 2nd innings
 Cluster: 0
문장: Thanx but my birthday is over already.
 Cluster: 0
문장: Ugh y can't u just apologize, admit u were wrong and ask me to take u back?
 Cluster: 1
문장: I noe la... U wana pei bf oso rite... K lor, other days den...
 Cluster: 0
문장: Yes, i'm small kid.. And boost is the secret of my energy..
 Cluster: 0
문장: IM GONNA MISS U SO MUCH
 Cluster: 0
문장: Is avatar supposed to have subtoitles
 Cluster: 1
문장: Simply sitting and watching match in office..
 Cluster: 0
문장: You can jot down things you want to remember later.
 Cluster: 1
문장: Oh sorry please its over
 Cluster: 0
문장: Hey are we going for the lo lesson or gym?
 Cluster: 0
문장: Dont pack what you can buy at any store.like cereals. If you must pack food, pack gari or something 9ja that you will miss.
 Cluster: 0
문장: You always make things bigger than they are
 Cluster: 0
문장: 卿 dun wan to watch infernal affair?
 Cluster: 1
문장: Me not waking up until 4 in the afternoon, sup
 Clu

 Cluster: 0
문장: (And my man carlos is definitely coming by mu tonight, no excuses)
 Cluster: 0
문장: soon you will have the real thing princess! Do i make you wet? :)
 Cluster: 0
문장: Raji..pls do me a favour. Pls convey my Birthday wishes to Nimya. Pls. Today is her birthday.
 Cluster: 1
문장: Haha, my legs and neck are killing me and my amigos are hoping to end the night with a burn, think I could swing by in like an hour?
 Cluster: 1
문장: URGENT! Your mobile No 07xxxxxxxxx won a 鶯2,000 bonus caller prize on 02/06/03! this is the 2nd attempt to reach YOU! call 09066362231 ASAP! BOX97N7QP, 150PPM
 Cluster: 1
문장: Usually the body takes care of it buy making sure it doesnt progress. Can we pls continue this talk on saturday.
 Cluster: 0
문장: URGENT!! Your 4* Costa Del Sol Holiday or 鶯5000 await collection. Call 09050090044 Now toClaim. SAE, TC s, POBox334, Stockport, SK38xh, Cost鶯1.50/pm, Max10mins
 Cluster: 0
문장: Hmm well, night night
 Cluster: 0
문장: Just wanted to say holy shit you guys were

 Cluster: 0
문장: Tell your friends what you plan to do on Valentines day @ &lt;URL&gt;
 Cluster: 1
문장: Alright, see you in a bit
 Cluster: 0
문장: Cheers for the message Zogtorius. I陽ve been staring at my phone for an age deciding whether to text or not.
 Cluster: 1
문장: I will take care of financial problem.i will help:)
 Cluster: 0
문장: Tell dear what happen to you. Why you talking to me like an alian
 Cluster: 1
문장: Double your mins & txts on Orange or 1/2 price linerental - Motorola and SonyEricsson with B/Tooth FREE-Nokia FREE Call MobileUpd8 on 08000839402 or2optout/HV9D
 Cluster: 0
문장: 1) Go to write msg 2) Put on Dictionary mode 3)Cover the screen with hand, 4)Press  &lt;#&gt; . 5)Gently remove Ur hand.. Its interesting..:)
 Cluster: 1
문장: Okie...
 Cluster: 0
문장: Hi this is yijue, can i meet u at 11 tmr?
 Cluster: 0
문장: Its posible dnt live in  &lt;#&gt; century cm frwd n thnk different
 Cluster: 0
문장: But i dint slept in afternoon.
 Cluster: 0
문장: That seems unnecessarily affection

 Cluster: 1
문장: You in your room? I need a few
 Cluster: 0
문장: I dont want to hear anything
 Cluster: 1
문장: Hey. For me there is no leave on friday. Wait i will ask my superior and tell you..
 Cluster: 0
문장: Ultimately tor motive tui achieve korli.
 Cluster: 0
문장: From 5 to 2 only my work timing.
 Cluster: 1
문장: 됛_ and don됛? worry we됛?l have finished by march 됛_ ish!
 Cluster: 0
문장: The house is on the water with a dock, a boat rolled up with a newscaster who dabbles in jazz flute behind the wheel
 Cluster: 0
문장: Congrats 2 mobile 3G Videophones R yours. call 09063458130 now! videochat wid ur mates, play java games, Dload polypH music, noline rentl. bx420. ip4. 5we. 150p
 Cluster: 0
문장: Your next amazing xxx PICSFREE1 video will be sent to you enjoy! If one vid is not enough for 2day text back the keyword PICSFREE1 to get the next video.
 Cluster: 1
문장: Now thats going to ruin your thesis!
 Cluster: 1
문장: In sch but neva mind u eat 1st lor..
 Cluster: 0
문장: Hey whats up? U sleeping all

 Cluster: 1
문장: Well I might not come then...
 Cluster: 0
문장: Long after I quit. I get on only like 5 minutes a day as it is.
 Cluster: 0
문장: Then its most likely called Mittelschmertz. Google it. If you dont have paracetamol dont worry it will go.
 Cluster: 0
문장: Well at this right I'm gonna have to get up and check today's steam sales/pee so text me when you want me to come get you
 Cluster: 1
문장: Just arrived, see you in a couple days &lt;3
 Cluster: 0
문장: K, wat s tht incident?
 Cluster: 0
문장: Yeah get the unlimited
 Cluster: 0
문장: cThen i thk shd b enuff.. Still got conclusion n contents pg n references.. I'll b doing da contents pg n cover pg..
 Cluster: 0
문장: Forgot it takes me 3 years to shower, sorry. Where you at/your phone dead yet?
 Cluster: 1
문장: 卿 got wat to buy tell us then ? no need to come in again.
 Cluster: 1
문장: When you are big..| God will bring success.
 Cluster: 0
문장: U됛챭e Bin Awarded 鶯50 to Play 4 Instant Cash. Call 08715203028 To Claim. EVERY 9th Player Wins Mi

 Cluster: 0
문장: I finished my lunch already. U wake up already?
 Cluster: 0
문장: You still at the game?
 Cluster: 0
문장: You have got tallent but you are wasting.
 Cluster: 0
문장: What is your record for one night? :)
 Cluster: 0
문장: Also sir, i sent you an email about how to log into the usc payment portal. I.ll send you another message that should explain how things are back home. Have a great weekend.
 Cluster: 1
문장: gonna let me know cos comes bak from holiday that day.  is coming. Don't4get2text me  number.
 Cluster: 0
문장: Jokin only lar... :-) depends on which phone my father can get lor...
 Cluster: 0
문장: Aight, lemme know what's up
 Cluster: 0
문장: Get ready for  &lt;#&gt;  inches of pleasure...
 Cluster: 0
문장: Raji..pls do me a favour. Pls convey my Birthday wishes to Nimya. Pls. Today is her birthday.
 Cluster: 1
문장: ;-) ok. I feel like john lennon.
 Cluster: 0
문장: Cos darren say ? considering mah so i ask ?...
 Cluster: 0
문장: You are not bothering me but you have to trust my ans

문장: Good morning pookie pie! Lol hope I didn't wake u up
 Cluster: 0
문장: MAYBE IF YOU WOKE UP BEFORE FUCKING 3 THIS WOULDN'T BE A PROBLEM.
 Cluster: 0
문장: Happy birthday to you....dear.with lots of love.rakhesh NRI
 Cluster: 1
문장: Howz that persons story
 Cluster: 0
문장: This is the 2nd time we have tried 2 contact u. U have won the 750 Pound prize. 2 claim is easy, call 08712101358 NOW! Only 10p per min. BT-national-rate
 Cluster: 0
문장: X2  &lt;#&gt; . Are you going to get that
 Cluster: 1
문장: Hi neva worry bout da truth coz the truth will lead me 2 ur heart. It陽s the least a unique person like u deserve. Sleep tight or morning
 Cluster: 0
문장: UR awarded a City Break and could WIN a 鶯200 Summer Shopping spree every WK. Txt STORE to 88039.SkilGme.TsCs087147403231Winawk!Age16+鶯1.50perWKsub
 Cluster: 1
문장: Is ur paper today in e morn or aft?
 Cluster: 0
문장: I will lick up every drop :) are you ready to use your mouth as well?
 Cluster: 1
문장: And you! Will expect you whenever you text! Hop

 Cluster: 0
문장: Bull. Your plan was to go floating off to IKEA with me without a care in the world. So i have to live with your mess another day.
 Cluster: 1
문장: Then i buy.
 Cluster: 0
문장: URGENT! Your Mobile number has been awarded with a 鶯2000 Bonus Caller Prize. Call 09058095201 from land line. Valid 12hrs only
 Cluster: 0
문장: Heehee that was so funny tho
 Cluster: 0
문장: It only does simple arithmetic not percentages.
 Cluster: 0
문장: Yeah we wouldn't leave for an hour at least, how's 4 sound?
 Cluster: 0
문장: As a valued customer, I am pleased to advise you that following recent review of your Mob No. you are awarded with a 鶯1500 Bonus Prize, call 09066364589
 Cluster: 1
문장: Thanks honey. Have a great day.
 Cluster: 0
문장: 'An Amazing Quote'' - \Sometimes in life its difficult to decide whats wrong!! a lie that brings a smile or the truth that brings a tear....\""
 Cluster: 1
문장: Good night my dear.. Sleepwell&amp;Take care
 Cluster: 0
문장: Then ? ask dad to pick ? up lar... 卿 wan 2 s

문장: Should i send you naughty pix? :)
 Cluster: 0
문장: You are a 鶯1000 winner or Guaranteed Caller Prize, this is our Final attempt to contact you! To Claim Call 09071517866 Now! 150ppmPOBox10183BhamB64XE
 Cluster: 1
문장: Xmas & New Years Eve tickets are now on sale from the club, during the day from 10am till 8pm, and on Thurs, Fri & Sat night this week. They're selling fast!
 Cluster: 0
문장: Tyler (getting an 8th) has to leave not long after 9, can you get here in like an hour?
 Cluster: 1
문장: Prepare to be pounded every night...
 Cluster: 1
문장: Actually, my mobile is full of msg. And i m doing a work online, where i need to send them  &lt;#&gt;  sent msg i wil explain u later.
 Cluster: 1
문장: Sorry, I'll call later
 Cluster: 0
문장: Good evening! How are you?
 Cluster: 0
문장: I'm at home. Please call
 Cluster: 0
문장: Oic cos me n my sis got no lunch today my dad went out... So dunno whether 2 eat in sch or wat...
 Cluster: 0
문장: Mmmmm ... It was sooooo good to wake to your words this morni

문장: Same, I'm at my great aunts anniversary party in tarpon springs
 Cluster: 0
문장: Cab is available.they pick up and drop at door steps.
 Cluster: 0
문장: ok....take care.umma to you too...
 Cluster: 1
문장: Unlimited texts. Limited minutes.
 Cluster: 0
문장: Double Mins & 1000 txts on Orange tariffs. Latest Motorola, SonyEricsson & Nokia with Bluetooth FREE! Call MobileUpd8 on 08000839402 or call2optout/HF8
 Cluster: 0
문장: No problem. We will be spending a lot of quality time together...
 Cluster: 0
문장: URGENT This is our 2nd attempt to contact U. Your 鶯900 prize from YESTERDAY is still awaiting collection. To claim CALL NOW 09061702893. ACL03530150PM
 Cluster: 1
문장: Have you heard from this week?
 Cluster: 0
문장: Dear Dave this is your final notice to collect your 4* Tenerife Holiday or #5000 CASH award! Call 09061743806 from landline. TCs SAE Box326 CW25WX 150ppm
 Cluster: 1
문장: Yes. Last  practice
 Cluster: 0
문장: tells u 2 call 09066358152 to claim 鶯5000 prize. U have 2 enter all ur mobi

 Cluster: 0
문장: Its ok, called mom instead have fun
 Cluster: 0
문장: Dear Voucher Holder, To claim this weeks offer, at your PC please go to http://www.wtlp.co.uk/text. Ts&Cs apply.
 Cluster: 1
문장: Well if I'm that desperate I'll just call armand again
 Cluster: 0
문장: Are you at work right now ?
 Cluster: 0
문장: Congrats! Nokia 3650 video camera phone is your Call 09066382422 Calls cost 150ppm Ave call 3mins vary from mobiles 16+ Close 300603 post BCM4284 Ldn WC1N3XX
 Cluster: 0
문장: Haven't heard anything and he's not answering my texts so I'm guessing he flaked. That said the jb is fantastic
 Cluster: 0
문장: Mmmmmm ... I love you,so much, Ahmad ... I can't wait for this year to begin as every second takes me closer to being at your side. Happy New Year, my love!!
 Cluster: 1
문장: Pls what's the full name of joke's school cos fees in university of florida seem to actually be  &lt;#&gt; k. Pls holla back
 Cluster: 1
문장: Sorry, I'll call later
 Cluster: 0
문장: Ok... But they said i've got wis

 Cluster: 0
문장: Call me when you get the chance plz &lt;3
 Cluster: 0
문장: The new deus ex game comin early next yr
 Cluster: 0
문장: My computer just fried the only essential part we don't keep spares of because my fucking idiot roommates looovvve leaving the thing running on full  &lt;#&gt; /7
 Cluster: 0
문장: My friend, she's studying at warwick, we've planned to go shopping and to concert tmw, but it may be canceled, havn't seen  for ages, yeah we should get together sometime!
 Cluster: 1
문장: Probably a couple hours tops
 Cluster: 0
문장: LOL .. *grins* .. I'm not babe, but thanks for thinking of me!
 Cluster: 0
문장: Man this bus is so so so slow. I think you're gonna get there before me
 Cluster: 0
문장: Hope this text meets you smiling. If not then let this text give you a reason to smile. Have a beautiful day.
 Cluster: 1
문장: In case you wake up wondering where I am, I forgot I have to take care of something for grandma today, should be done before the parade
 Cluster: 1
문장: Ok
 Cluster:

 Cluster: 0
문장: Sorry that was my uncle. I.ll keep in touch
 Cluster: 0
문장: Saw Guys and Dolls last night with Patrick Swayze it was great
 Cluster: 0
문장: URGENT This is our 2nd attempt to contact U. Your 鶯900 prize from YESTERDAY is still awaiting collection. To claim CALL NOW 09061702893
 Cluster: 1
문장: Santa calling! Would your little ones like a call from Santa Xmas Eve? Call 09077818151 to book you time. Calls1.50ppm last 3mins 30s T&C www.santacalling.com
 Cluster: 1
문장: Just come home. I don't want u to be miserable
 Cluster: 1
문장: I dont know why she.s not getting your messages
 Cluster: 0
문장: its cool but tyler had to take off so we're gonna buy for him and drop it off at his place later tonight. Our total order is a quarter, you got enough?
 Cluster: 1
문장: The guy at the car shop who was flirting with me got my phone number from the paperwork and called and texted me. I'm nervous because of course now he may have my address. Should i call his boss and tell him, knowing this m

 Cluster: 0
문장: Ur cash-balance is currently 500 pounds - to maximize ur cash-in now send GO to 86688 only 150p/meg. CC: 08718720201 HG/Suite342/2lands Row/W1j6HL
 Cluster: 1
문장: PRIVATE! Your 2003 Account Statement for shows 800 un-redeemed S.I.M. points. Call 08715203685 Identifier Code:4xx26 Expires 13/10/04
 Cluster: 0
문장: Go chase after her and run her over while she's crossing the street
 Cluster: 0
문장: I'd like to tell you my deepest darkest fantasies. Call me 09094646631 just 60p/min. To stop texts call 08712460324 (nat rate)
 Cluster: 1
문장: Is there coming friday is leave for pongal?do you get any news from your work place.
 Cluster: 0
문장: Hey... Very inconvenient for your sis a not huh?
 Cluster: 0
문장: Ok i vl..do u know i got adsense approved..
 Cluster: 0
문장: * Was really good to see you the other day dudette, been missing you!
 Cluster: 1
문장: I want to go to perumbavoor
 Cluster: 1
문장: How many times i told in the stage all use to laugh. You not listen aha.
 Cluster: 1
문장:

문장: Okie
 Cluster: 0
문장: ree entry in 2 a weekly comp for a chance to win an ipod. Txt POD to 80182 to get entry (std txt rate) T&C's apply 08452810073 for details 18+
 Cluster: 1
문장: Our records indicate u maybe entitled to 5000 pounds in compensation for the Accident you had. To claim 4 free reply with CLAIM to this msg. 2 stop txt STOP
 Cluster: 1
문장: Sorry, I'll call later
 Cluster: 0
문장: Oh oh... Den muz change plan liao... Go back have to yan jiu again...
 Cluster: 1
문장: It's wylie, you in tampa or sarasota?
 Cluster: 0
문장: Ok... Take ur time n enjoy ur dinner...
 Cluster: 0
문장: Darren was saying dat if u meeting da ge den we dun meet 4 dinner. Cos later u leave xy will feel awkward. Den u meet him 4 lunch lor.
 Cluster: 0
문장: Spook up your mob with a Halloween collection of a logo & pic message plus a free eerie tone, txt CARD SPOOK to 8007 zed 08701417012150p per logo/pic
 Cluster: 1
문장: I like cheap! But i됛? happy to splash out on the wine if it makes you feel better..
 Cluste

문장: Otherwise had part time job na-tuition..
 Cluster: 0
문장: Oh yeah! And my diet just flew out the window
 Cluster: 0
문장: Santa Calling! Would your little ones like a call from Santa Xmas eve? Call 09058094583 to book your time.
 Cluster: 1
문장: You didnt complete your gist oh.
 Cluster: 0
문장: Er yeah, i will b there at 15:26, sorry! Just tell me which pub/cafe to sit in and come wen u can
 Cluster: 1
문장: If you can make it any time tonight or whenever you can it's cool, just text me whenever you're around
 Cluster: 0
문장: If I was I wasn't paying attention
 Cluster: 0
문장: Thanx a lot 4 ur help!
 Cluster: 0
문장: You're gonna have to be way more specific than that
 Cluster: 1
문장: Jesus armand really is trying to tell everybody he can find
 Cluster: 1
문장: I'm wif him now buying tix lar...
 Cluster: 0
문장: Mode men or have you left.
 Cluster: 0
문장: Am slow in using biola's fne
 Cluster: 0
문장: \What are youdoing later? Sar xxx\""
 Cluster: 0
문장: Hey i've booked the 2 lessons on sun liao...
 C