In [None]:
from google.colab import drive

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


# Scraping BTS Meal's Tweets 


In [None]:
!pip install snscrape

Collecting snscrape
  Downloading https://files.pythonhosted.org/packages/81/dd/4a4ec9eedd8cc85ced7c5a6a23853965195203aec825ef3f7778a0c3b69e/snscrape-0.3.4-py3-none-any.whl
Installing collected packages: snscrape
Successfully installed snscrape-0.3.4


In [None]:
import pandas as pd
import numpy as np
import csv

import snscrape.modules.twitter as sntwitter
import itertools


In [None]:
#scraping data using snscrape library
df = pd.DataFrame(itertools.islice(sntwitter.TwitterSearchScraper(
   '#BTSMeal near:"Jakarta" within:200km').get_items(), 10000))[['username', 'date', 'content']]

In [None]:
df.head()

Unnamed: 0,username,date,content
0,VRadioFM,2021-06-28 04:30:45+00:00,apakah karyanya tersebut akan dijual. Namun ba...
1,CygnusMusicx,2021-06-27 17:47:40+00:00,Good Vibes Music Night 2021 ❤️ Best Love Songs...
2,newsmerahputih,2021-06-26 16:59:00+00:00,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...
3,cmyth9,2021-06-26 12:21:46+00:00,"Seadanya, sebisanya 💜 ini ending dari nyuci tu..."
4,bisot,2021-06-26 09:54:53+00:00,Temen gue kreatifnya gada obat dia bikin sendi...


In [None]:
df.info

<bound method DataFrame.info of            username  ...                                            content
0          VRadioFM  ...  apakah karyanya tersebut akan dijual. Namun ba...
1      CygnusMusicx  ...  Good Vibes Music Night 2021 ❤️ Best Love Songs...
2    newsmerahputih  ...  Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...
3            cmyth9  ...  Seadanya, sebisanya 💜 ini ending dari nyuci tu...
4             bisot  ...  Temen gue kreatifnya gada obat dia bikin sendi...
..              ...  ...                                                ...
370   Septianaekaa1  ...  gimana ga gemesh cobaa, orng modelannya begini...
371   AngelliaBunga  ...  @BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...
372    Citralifanny  ...  Suka kepikiran udh semangat" menanti #BTSmeal ...
373     kookieV1995  ...  @BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...
374       Chabiee26  ...  Belajar semangat menjadi  #BestFanArmy #BTSmea...

[375 rows x 3 columns]>

In [None]:
df.describe()

  """Entry point for launching an IPython kernel.


Unnamed: 0,username,date,content
count,375,375,375
unique,270,375,375
top,Citralifanny,2021-06-09 07:03:30+00:00,#BTSMeal #BTSxMcD \n\nIngat dunia ini sementar...
freq,7,1,1
first,,2021-04-19 23:59:21+00:00,
last,,2021-06-28 04:30:45+00:00,


In [None]:
df.isnull()

Unnamed: 0,username,date,content
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
...,...,...,...
370,False,False,False
371,False,False,False
372,False,False,False
373,False,False,False


In [None]:
df.isna().sum()

username    0
date        0
content     0
dtype: int64

In [None]:
df.to_csv('/content/drive/My Drive/Data Science/BTSMeal_Jakarta.csv')

From the scrapped tweets data, we get 375 tweets that correspond to the #BTSMeal hashtag around Jakarta which 270 of the tweets are unique. The uniqueness of a tweet means the tweet is not a retweet from another tweet. There is no null data from all of the features. 

#Cleaning Data


In [44]:
#  Cleaning text
import re
import string

def clean_text(tweet):
    tweet = tweet.lower() # text lowercase
    tweet = re.sub('@[^\s]+', '', tweet) # remove usernames
    tweet = re.sub('\[.*?\]', '', tweet) # remove square brackets
    tweet = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', '', tweet) # remove URLs
    tweet = re.sub('[%s]' % re.escape(string.punctuation), '', tweet) # remove punctuation
    tweet = re.sub('\w*\d\w*', '', tweet) 
    tweet = re.sub('[‘’“”…]', '', tweet)
    tweet = re.sub('\n', '', tweet)
    return tweet

    
tweet = lambda x: clean_text(x)

In [45]:
df['clean1'] = pd.DataFrame(df.content.apply(tweet))
df

Unnamed: 0,content,clean1,clean2,clean3,tokens
0,apakah karyanya tersebut akan dijual. Namun ba...,apakah karyanya tersebut akan dijual namun bag...,karyanya dijual sepatu bts meal karyanya insta...,karya jual sepatu bts meal karya instagram pri...,"[karya, jual, sepatu, bts, meal, karya, instag..."
1,Good Vibes Music Night 2021 ❤️ Best Love Songs...,good vibes music night ❤️ best love songs pla...,good vibes music night ❤️ best love songs play...,good vibes music night best love songs playlis...,"[good, vibes, music, night, best, love, songs,..."
2,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...,kemasan bts meal disulap jadi sepatu nyentrik ...,kemasan bts meal disulap sepatu nyentrik merah...,kemas bts meal sulap sepatu nyentrik merahputi...,"[kemas, bts, meal, sulap, sepatu, nyentrik, me..."
3,"Seadanya, sebisanya 💜 ini ending dari nyuci tu...",seadanya sebisanya 💜 ini ending dari nyuci tut...,seadanya 💜 ending nyuci tutup saos 😂 btsmealin...,ada ending nyuci tutup saos btsmealindonesia b...,"[ada, ending, nyuci, tutup, saos, btsmealindon..."
4,Temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat bikin bts meal ...,temen gue kreatif gada obat bikin bts meal kai...,"[temen, gue, kreatif, gada, obat, bikin, bts, ..."
...,...,...,...,...,...
370,"gimana ga gemesh cobaa, orng modelannya begini...",gimana ga gemesh cobaa orng modelannya beginia...,gimana ga gemesh cobaa orng modelannya 😫😭 mele...,gimana ga gemesh cobaa orng model leleh udaahh...,"[gimana, ga, gemesh, cobaa, orng, model, leleh..."
371,@BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...,btsmeallineギフトはこちら,btsmeallineギフトはこちら,btsmealline,[btsmealline]
372,"Suka kepikiran udh semangat"" menanti #BTSmeal ...",suka kepikiran udh semangat menanti btsmeal di...,suka kepikiran udh semangat btsmeal indo tau n...,suka pikir udh semangat btsmeal indo tau nya m...,"[suka, pikir, udh, semangat, btsmeal, indo, ta..."
373,@BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...,🤗🤗🤗btsmeal thebtsmeal,🤗🤗🤗btsmeal thebtsmeal,btsmeal thebtsmeal,"[btsmeal, thebtsmeal]"


In [46]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
additional  = ['rt','rts','retweet']
swords = set().union(stopwords.words('indonesian'), additional)

df['clean2'] = (df['clean1'].apply(lambda x: ' '.join([word for word in x.split() if word not in (swords)])))
df

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,content,clean1,clean2,clean3,tokens
0,apakah karyanya tersebut akan dijual. Namun ba...,apakah karyanya tersebut akan dijual namun bag...,karyanya dijual sepatu bts meal karyanya insta...,karya jual sepatu bts meal karya instagram pri...,"[karya, jual, sepatu, bts, meal, karya, instag..."
1,Good Vibes Music Night 2021 ❤️ Best Love Songs...,good vibes music night ❤️ best love songs pla...,good vibes music night ❤️ best love songs play...,good vibes music night best love songs playlis...,"[good, vibes, music, night, best, love, songs,..."
2,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...,kemasan bts meal disulap jadi sepatu nyentrik ...,kemasan bts meal disulap sepatu nyentrik merah...,kemas bts meal sulap sepatu nyentrik merahputi...,"[kemas, bts, meal, sulap, sepatu, nyentrik, me..."
3,"Seadanya, sebisanya 💜 ini ending dari nyuci tu...",seadanya sebisanya 💜 ini ending dari nyuci tut...,seadanya 💜 ending nyuci tutup saos 😂 btsmealin...,ada ending nyuci tutup saos btsmealindonesia b...,"[ada, ending, nyuci, tutup, saos, btsmealindon..."
4,Temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat bikin bts meal ...,temen gue kreatif gada obat bikin bts meal kai...,"[temen, gue, kreatif, gada, obat, bikin, bts, ..."
...,...,...,...,...,...
370,"gimana ga gemesh cobaa, orng modelannya begini...",gimana ga gemesh cobaa orng modelannya beginia...,gimana ga gemesh cobaa orng modelannya 😫😭 mele...,gimana ga gemesh cobaa orng model leleh udaahh...,"[gimana, ga, gemesh, cobaa, orng, model, leleh..."
371,@BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...,btsmeallineギフトはこちら,btsmeallineギフトはこちら,btsmealline,[btsmealline]
372,"Suka kepikiran udh semangat"" menanti #BTSmeal ...",suka kepikiran udh semangat menanti btsmeal di...,suka kepikiran udh semangat btsmeal indo tau n...,suka pikir udh semangat btsmeal indo tau nya m...,"[suka, pikir, udh, semangat, btsmeal, indo, ta..."
373,@BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...,🤗🤗🤗btsmeal thebtsmeal,🤗🤗🤗btsmeal thebtsmeal,btsmeal thebtsmeal,"[btsmeal, thebtsmeal]"


In [50]:
text = df['clean2']

In [26]:
!pip install Sastrawi

Collecting Sastrawi
[?25l  Downloading https://files.pythonhosted.org/packages/6f/4b/bab676953da3103003730b8fcdfadbdd20f333d4add10af949dd5c51e6ed/Sastrawi-1.0.1-py2.py3-none-any.whl (209kB)
[K     |█▋                              | 10kB 14.6MB/s eta 0:00:01[K     |███▏                            | 20kB 20.2MB/s eta 0:00:01[K     |████▊                           | 30kB 11.2MB/s eta 0:00:01[K     |██████▎                         | 40kB 8.9MB/s eta 0:00:01[K     |███████▉                        | 51kB 7.3MB/s eta 0:00:01[K     |█████████▍                      | 61kB 7.7MB/s eta 0:00:01[K     |███████████                     | 71kB 7.3MB/s eta 0:00:01[K     |████████████▌                   | 81kB 7.6MB/s eta 0:00:01[K     |██████████████                  | 92kB 7.3MB/s eta 0:00:01[K     |███████████████▋                | 102kB 6.9MB/s eta 0:00:01[K     |█████████████████▏              | 112kB 6.9MB/s eta 0:00:01[K     |██████████████████▊             | 122kB 6.9MB/

In [51]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

factory = StemmerFactory()
stemmer = factory.create_stemmer()

list_hasil = text

output = [(stemmer.stem(token)) for token in list_hasil]

In [52]:
df['clean3'] = output

In [53]:
df

Unnamed: 0,content,clean1,clean2,clean3,tokens
0,apakah karyanya tersebut akan dijual. Namun ba...,apakah karyanya tersebut akan dijual namun bag...,karyanya dijual sepatu bts meal karyanya insta...,karya jual sepatu bts meal karya instagram pri...,"[karya, jual, sepatu, bts, meal, karya, instag..."
1,Good Vibes Music Night 2021 ❤️ Best Love Songs...,good vibes music night ❤️ best love songs pla...,good vibes music night ❤️ best love songs play...,good vibes music night best love songs playlis...,"[good, vibes, music, night, best, love, songs,..."
2,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...,kemasan bts meal disulap jadi sepatu nyentrik ...,kemasan bts meal disulap sepatu nyentrik merah...,kemas bts meal sulap sepatu nyentrik merahputi...,"[kemas, bts, meal, sulap, sepatu, nyentrik, me..."
3,"Seadanya, sebisanya 💜 ini ending dari nyuci tu...",seadanya sebisanya 💜 ini ending dari nyuci tut...,seadanya 💜 ending nyuci tutup saos 😂 btsmealin...,ada ending nyuci tutup saos btsmealindonesia b...,"[ada, ending, nyuci, tutup, saos, btsmealindon..."
4,Temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat bikin bts meal ...,temen gue kreatif gada obat bikin bts meal kai...,"[temen, gue, kreatif, gada, obat, bikin, bts, ..."
...,...,...,...,...,...
370,"gimana ga gemesh cobaa, orng modelannya begini...",gimana ga gemesh cobaa orng modelannya beginia...,gimana ga gemesh cobaa orng modelannya 😫😭 mele...,gimana ga gemesh cobaa orng model leleh udaahh...,"[gimana, ga, gemesh, cobaa, orng, model, leleh..."
371,@BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...,btsmeallineギフトはこちら,btsmeallineギフトはこちら,btsmealline,[btsmealline]
372,"Suka kepikiran udh semangat"" menanti #BTSmeal ...",suka kepikiran udh semangat menanti btsmeal di...,suka kepikiran udh semangat btsmeal indo tau n...,suka pikir udh semangat btsmeal indo tau nya m...,"[suka, pikir, udh, semangat, btsmeal, indo, ta..."
373,@BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...,🤗🤗🤗btsmeal thebtsmeal,🤗🤗🤗btsmeal thebtsmeal,btsmeal thebtsmeal,"[btsmeal, thebtsmeal]"


In [54]:
df.dropna()

Unnamed: 0,content,clean1,clean2,clean3,tokens
0,apakah karyanya tersebut akan dijual. Namun ba...,apakah karyanya tersebut akan dijual namun bag...,karyanya dijual sepatu bts meal karyanya insta...,karya jual sepatu bts meal karya instagram pri...,"[karya, jual, sepatu, bts, meal, karya, instag..."
1,Good Vibes Music Night 2021 ❤️ Best Love Songs...,good vibes music night ❤️ best love songs pla...,good vibes music night ❤️ best love songs play...,good vibes music night best love songs playlis...,"[good, vibes, music, night, best, love, songs,..."
2,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...,kemasan bts meal disulap jadi sepatu nyentrik ...,kemasan bts meal disulap sepatu nyentrik merah...,kemas bts meal sulap sepatu nyentrik merahputi...,"[kemas, bts, meal, sulap, sepatu, nyentrik, me..."
3,"Seadanya, sebisanya 💜 ini ending dari nyuci tu...",seadanya sebisanya 💜 ini ending dari nyuci tut...,seadanya 💜 ending nyuci tutup saos 😂 btsmealin...,ada ending nyuci tutup saos btsmealindonesia b...,"[ada, ending, nyuci, tutup, saos, btsmealindon..."
4,Temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat bikin bts meal ...,temen gue kreatif gada obat bikin bts meal kai...,"[temen, gue, kreatif, gada, obat, bikin, bts, ..."
...,...,...,...,...,...
370,"gimana ga gemesh cobaa, orng modelannya begini...",gimana ga gemesh cobaa orng modelannya beginia...,gimana ga gemesh cobaa orng modelannya 😫😭 mele...,gimana ga gemesh cobaa orng model leleh udaahh...,"[gimana, ga, gemesh, cobaa, orng, model, leleh..."
371,@BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...,btsmeallineギフトはこちら,btsmeallineギフトはこちら,btsmealline,[btsmealline]
372,"Suka kepikiran udh semangat"" menanti #BTSmeal ...",suka kepikiran udh semangat menanti btsmeal di...,suka kepikiran udh semangat btsmeal indo tau n...,suka pikir udh semangat btsmeal indo tau nya m...,"[suka, pikir, udh, semangat, btsmeal, indo, ta..."
373,@BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...,🤗🤗🤗btsmeal thebtsmeal,🤗🤗🤗btsmeal thebtsmeal,btsmeal thebtsmeal,"[btsmeal, thebtsmeal]"


In [55]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [56]:
df['tokens'] = pd.DataFrame(df['clean3'].apply(nltk.word_tokenize))
df

Unnamed: 0,content,clean1,clean2,clean3,tokens
0,apakah karyanya tersebut akan dijual. Namun ba...,apakah karyanya tersebut akan dijual namun bag...,karyanya dijual sepatu bts meal karyanya insta...,karya jual sepatu bts meal karya instagram pri...,"[karya, jual, sepatu, bts, meal, karya, instag..."
1,Good Vibes Music Night 2021 ❤️ Best Love Songs...,good vibes music night ❤️ best love songs pla...,good vibes music night ❤️ best love songs play...,good vibes music night best love songs playlis...,"[good, vibes, music, night, best, love, songs,..."
2,Kemasan BTS Meal Disulap Jadi Sepatu Nyentrik ...,kemasan bts meal disulap jadi sepatu nyentrik ...,kemasan bts meal disulap sepatu nyentrik merah...,kemas bts meal sulap sepatu nyentrik merahputi...,"[kemas, bts, meal, sulap, sepatu, nyentrik, me..."
3,"Seadanya, sebisanya 💜 ini ending dari nyuci tu...",seadanya sebisanya 💜 ini ending dari nyuci tut...,seadanya 💜 ending nyuci tutup saos 😂 btsmealin...,ada ending nyuci tutup saos btsmealindonesia b...,"[ada, ending, nyuci, tutup, saos, btsmealindon..."
4,Temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat dia bikin sendi...,temen gue kreatifnya gada obat bikin bts meal ...,temen gue kreatif gada obat bikin bts meal kai...,"[temen, gue, kreatif, gada, obat, bikin, bts, ..."
...,...,...,...,...,...
370,"gimana ga gemesh cobaa, orng modelannya begini...",gimana ga gemesh cobaa orng modelannya beginia...,gimana ga gemesh cobaa orng modelannya 😫😭 mele...,gimana ga gemesh cobaa orng model leleh udaahh...,"[gimana, ga, gemesh, cobaa, orng, model, leleh..."
371,@BTS_jp_official #BTSMEAL\nhttps://t.co/48TUdF...,btsmeallineギフトはこちら,btsmeallineギフトはこちら,btsmealline,[btsmealline]
372,"Suka kepikiran udh semangat"" menanti #BTSmeal ...",suka kepikiran udh semangat menanti btsmeal di...,suka kepikiran udh semangat btsmeal indo tau n...,suka pikir udh semangat btsmeal indo tau nya m...,"[suka, pikir, udh, semangat, btsmeal, indo, ta..."
373,@BTSPublicity @BTS_twt 🤗🤗🤗\n#BTSMeal \n#TheBTS...,🤗🤗🤗btsmeal thebtsmeal,🤗🤗🤗btsmeal thebtsmeal,btsmeal thebtsmeal,"[btsmeal, thebtsmeal]"


In [34]:
!pip install google_trans_new

Collecting google_trans_new
  Downloading https://files.pythonhosted.org/packages/f9/7b/9f136106dc5824dc98185c97991d3cd9b53e70a197154dd49f7b899128f6/google_trans_new-1.1.9-py3-none-any.whl
Installing collected packages: google-trans-new
Successfully installed google-trans-new-1.1.9


In [57]:
from google_trans_new import google_translator  

translator = google_translator()  

In [58]:
def translate_column(text, target_language):
    return translator.translate(text, lang_tgt=target_language)

In [63]:
df['clean_english'] = df['content'].apply(lambda x: translate_column(x, 'en'))

JSONDecodeError: ignored