# BTS Tweet Scraping

In [1]:
# Import my API keys and access tokens from the tweetkey module
from tweetkey import consumer_api_key, consumer_api_secret_key, access_token, access_secret_token

In [2]:
# Import necessary dependencies
import time
from pathlib import Path

import pandas as pd
import tweepy

In [3]:
# Authenticate with the Twitter API through tweepy's OAuthHandler:
# the consumer key/secret create the handler, then the access token/secret are attached to it.
auth = tweepy.OAuthHandler(consumer_api_key, consumer_api_secret_key)
auth.set_access_token(access_token, access_secret_token)

# Build the API client from the authenticated handler.
# NOTE(review): wait_on_rate_limit=True presumably makes tweepy wait out rate limits
# instead of raising an error - confirm against the tweepy documentation.
api = tweepy.API(auth, wait_on_rate_limit=True)

In [4]:
# Reference: searching tweets by keywords or phrases
# Method 1 (kept commented out for reference; not executed)

# try:
 # Creation of query method using parameters
#  bts_tweets = tweepy.Cursor(api.search,q='bts butter',result_type='popular').items(150)
 
 # Pulling information from tweets iterable object
#  bts_tweets_list = [[bts_tweet.created_at, bts_tweet.id, bts_tweet.text, bts_tweet.favorite_count] for bts_tweet in bts_tweets]
 
 # Create a dataframe from the tweets list
 # Add or remove columns as you add or remove tweet information
#  bts_tweets_df = pd.DataFrame(bts_tweets_list)
 
# except BaseException as e:
#     print('failed to load,',str(e))
#     time.sleep(3)

In [5]:
# Method 2
# Search for popular tweets matching 'bts butter', taking at most 150 results
popular_search = tweepy.Cursor(api.search, q='bts butter', result_type='popular')
bts_tweets = popular_search.items(150)

# One row per tweet: creation time, tweet id, text, favorite count
bts_tweets_list = [
    [status.created_at, status.id, status.text, status.favorite_count]
    for status in bts_tweets
]

# Columns are left unnamed here, so they get the integer labels 0-3
bts_tweets_df = pd.DataFrame(bts_tweets_list)

In [6]:
# Preview the first 5 rows of the table
bts_tweets_df.head(n=5)

Unnamed: 0,0,1,2,3
0,2021-05-12 15:00:05,1392494797072474116,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
1,2021-05-12 15:00:01,1392494780836323330,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
2,2021-05-11 15:00:01,1392132393478148101,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
3,2021-05-11 16:00:00,1392147486270230531,are you ready?! the world tv debut of @BTS_twt...,189086
4,2021-05-11 16:00:00,1392147487230627844,We can’t hold it in any longer! Our debut perf...,755257


In [7]:
# Preview the last 5 rows of the table
bts_tweets_df.tail(n=5)

Unnamed: 0,0,1,2,3
24,2021-05-05 14:00:10,1389943001510711299,We can't wait for 'Butter'! Pre-order @bts_big...,206508
25,2021-05-06 20:28:37,1390403148112486402,"Hey @bts_bighit fans 🎶As promised, big news. \...",67465
26,2021-05-05 12:23:33,1389918689340821504,Yesterday @OnAirRomeo &amp; Executive Producer...,10557
27,2021-05-05 13:51:53,1389940916576612355,"To promote their upcoming single, Butter, BTS'...",848
28,2021-05-05 08:30:00,1389859914395262978,BTS（防弾少年団） ジン＆SUGA、ニューデジタルシングル「Butter」コンセプトクリッ...,1370


In [8]:
# Keep only the calendar date of column 0 (the tweet creation timestamp), dropping the time of day
created_at = pd.to_datetime(bts_tweets_df[0])
bts_tweets_df[0] = created_at.dt.date

In [9]:
# Preview the first 5 rows again to check the date column
bts_tweets_df.head(n=5)

Unnamed: 0,0,1,2,3
0,2021-05-12,1392494797072474116,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
1,2021-05-12,1392494780836323330,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
2,2021-05-11,1392132393478148101,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
3,2021-05-11,1392147486270230531,are you ready?! the world tv debut of @BTS_twt...,189086
4,2021-05-11,1392147487230627844,We can’t hold it in any longer! Our debut perf...,755257


In [10]:
# Preview the last 5 rows again to check the date column
bts_tweets_df.tail(n=5)

Unnamed: 0,0,1,2,3
24,2021-05-05,1389943001510711299,We can't wait for 'Butter'! Pre-order @bts_big...,206508
25,2021-05-06,1390403148112486402,"Hey @bts_bighit fans 🎶As promised, big news. \...",67465
26,2021-05-05,1389918689340821504,Yesterday @OnAirRomeo &amp; Executive Producer...,10557
27,2021-05-05,1389940916576612355,"To promote their upcoming single, Butter, BTS'...",848
28,2021-05-05,1389859914395262978,BTS（防弾少年団） ジン＆SUGA、ニューデジタルシングル「Butter」コンセプトクリッ...,1370


In [11]:
# Remove column 1 (the tweet id), which is not needed in the final table
cleaned_bts = bts_tweets_df.drop(columns=[1])

In [12]:
# Preview the first 5 tweets of the cleaned table
cleaned_bts.head(n=5)

Unnamed: 0,0,2,3
0,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
1,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
2,2021-05-11,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
3,2021-05-11,are you ready?! the world tv debut of @BTS_twt...,189086
4,2021-05-11,We can’t hold it in any longer! Our debut perf...,755257


In [13]:
# Preview the last 5 tweets of the cleaned table
cleaned_bts.tail(n=5)

Unnamed: 0,0,2,3
24,2021-05-05,We can't wait for 'Butter'! Pre-order @bts_big...,206508
25,2021-05-06,"Hey @bts_bighit fans 🎶As promised, big news. \...",67465
26,2021-05-05,Yesterday @OnAirRomeo &amp; Executive Producer...,10557
27,2021-05-05,"To promote their upcoming single, Butter, BTS'...",848
28,2021-05-05,BTS（防弾少年団） ジン＆SUGA、ニューデジタルシングル「Butter」コンセプトクリッ...,1370


In [14]:
# Replace the integer column labels with descriptive names
column_labels = {0: "Tweeted Date", 2: "Tweet Description", 3: "Popularity Count"}
new_bts_tweets_df = cleaned_bts.rename(columns=column_labels)

# Show up to the first 20 tweets
new_bts_tweets_df.head(n=20)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
1,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
2,2021-05-11,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
3,2021-05-11,are you ready?! the world tv debut of @BTS_twt...,189086
4,2021-05-11,We can’t hold it in any longer! Our debut perf...,755257
5,2021-05-11,"The @bts_bighit limited edition BUTTER 7"" viny...",43416
6,2021-05-11,any predictions on what @BTS_twt's #BBMAs perf...,90293
7,2021-05-11,.@BTS_twt will give the debut television perfo...,83243
8,2021-05-12,[#Behind_the_Magic_Door]\n\nPlay your song! \n...,161125
9,2021-05-11,.@BTS_twt has been added to the performer line...,89086


In [15]:
# Show up to the last 20 tweets
new_bts_tweets_df.tail(n=20)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
9,2021-05-11,.@BTS_twt has been added to the performer line...,89086
10,2021-05-11,"Big news, #BTSArmy: the world TV debut of @BTS...",14217
11,2021-05-11,|￣￣￣￣￣￣￣￣￣￣￣￣|\n Esperando los\n ...,17285
12,2021-05-11,Don't miss the World TV Debut of @BTS_twt's 'B...,15349
13,2021-05-11,"BTS is performing at the #BBMAs, Sunday May 23...",18257
14,2021-05-10,WHAT’S MELTING // BUTTER CARD 2.0\n@bts_bighit...,86005
15,2021-05-10,BTS revealed a brand new photo to accompany th...,51242
16,2021-05-11,BTS: The net worth of 28-year-old Suga will BL...,549
17,2021-05-11,BTS: Jimin's rainbow hair for Butter has broug...,913
18,2021-05-11,Netizens LOL at the hilarious ads that fans fr...,1240


In [16]:
# Display the whole renamed table of tweets
new_bts_tweets_df

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
1,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
2,2021-05-11,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
3,2021-05-11,are you ready?! the world tv debut of @BTS_twt...,189086
4,2021-05-11,We can’t hold it in any longer! Our debut perf...,755257
5,2021-05-11,"The @bts_bighit limited edition BUTTER 7"" viny...",43416
6,2021-05-11,any predictions on what @BTS_twt's #BBMAs perf...,90293
7,2021-05-11,.@BTS_twt will give the debut television perfo...,83243
8,2021-05-12,[#Behind_the_Magic_Door]\n\nPlay your song! \n...,161125
9,2021-05-11,.@BTS_twt has been added to the performer line...,89086


In [17]:
# Sort by popularity in descending order: most popular tweet at the top, least popular at the bottom
top_5_bot_5_popular_least_tweets = new_bts_tweets_df.sort_values("Popularity Count", ascending=False)

# Show the 5 most popular tweets about BTS's Butter
top_5_bot_5_popular_least_tweets.head(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
22,2021-05-05,#BTS #방탄소년단 #BTS_Butter Concept Clip - 지민 (Jim...,1620370
20,2021-05-09,#BTS #방탄소년단 #BTS_Butter Group Teaser Photo 1 h...,1601592
19,2021-05-05,#BTS #방탄소년단 #BTS_Butter Concept Clip - 제이홉 (j-...,1590236
2,2021-05-11,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
1,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643


In [18]:
# Show the 5 least popular tweets about BTS's Butter
top_5_bot_5_popular_least_tweets.tail(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
28,2021-05-05,BTS（防弾少年団） ジン＆SUGA、ニューデジタルシングル「Butter」コンセプトクリッ...,1370
18,2021-05-11,Netizens LOL at the hilarious ads that fans fr...,1240
17,2021-05-11,BTS: Jimin's rainbow hair for Butter has broug...,913
27,2021-05-05,"To promote their upcoming single, Butter, BTS'...",848
16,2021-05-11,BTS: The net worth of 28-year-old Suga will BL...,549


In [19]:
# Display all tweets about BTS's Butter, ordered from most to least popular
top_5_bot_5_popular_least_tweets

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
22,2021-05-05,#BTS #방탄소년단 #BTS_Butter Concept Clip - 지민 (Jim...,1620370
20,2021-05-09,#BTS #방탄소년단 #BTS_Butter Group Teaser Photo 1 h...,1601592
19,2021-05-05,#BTS #방탄소년단 #BTS_Butter Concept Clip - 제이홉 (j-...,1590236
2,2021-05-11,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 정국 (J...,1321553
1,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 진 (Ji...,896643
0,2021-05-12,#BTS #방탄소년단 #BTS_Butter Teaser Photo 1 - 슈가 (S...,876678
4,2021-05-11,We can’t hold it in any longer! Our debut perf...,755257
23,2021-05-09,Are you smooth like Butter?\n#SmoothLikeButter...,737653
21,2021-05-06,Pre-order @bts_bighit BUTTER digital single an...,445286
24,2021-05-05,We can't wait for 'Butter'! Pre-order @bts_big...,206508


In [20]:
# Export the popularity-sorted tweets to CSV.
# The output folder is created first so the export does not fail when "files/"
# is missing, e.g. on a fresh checkout.
output_path = Path("files/bts_tweet_scraping.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
top_5_bot_5_popular_least_tweets.to_csv(output_path)