# JYPE Tweet Scraping

In [1]:
# Import my Twitter API credentials from the tweetkey module
from tweetkey import consumer_api_key, consumer_api_secret_key, access_token, access_secret_token

In [2]:
# Import necessary dependencies
import os
import time

import pandas as pd
import tweepy

In [3]:
# Build tweepy's OAuth handler from the consumer credentials, then attach
# the access token pair imported from tweetkey.
auth = tweepy.OAuthHandler(
    consumer_key=consumer_api_key,
    consumer_secret=consumer_api_secret_key,
)
auth.set_access_token(access_token, access_secret_token)

# API client used by the search cells below. wait_on_rate_limit=True is
# passed so tweepy waits when a rate limit is hit (see the tweepy API docs).
api = tweepy.API(auth, wait_on_rate_limit=True)

In [4]:
# Search Twitter for the query 'JYP' and iterate over at most 150 results.
search_cursor = tweepy.Cursor(api.search, q='JYP', result_type='mixed')
jyp_tweets = search_cursor.items(150)

# Collect one row per tweet: [created_at, id, text, favorite_count].
# NOTE(review): `text` may come back truncated for long tweets unless the
# request asks for extended mode -- confirm whether the full text is needed.
jyp_tweets_list = []
for status in jyp_tweets:
    jyp_tweets_list.append(
        [status.created_at, status.id, status.text, status.favorite_count]
    )

# No column names are supplied, so the columns are labelled 0-3; the
# cleaning cell further down relies on those positional labels.
jyp_tweets_df = pd.DataFrame(jyp_tweets_list)

In [5]:
# Preview the raw search results (columns are still labelled 0-3 here).
jyp_tweets_df

Unnamed: 0,0,1,2,3
0,2021-05-20 03:16:18,1395216788724477955,TWICE ツウィ、JYPの副社長になる可能性も！？「突然ですが占ってもいいですか？」で星ひ...,1704
1,2021-05-21 02:25:00,1395566263515090949,GOT7’s Jay B Reveals How He Felt When Leaving ...,398
2,2021-05-19 12:00:02,1394986199740293122,[Dream High] JYP teaches the entrance class st...,279
3,2021-05-21 02:55:56,1395574050328498177,RT @rachasgf: “JYP PABO” SPEAK YOUR SHIT SKZ h...,0
4,2021-05-21 02:55:47,1395574010851823618,RT @hwangsamericano: they called jyp stupid on...,0
...,...,...,...,...
145,2021-05-19 11:42:48,1394981863878447106,RT @dkrltmxkwjd: Jaebum bilang: once you got o...,0
146,2021-05-19 11:42:47,1394981859004719107,RT @Kkapok_s: ร้องเพราะนะแต่จะร้องเทคนิคJYP เพ...,0
147,2021-05-19 11:42:36,1394981814578581509,パッチムがわかるようになって、パク・ジニョンが何故JNPじゃなくてJYPなのかわかったとき、...,0
148,2021-05-19 11:42:27,1394981775047266304,RT @WAWAI997: แน่นอนว่าต้องเป็นจินจีโด มม.น่าร...,0


In [6]:
# Column 1 holds the tweet id, which is not kept in the final table.
cleaned_jyp_tweets = jyp_tweets_df.drop(columns=[1])

# Swap the positional column labels for descriptive headers.
column_titles = {
    0: "Tweeted Date",
    2: "Tweet Description",
    3: "Popularity Count",
}
new_jyp_tweets = cleaned_jyp_tweets.rename(columns=column_titles)

# Order tweets from highest to lowest favorite count and display the result.
sort_jype_tweets = new_jyp_tweets.sort_values("Popularity Count", ascending=False)
sort_jype_tweets

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-20 03:16:18,TWICE ツウィ、JYPの副社長になる可能性も！？「突然ですが占ってもいいですか？」で星ひ...,1704
1,2021-05-21 02:25:00,GOT7’s Jay B Reveals How He Felt When Leaving ...,398
2,2021-05-19 12:00:02,[Dream High] JYP teaches the entrance class st...,279
98,2021-05-19 11:47:57,스테이씨는 JYP 노래스타일이랑 잘 어울림 \nㄴ당연함 JYP 여돌 노래 좋은거 다...,37
144,2021-05-19 11:43:00,イケメンすぎて笑うしかできねえ、JYPだろ\n\n#福田歩汰 https://t.co/9G...,13
...,...,...,...
47,2021-05-19 11:55:07,그레알괫서씨의 답변을 보자！ https://t.co/j1cTErphgE #질문함 #...,0
43,2021-05-19 11:55:36,RT @soompi: #JYP Takes Legal Action Against In...,0
42,2021-05-19 11:55:50,RT @beomdazed: the big three of kpop is no lon...,0
41,2021-05-19 11:55:53,RT @DSoulBprd: “ มันแตกต่างจากที่ผมเคยทำอย่างแ...,0


In [8]:
# Keep only the calendar date of each tweet (drops the time-of-day part).
tweeted_at = pd.to_datetime(sort_jype_tweets["Tweeted Date"])
sort_jype_tweets["Tweeted Date"] = tweeted_at.dt.date

In [9]:
# First five rows of the ranked table, i.e. the highest "Popularity Count" values.
sort_jype_tweets.head(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-20,TWICE ツウィ、JYPの副社長になる可能性も！？「突然ですが占ってもいいですか？」で星ひ...,1704
1,2021-05-21,GOT7’s Jay B Reveals How He Felt When Leaving ...,398
2,2021-05-19,[Dream High] JYP teaches the entrance class st...,279
98,2021-05-19,스테이씨는 JYP 노래스타일이랑 잘 어울림 \nㄴ당연함 JYP 여돌 노래 좋은거 다...,37
144,2021-05-19,イケメンすぎて笑うしかできねえ、JYPだろ\n\n#福田歩汰 https://t.co/9G...,13


In [10]:
# Last five rows of the ranked table, i.e. the lowest "Popularity Count" values.
sort_jype_tweets.tail(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
47,2021-05-19,그레알괫서씨의 답변을 보자！ https://t.co/j1cTErphgE #질문함 #...,0
43,2021-05-19,RT @soompi: #JYP Takes Legal Action Against In...,0
42,2021-05-19,RT @beomdazed: the big three of kpop is no lon...,0
41,2021-05-19,RT @DSoulBprd: “ มันแตกต่างจากที่ผมเคยทำอย่างแ...,0
149,2021-05-19,RT @andn_0v0: 🧊WORLD🧊\n#범규 #BEOMGYU https://t....,0


In [11]:
# Save the ranked 'JYP' search results to CSV.
# to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails here.
os.makedirs('files', exist_ok=True)
# index=False is not passed, so the DataFrame index is written as the
# first CSV column.
sort_jype_tweets.to_csv('files/jyp_scraping_eng.csv')

# Results with JYP in Korean (박진영) on Twitter

In [12]:
# Search Twitter for the Korean query '박진영' and iterate over at most 150 results.
korean_search_cursor = tweepy.Cursor(api.search, q='박진영', result_type='mixed')
jyp_korean_tweets = korean_search_cursor.items(150)

# Collect one row per tweet: [created_at, id, text, favorite_count].
# NOTE(review): `text` may come back truncated for long tweets unless the
# request asks for extended mode -- confirm whether the full text is needed.
jyp_korean_tweets_list = []
for korean_status in jyp_korean_tweets:
    jyp_korean_tweets_list.append(
        [
            korean_status.created_at,
            korean_status.id,
            korean_status.text,
            korean_status.favorite_count,
        ]
    )

# No column names are supplied, so the columns are labelled 0-3; the
# cleaning cell further down relies on those positional labels.
jyp_korean_tweets_df = pd.DataFrame(jyp_korean_tweets_list)

In [13]:
# Preview the raw Korean-query search results (columns are still labelled 0-3 here).
jyp_korean_tweets_df

Unnamed: 0,0,1,2,3
0,2021-05-16 10:08:38,1393871004233977861,[📺] 13살 나이로 오디션에 참가했던 #조권...☆ 박진영 VS 김문정 더 떨리는...,360
1,2021-05-21 02:59:13,1395574876514263040,RT @markbeomnyoung: The Devil Judge (악마판사)\n\n...,0
2,2021-05-21 02:59:07,1395574848546512910,RT @gangdo825: 실력이랑 외모 빼면 아이돌이냐 히발 그게\n박진영 월급 ...,0
3,2021-05-21 02:57:58,1395574559043059717,RT @SBSNOW: [참가자 프로필] 김민서(20세)\n\nSBS &lt;LOUD...,0
4,2021-05-21 02:56:46,1395574257397256193,RT @markbeomnyoung: The Devil Judge (악마판사)\n\n...,0
...,...,...,...,...
145,2021-05-16 08:39:11,1393848492343193600,RT @screensevens: #JJP; requested lockscreen ✨...,0
146,2021-05-16 08:39:07,1393848474429378562,RT @screensevens: #JJP; requested lockscreen ✨...,0
147,2021-05-16 08:39:00,1393848445417390081,RT @screensevens: #JJP; requested lockscreen ✨...,0
148,2021-05-16 08:37:42,1393848119108923394,RT @screensevens: #JJP; requested lockscreen ✨...,0


In [14]:
# Column 1 holds the tweet id, which is not kept in the final table.
cleaned_jyp_korean_tweets_df = jyp_korean_tweets_df.drop(columns=[1])

# Swap the positional column labels for descriptive headers.
korean_column_titles = {
    0: "Tweeted Date",
    2: "Tweet Description",
    3: "Popularity Count",
}
new_jyp_korean_tweets_df = cleaned_jyp_korean_tweets_df.rename(
    columns=korean_column_titles
)

# Order tweets from highest to lowest favorite count and display the result.
sort_jyp_korean_tweets_df = new_jyp_korean_tweets_df.sort_values(
    "Popularity Count", ascending=False
)
sort_jyp_korean_tweets_df

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-16 10:08:38,[📺] 13살 나이로 오디션에 참가했던 #조권...☆ 박진영 VS 김문정 더 떨리는...,360
85,2021-05-16 09:11:28,박진영 디스랩 어디까지 퍼지나 https://t.co/Rk0WMmZ81u,30
96,2021-05-16 09:07:47,박진영 금지 누구보다 좋아하시는데 ㅋㅋ ㅋ ㅋ ㅋ,12
118,2021-05-16 08:55:43,13살 나이로 오디션에 참가했던 조권...☆ 박진영 VS 김문정 더 떨리는 오디션은...,9
17,2021-05-16 10:03:36,😂😂아니 박진영 사진 저거 어디에서 나온거지 ....? 자기가 저렇게 혼자 찍었을...,8
...,...,...,...
54,2021-05-16 09:33:27,RT @SoS_ForMarkJin: Esquire Korea 210515\n\n진영...,0
55,2021-05-16 09:32:55,RT @screensevens: #JJP; requested lockscreen ✨...,0
56,2021-05-16 09:32:36,RT @screensevens: #JJP; requested lockscreen ✨...,0
57,2021-05-16 09:30:50,RT @Saikop_1: [SUB ESP] tvN lanza la lectura ...,0


In [15]:
# Keep only the calendar date of each tweet (drops the time-of-day part).
korean_tweeted_at = pd.to_datetime(sort_jyp_korean_tweets_df["Tweeted Date"])
sort_jyp_korean_tweets_df["Tweeted Date"] = korean_tweeted_at.dt.date

In [16]:
# First five rows of the ranked table, i.e. the highest "Popularity Count" values.
sort_jyp_korean_tweets_df.head(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-16,[📺] 13살 나이로 오디션에 참가했던 #조권...☆ 박진영 VS 김문정 더 떨리는...,360
85,2021-05-16,박진영 디스랩 어디까지 퍼지나 https://t.co/Rk0WMmZ81u,30
96,2021-05-16,박진영 금지 누구보다 좋아하시는데 ㅋㅋ ㅋ ㅋ ㅋ,12
118,2021-05-16,13살 나이로 오디션에 참가했던 조권...☆ 박진영 VS 김문정 더 떨리는 오디션은...,9
17,2021-05-16,😂😂아니 박진영 사진 저거 어디에서 나온거지 ....? 자기가 저렇게 혼자 찍었을...,8


In [17]:
# Last five rows of the ranked table, i.e. the lowest "Popularity Count" values.
sort_jyp_korean_tweets_df.tail(n=5)

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
54,2021-05-16,RT @SoS_ForMarkJin: Esquire Korea 210515\n\n진영...,0
55,2021-05-16,RT @screensevens: #JJP; requested lockscreen ✨...,0
56,2021-05-16,RT @screensevens: #JJP; requested lockscreen ✨...,0
57,2021-05-16,RT @Saikop_1: [SUB ESP] tvN lanza la lectura ...,0
149,2021-05-16,RT @screensevens: #JJP; requested lockscreen ✨...,0


In [18]:
# Display the ranked Korean-query table after the date conversion.
sort_jyp_korean_tweets_df

Unnamed: 0,Tweeted Date,Tweet Description,Popularity Count
0,2021-05-16,[📺] 13살 나이로 오디션에 참가했던 #조권...☆ 박진영 VS 김문정 더 떨리는...,360
85,2021-05-16,박진영 디스랩 어디까지 퍼지나 https://t.co/Rk0WMmZ81u,30
96,2021-05-16,박진영 금지 누구보다 좋아하시는데 ㅋㅋ ㅋ ㅋ ㅋ,12
118,2021-05-16,13살 나이로 오디션에 참가했던 조권...☆ 박진영 VS 김문정 더 떨리는 오디션은...,9
17,2021-05-16,😂😂아니 박진영 사진 저거 어디에서 나온거지 ....? 자기가 저렇게 혼자 찍었을...,8
...,...,...,...
54,2021-05-16,RT @SoS_ForMarkJin: Esquire Korea 210515\n\n진영...,0
55,2021-05-16,RT @screensevens: #JJP; requested lockscreen ✨...,0
56,2021-05-16,RT @screensevens: #JJP; requested lockscreen ✨...,0
57,2021-05-16,RT @Saikop_1: [SUB ESP] tvN lanza la lectura ...,0


In [19]:
# Save the ranked '박진영' search results to CSV.
# to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails here.
os.makedirs('files', exist_ok=True)
# index=False is not passed, so the DataFrame index is written as the
# first CSV column.
sort_jyp_korean_tweets_df.to_csv('files/jyp_scraping_kor.csv')