## Import libraries

In [13]:
import json
import logging
import time

import tweepy
from pandas.io.json import json_normalize

## Setup connection

In [14]:
# Parse the local credentials file straight from the open handle.
filename = "credentials.json"
with open(filename) as file:
    keys = json.load(file)

In [15]:
# Pull the four OAuth values out of the "Twitter" section of the credentials.
twitter_keys = keys['Twitter']
consumer_key = twitter_keys['consumer_key']
consumer_secret = twitter_keys['consumer_secret']
access_token = twitter_keys['access_token']
access_secret = twitter_keys['access_secret']

# Authenticate with the consumer pair, then attach the user access token.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# Client settings: block (with a notice) when rate limited, and retry failed
# calls with retry_count=1000 / retry_delay=60.
api_options = {
    'wait_on_rate_limit': True,
    'wait_on_rate_limit_notify': True,
    'retry_count': 1000,
    'retry_delay': 60,
}
api = tweepy.API(auth, **api_options)

## Download reply tweets
- [Sony live streaming](https://twitter.com/Sony/status/1214350830981148673)
- [AMD Conference announcement](https://twitter.com/AMD/status/1212119569487351808)
- [Samsung Mobile tweet](https://twitter.com/SamsungMobile/status/1159826737770856448)

In [16]:
def download_replies(user_name, tweet_id, only_direct_replies=False):
    """Download English, non-retweet tweets addressed to ``user_name``.

    The search query is ``to:<user_name> -filter:retweets`` restricted to
    tweets newer than ``tweet_id`` (passed as ``since_id``). The module-level
    ``api`` client is used for the search.

    Parameters
    ----------
    user_name : str
        Account the tweets are addressed to, inserted verbatim after ``to:``.
    tweet_id : str or int
        Id of the original tweet. Both forms are accepted; the direct-reply
        check compares ids as strings.
    only_direct_replies : bool, default False
        When False (the historical behaviour) every tweet returned by the
        search is kept. When True only tweets whose
        ``in_reply_to_status_id_str`` equals ``tweet_id`` are kept.

    Returns
    -------
    list of dict
        One dict per kept tweet with the keys ``username``, ``text`` and
        ``lang``. Whatever was collected before an unrecoverable error is
        still returned.
    """
    # Compare ids as strings: callers pass the id as text while the Status
    # object also exposes an integer form, and "123" != 123 in Python.
    target_id = str(tweet_id)

    replies = tweepy.Cursor(api.search,
                            q='to:{} -filter:retweets'.format(user_name),
                            since_id=tweet_id,
                            tweet_mode='extended',
                            lang='en').items()
    all_repl = []
    while True:
        try:
            reply = replies.next()
            is_direct_reply = (
                getattr(reply, 'in_reply_to_status_id_str', None) == target_id
            )
            if is_direct_reply or not only_direct_replies:
                all_repl.append(reply)
            if is_direct_reply:
                logging.info("reply of tweet:{}".format(reply.full_text))

        except tweepy.RateLimitError as e:
            # Must stay ahead of the TweepError handler below, otherwise the
            # broader handler would end the download instead of waiting.
            logging.error("Twitter api rate limit reached: {}".format(e))
            time.sleep(60)
            continue

        except tweepy.TweepError as e:
            logging.error("Tweepy error occured:{}".format(e))
            break

        except StopIteration:
            # The cursor is exhausted: normal end of the download.
            break

        except Exception as e:
            logging.error("Failed while fetching replies {}".format(e))
            break

    # Keep only the fields stored in the dataset.
    return [
        {
            "username": reply.user.screen_name,
            "text": reply.full_text,
            "lang": reply.lang,
        }
        for reply in all_repl
    ]

### Download replies to Sony tweet

In [17]:
# Fetch the tweets addressed to Sony after the live-streaming tweet and
# flatten them into a DataFrame in one step.
user_name, tweet_id = "@Sony", "1214350830981148673"
sony_tweets = json_normalize(
    download_replies(user_name=user_name, tweet_id=tweet_id)
)
sony_tweets.head()

Unnamed: 0,username,text,lang
0,RealCocailina,@Sony @Beyonce She is influential.,en
1,BTS_NOBUNDLES,@Sony @chartdata @BTS_twt Circus didn't even o...,en
2,monchildnj,@Sony @modooborahae @chartdata @BTS_twt jesus ...,en
3,bshbil,@Sony\nThere is a company that is distributing...,en
4,Harsh_Pathak_ji,@Sony please #lapataganj show wapis se on air ...,en


### Download replies to AMD tweet

In [18]:
# Fetch the tweets addressed to AMD after the conference announcement and
# flatten them into a DataFrame in one step.
user_name, tweet_id = "@AMD", "1212119569487351808"
amd_tweets = json_normalize(
    download_replies(user_name=user_name, tweet_id=tweet_id)
)
amd_tweets.head()

Unnamed: 0,username,text,lang
0,punx223,@AMD / @AMDRyzen 3 3100 and 3300X have landed....,en
1,ejsumatra,@AMD @AMDGaming Hi AMD! Kindly send a DM pleas...,en
2,georgertan,@amd ati .. hated amd cards for ever but had t...,en
3,hP0Ft4X6Yx6mX9Q,@AMD @Livermore_Lab @PenguinHPC amd，yes!,en
4,EchoAlpha16,@AMD I'm trying to RMA a cpu but I can't get y...,en


### Download replies to Samsung tweet 

In [19]:
# Fetch the tweets addressed to Samsung Mobile after the referenced tweet and
# flatten them into a DataFrame in one step.
user_name, tweet_id = "@SamsungMobile", "1159826737770856448"
samsung_tweets = json_normalize(
    download_replies(user_name=user_name, tweet_id=tweet_id)
)
samsung_tweets.head()

Unnamed: 0,username,text,lang
0,anisha_hazra,@SamsungMobile @BTS_ARMY @BTS_twt Handsome moc...,en
1,Gawandehariom,@SamsungMobile in Samsung Galaxy m10 there wil...,en
2,sammeeksHQ,@SamsungMobile my Samsung note 9 keeps on sayi...,en
3,PoojariTia,@SamsungMobile @BTS_ARMY @BTS_twt Cutie pie💜,en
4,PoojariTia,@SamsungMobile @BTS_ARMY @BTS_twt Jungkook you...,en


## Store tweets

In [20]:
# Check (rows, columns) of the Sony frame before it is written to disk.
sony_tweets.shape

(806, 3)

In [21]:
# Check (rows, columns) of the AMD frame before it is written to disk.
amd_tweets.shape

(66, 3)

In [22]:
# Check (rows, columns) of the Samsung frame before it is written to disk.
samsung_tweets.shape

(1101, 3)

In [23]:
# Write each frame to its CSV file, leaving the DataFrame index out.
csv_outputs = {
    'dataset/sony_replies.csv': sony_tweets,
    'dataset/amd_replies.csv': amd_tweets,
    'dataset/samsung_tweets.csv': samsung_tweets,
}
for csv_path, frame in csv_outputs.items():
    frame.to_csv(csv_path, index=False)

## Download tweets related to CES 2020

In [24]:
def collect_tweets_from_query(api, query, number_of_tweets = 2000, max_retries=None):
    """Collect English, non-retweet tweets matching ``query``.

    Parameters
    ----------
    api : tweepy.API
        Authenticated client whose ``search`` method is paged through.
    query : str
        Search expression; ``" -filter:retweets"`` is appended to it.
    number_of_tweets : int, default 2000
        Upper bound on the number of tweets requested from the cursor.
    max_retries : int or None, default None
        Maximum number of waits after a ``tweepy.TweepError``. ``None`` keeps
        retrying without limit (the historical behaviour). Once the limit is
        used up, the tweets collected so far are returned.

    Returns
    -------
    list of dict
        One dict per tweet with the keys ``text``, ``lang`` and ``username``.
    """
    result = []
    backoff_counter = 1
    tweets = tweepy.Cursor(api.search,
                           q="{} -filter:retweets".format(query), #for reducing the number of calls
                           lang='en', tweet_mode='extended'
                           ).items(number_of_tweets)

    while True:
        try:
            tweet = tweets.next()
            filtered_tweet = {
                "text": tweet.full_text,
                "lang": tweet.lang,
                "username": tweet.user.screen_name,
            }
            result.append(filtered_tweet)
        except StopIteration:
            break
        except tweepy.TweepError:
            # backoff_counter - 1 waits have already happened at this point.
            if max_retries is not None and backoff_counter > max_retries:
                print("Giving up after {} retries, returning {} tweets".format(max_retries, len(result)))
                break
            print("Reached Tweet limits, waiting for {} seconds".format(60 * backoff_counter))
            # Linear backoff: each further failure waits one more minute.
            time.sleep(60 * backoff_counter)
            backoff_counter += 1
            continue
    return result


In [25]:
# Fetch tweets tagged #CES2020 (up to the function's default of 2000).
ces2020_tweets = collect_tweets_from_query(api, "#CES2020")

In [26]:
# Flatten the list of tweet dicts into a DataFrame. The same name is rebound,
# so re-running this cell alone passes it the DataFrame rather than the list.
ces2020_tweets = json_normalize(ces2020_tweets)

In [27]:
# Preview the first rows of the #CES2020 frame.
ces2020_tweets.head()

Unnamed: 0,text,lang,username
0,"@BStoly @mashable Brenda, blockbuster story! \...",en,ScanMyPhotos
1,Could #CES2020 have been the event that spread...,en,dsilverman
2,#Segway’s S-Pod makes WALL-E’s hoverchair a re...,en,v_shakthi
3,News coming out that #CES2020 may have been th...,en,dc_colombo
4,"If you missed it, check this story about the #...",en,nycbat


In [28]:
# Save the #CES2020 tweets as CSV, leaving the DataFrame index out.
ces2020_tweets.to_csv("dataset/ces2020_tweets_full_text.csv", index=False)