# 1. Importing required packages

In [1]:
from time import strftime, gmtime, sleep

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# 2. Inputting username and number of tweets

In [2]:
# Ask for the account handle. Surrounding whitespace is trimmed because the value
# is interpolated verbatim into the profile URL in the parsing cell.
username = input('Input a username for searching the tweets: ').strip()

Input a username for searching the tweets: @BarackObama


In [3]:
# Keep prompting until the user supplies a strictly positive integer.
# On exit, `limit` holds that integer.
while True:
    try:
        limit = int(input('Input the number of tweets to search for (if available): '))
    except ValueError:
        # The text could not be parsed as an integer at all.
        print('An error occured. Please input a valid positive number')
    else:
        if limit > 0:
            break
        # Parsed fine, but zero or negative: ask again.
        print('You can use only valid number that is > 0')

Input the number of tweets to search for (if available): 100


# 3. Defining functions for extracting different data

In [4]:
def find_date(tweet):
    '''
    Finds date of tweet publication

    Looks up the `span._timestamp` element inside `tweet`, reads its
    `data-time` attribute as Unix seconds and formats it in UTC.

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns a string such as '01 Jan 2019', or 'Unknown' when the
    timestamp element is absent or its `data-time` value is missing or
    not an integer.
    '''
    try:
        stamp = tweet.find_element(By.CSS_SELECTOR, 'span._timestamp')
        # get_attribute() yields None for an absent attribute; int(None) raises
        # TypeError and int('abc') raises ValueError, both handled below.
        posted_at = gmtime(int(stamp.get_attribute('data-time')))
    except (NoSuchElementException, TypeError, ValueError):
        return 'Unknown'
    return strftime('%d %b %Y', posted_at)

def find_time(tweet):
    '''
    Finds time of tweet publication

    Looks up the `span._timestamp` element inside `tweet`, reads its
    `data-time` attribute as Unix seconds and formats the UTC time of day.

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns a string such as '16:19', or 'Unknown' when the timestamp
    element is absent or its `data-time` value is missing or not an
    integer.
    '''
    try:
        stamp = tweet.find_element(By.CSS_SELECTOR, 'span._timestamp')
        # get_attribute() yields None for an absent attribute; int(None) raises
        # TypeError and int('abc') raises ValueError, both handled below.
        posted_at = gmtime(int(stamp.get_attribute('data-time')))
    except (NoSuchElementException, TypeError, ValueError):
        return 'Unknown'
    return strftime('%H:%M', posted_at)
    
def find_name(tweet):
    '''
    Finds tweet author's name (Name of account)

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns the text of the `strong.fullname` element, or 'Unknown' when
    no such element exists in the tweet.
    '''
    try:
        # find_element(By..., ...) replaces find_element_by_css_selector,
        # which newer Selenium releases no longer provide.
        return tweet.find_element(By.CSS_SELECTOR, 'div.tweet strong.fullname').text
    except NoSuchElementException:
        return 'Unknown'
    
def find_acc_link(tweet):
    '''
    Finds tweet author's account link (with '@')

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns '@' followed by the text of the username element, or
    'Unknown' when no such element exists in the tweet.
    '''
    try:
        # find_element(By..., ...) replaces find_element_by_css_selector,
        # which newer Selenium releases no longer provide.
        handle = tweet.find_element(
            By.CSS_SELECTOR, 'div.tweet a.js-action-profile span.username b'
        ).text
    except NoSuchElementException:
        return 'Unknown'
    return '@' + handle
    
def find_text(tweet):
    '''
    Finds tweet text

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns the text of the `p.tweet-text` element, or 'Unknown' when no
    such element exists in the tweet.
    '''
    try:
        # find_element(By..., ...) replaces find_element_by_css_selector,
        # which newer Selenium releases no longer provide.
        return tweet.find_element(By.CSS_SELECTOR, 'p.tweet-text').text
    except NoSuchElementException:
        return 'Unknown'
    
def find_media(tweet):
    '''
    Defines whether tweet contains video / picture

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns 'Yes' when a `div.AdaptiveMediaOuterContainer` element is
    found inside the tweet, otherwise 'No'.
    '''
    try:
        # Only the presence of the element matters; the lookup raises when absent.
        # find_element(By..., ...) replaces find_element_by_css_selector,
        # which newer Selenium releases no longer provide.
        tweet.find_element(By.CSS_SELECTOR, 'div.AdaptiveMediaOuterContainer')
    except NoSuchElementException:
        return 'No'
    return 'Yes'

def find_quote(tweet):
    '''
    Defines whether tweet contains a quoted tweet

    tweet: element exposing `find_element(by, value)` (the parsing cell
        passes the `div.tweet` elements returned by Selenium).

    Returns 'Yes' when a `div.QuoteTweet-container` element is found
    inside the tweet, otherwise 'No'.
    '''
    try:
        # Only the presence of the element matters; the lookup raises when absent.
        # find_element(By..., ...) replaces find_element_by_css_selector,
        # which newer Selenium releases no longer provide.
        tweet.find_element(By.CSS_SELECTOR, 'div.QuoteTweet-container')
    except NoSuchElementException:
        return 'No'
    return 'Yes'
    
def _find_stat_count(tweet, exact_selector, action):
    '''
    Shared lookup behind find_likes / find_retweets / find_comments

    tweet: element exposing `find_element(by, value)`.
    exact_selector: CSS selector of the element whose
        `data-tweet-stat-count` attribute is read first.
    action: suffix of the `div.ProfileTweet-action--<action>` container
        used for the fallback lookups ('favorite', 'retweet' or 'reply').

    Lookup order:
      1. the `data-tweet-stat-count` attribute of `exact_selector`;
      2. if that attribute is empty or absent, an `--isZero` marker inside
         the action container means the count is 0;
      3. otherwise the text of the `ProfileTweet-actionCountForPresentation`
         span inside the action container.

    Returns the count as an int, or 'Unknown' when a required element is
    missing or the value found is not a plain integer.
    '''
    container = f'div.ProfileTweet-action--{action}'
    try:
        raw = tweet.find_element(By.CSS_SELECTOR, exact_selector).get_attribute('data-tweet-stat-count')
        if raw:
            return int(raw)
        try:
            # Presence of the "isZero" marker is the signal; the element itself is unused.
            tweet.find_element(
                By.CSS_SELECTOR,
                f'{container} span.ProfileTweet-actionCount.ProfileTweet-actionCount--isZero',
            )
            return 0
        except NoSuchElementException:
            shown = tweet.find_element(
                By.CSS_SELECTOR,
                f'{container} span.ProfileTweet-actionCountForPresentation',
            ).text
            return int(shown)
    except (NoSuchElementException, ValueError):
        # ValueError: the attribute or displayed text was not a plain integer.
        return 'Unknown'


def find_likes(tweet):
    '''
    Finds the number of 'likes' for current tweet

    Returns an int, or 'Unknown' when the count cannot be located or parsed.
    '''
    return _find_stat_count(
        tweet,
        'button.ProfileTweet-action--unfavorite span.ProfileTweet-actionCount',
        'favorite',
    )


def find_retweets(tweet):
    '''
    Finds the number of 'retweets' for current tweet

    Returns an int, or 'Unknown' when the count cannot be located or parsed.
    '''
    return _find_stat_count(
        tweet,
        'button.ProfileTweet-actionButton.js-actionRetweet span.ProfileTweet-actionCount',
        'retweet',
    )


def find_comments(tweet):
    '''
    Finds the number of 'comments' (replies) for current tweet

    Returns an int, or 'Unknown' when the count cannot be located or parsed.
    '''
    return _find_stat_count(
        tweet,
        'span.ProfileTweet-action--reply span.ProfileTweet-actionCount',
        'reply',
    )

# 4. Parsing the page and writing extracted data to table

In [5]:
# Scrape the user's timeline: open the profile page, scroll until enough tweets
# are loaded (or no more appear), extract one record per tweet and show the table.

# Render any float column without decimals and with thousands separators.
pd.options.display.float_format = '{:0,.0f}'.format

tweets_limit = limit
# The prompt example uses a handle typed with a leading '@'; drop it (and stray
# whitespace) so the URL is the plain profile path.
url = f"https://twitter.com/{username.strip().lstrip('@')}"

browser = webdriver.Chrome()
try:
    browser.get(url)
    sleep(1)  # fixed pause to let the page start rendering

    body = browser.find_element(By.TAG_NAME, 'body')

    first_len = 0

    while True:
        # Pressing END scrolls to the bottom; the fixed pause gives newly
        # requested tweets time to appear before they are counted.
        body.send_keys(Keys.END)
        sleep(2)
        tweets = browser.find_elements(By.CSS_SELECTOR, 'div.tweet')

        second_len = len(tweets)

        if second_len == first_len:
            # Scrolling produced nothing new: settle for what is available.
            tweets_limit = second_len
            break
        first_len = second_len

        if second_len >= tweets_limit:
            break

    # Extracting tweet contents. The elements are live browser handles, so this
    # must run before the browser is shut down. Building all records first and
    # constructing the frame once keeps the integer counts as integers.
    records = [
        {
            'Date': find_date(tweet),
            'Time': find_time(tweet),
            'Account name': find_name(tweet),
            'Account link': find_acc_link(tweet),
            'Tweet text': find_text(tweet),
            'Mediafile': find_media(tweet),
            'Tweet quotation': find_quote(tweet),
            'Likes': find_likes(tweet),
            'Retweets': find_retweets(tweet),
            'Comments': find_comments(tweet),
        }
        for tweet in tweets[:tweets_limit]
    ]
    # Rows are numbered from 1, as before.
    tweets_table = pd.DataFrame(records, index=pd.RangeIndex(1, tweets_limit + 1))
finally:
    # quit() ends the whole WebDriver session even when scraping failed midway.
    browser.quit()

if tweets_limit == 0:
    print('There is no tweets published by this user!')
else:
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(tweets_table)

Unnamed: 0,Date,Time,Account name,Account link,Tweet text,Mediafile,Tweet quotation,Likes,Retweets,Comments
1,01 Jan 2019,16:19,Barack Obama,@BarackObama,In 2018 people stepped up and showed up like n...,No,No,793109,109025,22636
2,29 Dec 2018,16:15,Barack Obama,@BarackObama,I hope you find inspiration in the stories of ...,No,No,87239,11045,2138
3,29 Dec 2018,16:15,Barack Obama,@BarackObama,"Leaders like Jonny Boucher, a Chicago native w...",Yes,No,106025,18275,958
4,29 Dec 2018,16:15,Barack Obama,@BarackObama,"Leaders like Hong Hoang, who mobilized a youth...",No,No,26713,3562,167
5,29 Dec 2018,16:15,Barack Obama,@BarackObama,"Leaders like Moussa Kondo and Sandor Lederer, ...",No,No,25269,3637,169
6,29 Dec 2018,16:15,Barack Obama,@BarackObama,"Leaders like Dejah Powell, who started an orga...",Yes,No,33272,4993,285
7,29 Dec 2018,16:15,Barack Obama,@BarackObama,As the year winds down and we look toward 2019...,No,No,385614,84772,8350
8,28 Dec 2018,14:34,Barack Obama,@BarackObama,"As 2018 draws to a close, I’m continuing the t...",No,No,286753,48924,9816
9,25 Dec 2018,14:14,Barack Obama,@BarackObama,Enjoy the holiday season with the ones you lov...,Yes,No,935398,94557,24657
10,20 Dec 2018,20:55,Barack Obama,@BarackObama,There’s no better time than the holidays to gi...,No,Yes,204757,33785,8371


In [6]:
# Persist the scraped table (including its row-number index) as CSV in the working directory.
tweets_table.to_csv(path_or_buf='tweets.csv')