# 1. Importing required packages

In [7]:
from time import strftime, gmtime, sleep
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys

# 2. Inputting username and number of tweets

In [29]:
# Strip surrounding whitespace: the value is interpolated verbatim into the
# profile URL later in the notebook, so a stray space would break the request.
username = input('Input a username for searching the tweets: ').strip()

Input a username for searching the tweets: @nikolaevava


In [30]:
# Upper bound on the number of tweets to collect. int() already raises
# ValueError for non-numeric input; reject negative numbers the same way
# instead of silently producing an empty table later on.
limit = int(input('Input the number of tweets to search for (if available): '))
if limit < 0:
    raise ValueError('The number of tweets to search for must not be negative')

Input the number of tweets to search for (if available): 100


# 3. Defining functions for extracting different data

In [31]:
def find_date(tweet):
    '''
    Returns the tweet's publication date formatted as 'DD Mon YYYY' (UTC).

    The date is derived from the epoch-seconds value stored in the
    `data-time` attribute of the tweet's `span._timestamp` element.
    Returns 'Unknown' when that element is not present.
    '''
    try:
        timestamp_el = tweet.find_element_by_css_selector('span._timestamp')
        epoch_seconds = int(timestamp_el.get_attribute('data-time'))
        return strftime('%d %b %Y', gmtime(epoch_seconds))
    except NoSuchElementException:
        return 'Unknown'

def find_time(tweet):
    '''
    Returns the tweet's publication time formatted as 'HH:MM' (UTC).

    The time is derived from the epoch-seconds value stored in the
    `data-time` attribute of the tweet's `span._timestamp` element.
    Returns 'Unknown' when that element is not present.
    '''
    try:
        timestamp_el = tweet.find_element_by_css_selector('span._timestamp')
        epoch_seconds = int(timestamp_el.get_attribute('data-time'))
        return strftime('%H:%M', gmtime(epoch_seconds))
    except NoSuchElementException:
        return 'Unknown'
    
def find_name(tweet):
    '''
    Returns the display name of the tweet's author (the account name).

    Returns 'Unknown' when the name element is not found inside the tweet.
    '''
    try:
        fullname_el = tweet.find_element_by_css_selector('div.tweet strong.fullname')
        return fullname_el.text
    except NoSuchElementException:
        return 'Unknown'
    
def find_acc_link(tweet):
    '''
    Returns the author's account handle prefixed with '@'.

    Returns 'Unknown' when the handle element is not found inside the tweet.
    '''
    try:
        handle_el = tweet.find_element_by_css_selector(
            'div.tweet a.js-action-profile span.username b'
        )
        return '@' + handle_el.text
    except NoSuchElementException:
        return 'Unknown'
    
def find_text(tweet):
    '''
    Returns the text of the tweet.

    Returns 'Unknown' when the tweet has no `p.tweet-text` element.
    '''
    try:
        text_el = tweet.find_element_by_css_selector('p.tweet-text')
        return text_el.text
    except NoSuchElementException:
        return 'Unknown'
    
def find_media(tweet):
    '''
    Tells whether the tweet contains a media container (video / picture).

    Returns 'Yes' when a `div.AdaptiveMediaOuterContainer` element exists
    inside the tweet and 'No' otherwise.
    '''
    try:
        # Only the element's presence matters; the element itself is not used.
        tweet.find_element_by_css_selector('div.AdaptiveMediaOuterContainer')
    except NoSuchElementException:
        return 'No'
    return 'Yes'

def find_quote(tweet):
    '''
    Tells whether the tweet contains a quoted tweet.

    Returns 'Yes' when a `div.QuoteTweet-container` element exists inside
    the tweet and 'No' otherwise.
    '''
    try:
        # Only the element's presence matters; the element itself is not used.
        tweet.find_element_by_css_selector('div.QuoteTweet-container')
    except NoSuchElementException:
        return 'No'
    return 'Yes'
    
def find_likes(tweet):
    '''
    Finds the number of 'likes' for current tweet.

    Lookup order:
      1. the `data-tweet-stat-count` attribute of the like counter;
      2. if that attribute is empty or absent, 0 when the counter carries
         the `--isZero` marker class;
      3. otherwise the counter's presentation text.

    Returns the count as an int, or 'Unknown' when a required element is
    missing or the value found cannot be parsed as an integer.
    '''
    try:
        likes = tweet.find_element_by_css_selector(
            'button.ProfileTweet-action--unfavorite span.ProfileTweet-actionCount'
        ).get_attribute('data-tweet-stat-count')
        if likes:
            return int(likes)
        try:
            tweet.find_element_by_css_selector('div.ProfileTweet-action--favorite span.ProfileTweet-actionCount.ProfileTweet-actionCount--isZero')
            return 0
        except NoSuchElementException:
            # No zero marker: fall back to the text shown to the user.
            shown = tweet.find_element_by_css_selector('div.ProfileTweet-action--favorite span.ProfileTweet-actionCountForPresentation').text
            return int(shown)
    except NoSuchElementException:
        return 'Unknown'
    except (TypeError, ValueError):
        # The value was found but is not a plain integer (e.g. empty text);
        # report it like any other unavailable count instead of crashing.
        return 'Unknown'
        
    
def find_retweets(tweet):
    '''
    Finds the number of 'retweets' for current tweet.

    Lookup order:
      1. the `data-tweet-stat-count` attribute of the retweet counter;
      2. if that attribute is empty or absent, 0 when the counter carries
         the `--isZero` marker class;
      3. otherwise the counter's presentation text.

    Returns the count as an int, or 'Unknown' when a required element is
    missing or the value found cannot be parsed as an integer.
    '''
    try:
        retweets = tweet.find_element_by_css_selector(
            'button.ProfileTweet-actionButton.js-actionRetweet span.ProfileTweet-actionCount'
        ).get_attribute('data-tweet-stat-count')
        if retweets:
            return int(retweets)
        try:
            tweet.find_element_by_css_selector('div.ProfileTweet-action--retweet span.ProfileTweet-actionCount.ProfileTweet-actionCount--isZero')
            return 0
        except NoSuchElementException:
            # No zero marker: fall back to the text shown to the user.
            shown = tweet.find_element_by_css_selector('div.ProfileTweet-action--retweet span.ProfileTweet-actionCountForPresentation').text
            return int(shown)
    except NoSuchElementException:
        return 'Unknown'
    except (TypeError, ValueError):
        # The value was found but is not a plain integer (e.g. empty text);
        # report it like any other unavailable count instead of crashing.
        return 'Unknown'

def find_comments(tweet):
    '''
    Finds the number of 'comments' (replies) for current tweet.

    Lookup order:
      1. the `data-tweet-stat-count` attribute of the reply counter;
      2. if that attribute is empty or absent, 0 when the counter carries
         the `--isZero` marker class;
      3. otherwise the counter's presentation text.

    Returns the count as an int, or 'Unknown' when a required element is
    missing or the value found cannot be parsed as an integer.
    '''
    try:
        comments = tweet.find_element_by_css_selector(
            'span.ProfileTweet-action--reply span.ProfileTweet-actionCount'
        ).get_attribute('data-tweet-stat-count')
        if comments:
            return int(comments)
        try:
            tweet.find_element_by_css_selector('div.ProfileTweet-action--reply span.ProfileTweet-actionCount.ProfileTweet-actionCount--isZero')
            return 0
        except NoSuchElementException:
            # No zero marker: fall back to the text shown to the user.
            shown = tweet.find_element_by_css_selector('div.ProfileTweet-action--reply span.ProfileTweet-actionCountForPresentation').text
            return int(shown)
    except NoSuchElementException:
        return 'Unknown'
    except (TypeError, ValueError):
        # The value was found but is not a plain integer (e.g. empty text);
        # report it like any other unavailable count instead of crashing.
        return 'Unknown'

# 4. Parsing the page and writing extracted data to table

In [32]:
# Render any float column without a fractional part when the table is shown.
pd.options.display.float_format = '{:0,.0f}'.format

browser = webdriver.Chrome()
url = f'https://twitter.com/{username}'
tweets_limit = limit

# Column name -> extractor, in the order the columns appear in the table.
extractors = {
    'Date': find_date,
    'Time': find_time,
    'Account name': find_name,
    'Account link': find_acc_link,
    'Tweet text': find_text,
    'Mediafile': find_media,
    'Tweet quotation': find_quote,
    'Likes': find_likes,
    'Retweets': find_retweets,
    'Comments': find_comments,
}

try:
    browser.get(url)
    sleep(1)

    body = browser.find_element_by_tag_name('body')

    first_len = 0

    # Keep scrolling to the bottom until enough tweets are loaded or the
    # page stops producing new ones.
    while True:
        body.send_keys(Keys.END)
        sleep(2)
        tweets = browser.find_elements_by_css_selector('div.tweet')

        second_len = len(tweets)

        if second_len == first_len:
            # Nothing new was loaded: the profile has fewer tweets than requested.
            tweets_limit = second_len
            break
        first_len = second_len

        if second_len >= tweets_limit:
            break

    # Extract everything while the browser is still open: the elements are
    # live handles into the page. max() guards against a negative limit,
    # which would otherwise slice from the end of the list.
    records = [
        {column: extract(tweet) for column, extract in extractors.items()}
        for tweet in tweets[:max(tweets_limit, 0)]
    ]
finally:
    # quit() ends the whole driver session (not just the window) and runs
    # even if scraping failed, so no browser/driver process is left behind.
    browser.quit()

# Build the table in one go (rows numbered from 1) so that each column gets
# its natural dtype instead of being grown cell by cell.
tweets_table = pd.DataFrame(
    records,
    index=range(1, len(records) + 1),
    columns=list(extractors),
)

if (tweets_limit == 0):
    print('There is no tweets published by this user!')
else:
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(tweets_table)

Unnamed: 0,Date,Time,Account name,Account link,Tweet text,Mediafile,Tweet quotation,Likes,Retweets,Comments
1,26 Feb 2018,14:30,мєх на капішоні,@nikolaevava,"есть такая теория, шо чем смешнее твитор, тем ...",No,No,69,6,2
2,29 Dec 2018,15:31,мєх на капішоні,@nikolaevava,"из плохого: нам отменили прямой рейс, летим аж...",No,No,1,0,0
3,29 Dec 2018,09:20,мєх на капішоні,@nikolaevava,приключения начинаются: рейс задержали на СЕМЬ...,No,No,1,0,2
4,28 Dec 2018,11:13,мєх на капішоні,@nikolaevava,"собака как-то прознала, что мы уезжаем завтра ...",No,No,0,0,1
5,27 Dec 2018,11:39,мєх на капішоні,@nikolaevava,помните твит про аллергию на креветки. дополня...,No,No,1,0,1
6,25 Dec 2018,11:04,мєх на капішоні,@nikolaevava,"как в домашних условиях узнать, шо твой чумода...",No,No,1,0,4
7,25 Dec 2018,08:34,мєх на капішоні,@nikolaevava,дома щакончились носовые платки. скоро начну с...,No,No,0,0,0
8,23 Dec 2018,19:14,мєх на капішоні,@nikolaevava,у меня аллергия на креветки. все. официально м...,No,No,0,0,2
9,21 Dec 2018,16:05,мєх на капішоні,@nikolaevava,мой типичный вечер: купить лекарство в аптеке....,No,No,2,0,0
10,15 Dec 2018,22:28,мєх на капішоні,@nikolaevava,новая рубрика #рецензияводнослово\nЛарс фон Тр...,No,No,1,0,0


In [250]:
# Save the collected table to 'tweets.csv' (path relative to the current
# working directory); the row index is written as the first column.
tweets_table.to_csv('tweets.csv')