From 255e568fadcc08f7f69d0d11db8e77b15d49fb4c Mon Sep 17 00:00:00 2001 From: Nikita Komarov Date: Tue, 25 Sep 2018 14:42:42 +0300 Subject: [PATCH] Bugfix for #37 ValueError: invalid literal for int() with base 10 --- twitter_scraper.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/twitter_scraper.py b/twitter_scraper.py index 7e71b96..63a4f13 100644 --- a/twitter_scraper.py +++ b/twitter_scraper.py @@ -39,10 +39,9 @@ def gen_tweets(pages): int(tweet.find('._timestamp')[0].attrs['data-time-ms'])/1000.0) interactions = [x.text for x in tweet.find( '.ProfileTweet-actionCount')] - replies = int(interactions[0].split(" ")[0].replace(comma, "").replace(dot,"")) - retweets = int(interactions[1].split(" ")[ - 0].replace(comma, "").replace(dot,"")) - likes = int(interactions[2].split(" ")[0].replace(comma, "").replace(dot,"")) + replies = int(re.sub('\D', '', interactions[0].split(" ")[0])) + retweets = int(re.sub('\D', '', interactions[1].split(" ")[0])) + likes = int(re.sub('\D', '', interactions[2].split(" ")[0])) hashtags = [hashtag_node.full_text for hashtag_node in tweet.find('.twitter-hashtag')] urls = [url_node.attrs['data-expanded-url'] for url_node in tweet.find('a.twitter-timeline-link:not(.u-hidden)')] photos = [photo_node.attrs['data-image-url'] for photo_node in tweet.find('.AdaptiveMedia-photoContainer')]