Goal: Build a machine-learning algorthim that can predict whether a tweet is more likely to interact with (i.e. retweet at) @BarackObama or @realDonaldTrump, based on the text of the tweet.

In [None]:
#! pip install --user emoji

In [36]:
import json
import emoji
from random import randint
import oauth2 as oauth
import pandas as pd
import numpy as np
from credentials import *

In [27]:
def oauth_twitter_search(query, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
    """ Search Twitter ...
    looks like Tweets with "truncated": true could pose a problem
    """
    search_endpoint = "https://api.twitter.com/1.1/search/tweets.json"
    compiled_search_endpoint = "{}?q={}+-filter:retweets&count=100&result_type=recent&lang=en&tweet_mode=extended".format(search_endpoint, query)
    consumer = oauth.Consumer(key=CONSUMER_KEY, secret=CONSUMER_SECRET)
    client = oauth.Client(consumer)
    response, data = client.request(compiled_search_endpoint)
    tweets = json.loads(data)
    return tweets

In [38]:
# fetch the tweets
tweets_44 = oauth_twitter_search("@BarackObama")['statuses']
tweets_45 = oauth_twitter_search("@realDonaldTrump")['statuses']
print(len(tweets_44), len(tweets_45))

94 100


In [39]:
tweets_44[0].keys()

dict_keys(['created_at', 'id', 'id_str', 'full_text', 'truncated', 'display_text_range', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang'])

In [40]:
sample_44 = tweets_44[0]
print(sample_44['id_str'], sample_44['full_text'])

1117206445756428295 @joncoopertweets @BarackObama @realDonaldTrump 1/2 of the 59 millions are Russian bots, 1/4 are trolls, 1/8 are actually accounts belonging to Trumps followers dogs. Its true


In [42]:
'@barackobama' in sample_44['full_text'].lower()

True

In [46]:
# prune each fetched tweet set
pruned_tweets_44 = []
for tweet in tweets_44:
    d = {}
    d['at_45'] = '@barackobama' in tweet['full_text'].lower()
    d['at_55'] = '@realdonaldtrump' in tweet['full_text'].lower()
    d['id_str'] = tweet['id_str']
    d['full_text'] = tweet['full_text']
    d['pruned_text'] = tweet['full_text'].lower().replace('@barackobama', '').replace('@realdonaldtrump', '')
    pruned_tweets_44.append(d)

In [47]:
pruned_tweets_44[0]

{'at_45': True,
 'at_55': True,
 'id_str': '1117206445756428295',
 'full_text': '@joncoopertweets @BarackObama @realDonaldTrump 1/2 of the 59 millions are Russian bots, 1/4 are trolls, 1/8 are actually accounts belonging to Trumps followers dogs. Its true',
 'pruned_text': '@joncoopertweets   1/2 of the 59 millions are russian bots, 1/4 are trolls, 1/8 are actually accounts belonging to trumps followers dogs. its true'}

In [None]:

column_names = ['at_44', 'at_45' 'tweet_id', 'tweet_text']

In [22]:
print(emoji.demojize(sample_tweet_at_nasa['full_text']))

@AnarqDPlantao @rolealeatorio @NASA Meodeos


In [18]:
sample_nasa_tweet.keys()

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang'])

In [22]:
sample_nasa_tweet['retweet_count']

255

In [24]:
sample_nasa_tweet['truncated']

False

In [25]:
print(sample_nasa_text)

RT @NASA: When we talk about the enormity of the cosmos 💫, it’s easy to toss out big numbers – but far more difficult to wrap our minds aro…


In [35]:
for k, v in sample_nasa_tweet.items():
    print("{} :: {}\n".format(k,v))

created_at :: Sat Apr 13 21:12:30 +0000 2019

id :: 1117173741690159105

id_str :: 1117173741690159105

text :: RT @NASA: When we talk about the enormity of the cosmos 💫, it’s easy to toss out big numbers – but far more difficult to wrap our minds aro…

truncated :: False

entities :: {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'NASA', 'name': 'NASA', 'id': 11348282, 'id_str': '11348282', 'indices': [3, 8]}], 'urls': []}

metadata :: {'iso_language_code': 'en', 'result_type': 'recent'}

source :: <a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>

in_reply_to_status_id :: None

in_reply_to_status_id_str :: None

in_reply_to_user_id :: None

in_reply_to_user_id_str :: None

in_reply_to_screen_name :: None

user :: {'id': 825450498719379457, 'id_str': '825450498719379457', 'name': 'The Queen 👑', 'screen_name': 'Rahma_ASSaSa', 'location': '', 'description': 'ألا بذكر الله تطمئن القلوب 💖👐', 'url': None, 'entities': {'description': {'url