## Task 2.2: Getting Started with tweepy 

In [1]:
import tweepy
from tweepy import OAuthHandler
import json
#import getpass

#Alternative style of handing over the secret keys:
#Call the python script directly with bash variables.
#./Lab2.TweepyApi <consumer-key> <consumer-secret> <access-token> <access-secret>
#Then you can retrieve the secret keys from the argument list.
#consumer_key = sys.argv[0]
#consumer_secret = sys.argv[1]
#access_token = sys.argv[2]
#access_secret = sys.argv[3]

consumer_key = "*****"
consumer_secret = "******"
access_token = "*********"
access_secret = "*******"

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
 
api = tweepy.API(auth)

user = api.me()
 
print('Name: ' + user.name)
print('Location: ' + user.location)
print('Friends: ' + str(user.followers_count))
print('Created: ' + str(user.created_at))
print('Description: ' + str(user.description))

Name: Jakob Gerstenlauer
Location: 
Friends: 0
Created: 2017-04-25 12:10:28
Description: 


## Task 2.2.3: Accessing Tweets  

We can access the json response from the Twitter API via the status object. The status object has a `_json` attribute which contains the JSON content in dictionary form:

In [2]:
for status in tweepy.Cursor(api.home_timeline).items(1):
    print(json.dumps(status._json, indent=2))

{
  "contributors": null, 
  "truncated": false, 
  "text": "The Rest of the Iceberg - The Looming IP Implications of the Industrial Internet of Things https://t.co/8SXnhAsex4", 
  "is_quote_status": false, 
  "in_reply_to_status_id": null, 
  "id": 869841122549485568, 
  "favorite_count": 3, 
  "source": "<a href=\"http://www.hootsuite.com\" rel=\"nofollow\">Hootsuite</a>", 
  "retweeted": false, 
  "coordinates": null, 
  "entities": {
    "symbols": [], 
    "user_mentions": [], 
    "hashtags": [], 
    "urls": [
      {
        "url": "https://t.co/8SXnhAsex4", 
        "indices": [
          91, 
          114
        ], 
        "expanded_url": "http://ow.ly/xQHY30bWCeo", 
        "display_url": "ow.ly/xQHY30bWCeo"
      }
    ]
  }, 
  "in_reply_to_screen_name": null, 
  "in_reply_to_user_id": null, 
  "retweet_count": 4, 
  "id_str": "869841122549485568", 
  "favorited": false, 
  "user": {
    "follow_request_sent": false, 
    "has_extended_profile": false, 
    "profile_use

How to access the list of 10 of our friends:

In [3]:
for friend in tweepy.Cursor(api.friends).items(1):
    print(json.dumps(friend._json, indent=2))

{
  "follow_request_sent": false, 
  "has_extended_profile": false, 
  "profile_use_background_image": true, 
  "live_following": false, 
  "default_profile_image": false, 
  "id": 14174897, 
  "profile_background_image_url_https": "https://pbs.twimg.com/profile_background_images/33516583/water10x.gif", 
  "translator_type": "none", 
  "verified": false, 
  "blocked_by": false, 
  "profile_text_color": "3E4415", 
  "muting": false, 
  "profile_image_url_https": "https://pbs.twimg.com/profile_images/823984252035309568/pPt7JWhh_normal.jpg", 
  "profile_sidebar_fill_color": "CAD9D8", 
  "entities": {
    "url": {
      "urls": [
        {
          "url": "http://t.co/43tj0Ez1jT", 
          "indices": [
            0, 
            22
          ], 
          "expanded_url": "http://www.datasciencecentral.com/profiles/blogs/check-out-our-dsc-newsletter", 
          "display_url": "datasciencecentral.com/profiles/blogs\u2026"
        }
      ]
    }, 
    "description": {
      "urls": []
 

How to get a list of my own tweets (I don't have tweets):

In [4]:
for tweet in tweepy.Cursor(api.user_timeline).items(1):
    print(json.dumps(tweet._json, indent=2))

## Task 2.3: Tweet pre-processing

In [5]:
from nltk.tokenize import word_tokenize

tweet = 'RT @JordiTorresBCN: just an example! :D http://JordiTorres.Barcelona #masterMEI'

print(word_tokenize(tweet))

['RT', '@', 'JordiTorresBCN', ':', 'just', 'an', 'example', '!', ':', 'D', 'http', ':', '//JordiTorres.Barcelona', '#', 'masterMEI']


In [6]:
import re
 
emoticons_str = r"""
    (?:
        [:=;] # Eyes
        [oO\-]? # Nose (optional)
        [D\)\]\(\]/\\OpP] # Mouth
    )"""
 
regex_str = [
    emoticons_str,
    r'<[^>]+>', # HTML tags
    r'(?:@[\w_]+)', # @-mentions
    r"(?:\#+[\w_]+[\w\'_\-]*[\w_]+)", # hash-tags
    r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+', # URLs
 
    r'(?:(?:\d+,?)+(?:\.?\d+)?)', # numbers
    r"(?:[a-z][a-z'\-_]+[a-z])", # words with - and '
    r'(?:[\w_]+)', # other words
    r'(?:\S)' # anything else
]
    
tokens_re = re.compile(r'('+'|'.join(regex_str)+')', re.VERBOSE | re.IGNORECASE)
emoticon_re = re.compile(r'^'+emoticons_str+'$', re.VERBOSE | re.IGNORECASE)
 
def tokenize(s):
    return tokens_re.findall(s)
 
def preprocess(s, lowercase=False):
    tokens = tokenize(s)
    if lowercase:
        tokens = [token if emoticon_re.search(token) else token.lower() for token in tokens]
    return tokens
 
tweet = 'RT @JordiTorresBCN: just an example! :D http://JordiTorres.Barcelona #masterMEI'
print(preprocess(tweet))

['RT', '@JordiTorresBCN', ':', 'just', 'an', 'example', '!', ':D', 'http://JordiTorres.Barcelona', '#masterMEI']


## Conclusion:
Now the moticon `:D`, the hash-tag `@JordiTorresBCN`, and the URL are preserved as tokens.
