# API initialization

The authentication keys are stored as environment variables.

To save the authentication keys, run the following in a terminal (macOS):

$touch ~/.bash_profile

Then :

$open -a TextEdit.app ~/.bash_profile

Add the following line to the opened file (once for each variable):

export VARIABLE_NAME=VARIABLE_VALUE

In [15]:
import os
import pprint
from urllib.parse import parse_qsl

import twitter

# Credentials come from the environment so that they never appear in the
# notebook itself; a missing variable raises KeyError right here.
CONSUMER_KEY = os.environ["CONSUMER_KEY"]
CONSUMER_SECRET = os.environ["CONSUMER_SECRET"]
OAUTH_TOKEN = os.environ["OAUTH_TOKEN"]
OAUTH_TOKEN_SECRET = os.environ["OAUTH_TOKEN_SECRET"]

# Build the OAuth authenticator (arguments spelled out by keyword to make the
# token/consumer ordering explicit), then the API client that uses it.
auth = twitter.oauth.OAuth(
    token=OAUTH_TOKEN,
    token_secret=OAUTH_TOKEN_SECRET,
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
)
twitter_api = twitter.Twitter(auth=auth)

# Trending tweets

In [29]:
# Yahoo! "Where On Earth" (WOE) identifiers passed to the trends endpoint.
# The id for the entire world is 1.
# References:
#   https://dev.twitter.com/docs/api/1.1/get/trends/place
#   http://developer.yahoo.com/geo/geoplanet/
WORLD_WOE_ID, FR_WOE_ID, US_WOE_ID = 1, 23424819, 23424977

# Rate limits for each type of request are listed at
# https://dev.twitter.com/rest/public/rate-limits

# One trends request per place of interest (world, then FR).
world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
FR_trends = twitter_api.trends.place(_id=FR_WOE_ID)
# Inspect world_trends[0]['trends'] (or FR_trends) to see the list of
# dictionaries holding the information for each trend.

[{'name': '#逃げ恥',
  'promoted_content': None,
  'query': '%23%E9%80%83%E3%81%92%E6%81%A5',
  'tweet_volume': 150265,
  'url': 'http://twitter.com/search?q=%23%E9%80%83%E3%81%92%E6%81%A5'},
 {'name': '#BrazilHatesSyco',
  'promoted_content': None,
  'query': '%23BrazilHatesSyco',
  'tweet_volume': 147980,
  'url': 'http://twitter.com/search?q=%23BrazilHatesSyco'},
 {'name': '#فلم_كرتون_ادمنته',
  'promoted_content': None,
  'query': '%23%D9%81%D9%84%D9%85_%D9%83%D8%B1%D8%AA%D9%88%D9%86_%D8%A7%D8%AF%D9%85%D9%86%D8%AA%D9%87',
  'tweet_volume': None,
  'url': 'http://twitter.com/search?q=%23%D9%81%D9%84%D9%85_%D9%83%D8%B1%D8%AA%D9%88%D9%86_%D8%A7%D8%AF%D9%85%D9%86%D8%AA%D9%87'},
 {'name': '#AvrasyaTüneli',
  'promoted_content': None,
  'query': '%23AvrasyaT%C3%BCneli',
  'tweet_volume': 33246,
  'url': 'http://twitter.com/search?q=%23AvrasyaT%C3%BCneli'},
 {'name': '#FelizMartes',
  'promoted_content': None,
  'query': '%23FelizMartes',
  'tweet_volume': 19555,
  'url': 'http://twitter.com

# Common trends

In [35]:
# Collect the trend names reported for each place.
world_trends_set = {trend['name'] for trend in world_trends[0]['trends']}
FR_trends_set = {trend['name'] for trend in FR_trends[0]['trends']}

# Trends that appear in both sets.
common_trends = world_trends_set & FR_trends_set
common_trends

{'#BerlinAttack'}

# Fetch tweets

In [101]:
import json
# Search query and the `count` value forwarded to the search endpoint.
q, count = 'Angela Merkel', 4

# Endpoint reference: https://dev.twitter.com/docs/api/1.1/get/search/tweets
search_results = twitter_api.search.tweets(q=q, count=count)

# Keep only the list of tweets; `search_results` itself is kept as well
# because its 'search_metadata' is needed to page through further results.
statuses = search_results['statuses']
statuses

[{'contributors': None,
  'coordinates': None,
  'created_at': 'Tue Dec 20 15:04:54 +0000 2016',
  'entities': {'hashtags': [{'indices': [99, 112], 'text': 'breakingnews'}],
   'symbols': [],
   'urls': [{'display_url': 'dld.bz/fnNsC',
     'expanded_url': 'http://dld.bz/fnNsC',
     'indices': [113, 136],
     'url': 'https://t.co/EX4l4leG1W'}],
   'user_mentions': []},
  'favorite_count': 0,
  'favorited': False,
  'geo': None,
  'id': 811225872216768512,
  'id_str': '811225872216768512',
  'in_reply_to_screen_name': None,
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_reply_to_user_id': None,
  'in_reply_to_user_id_str': None,
  'is_quote_status': False,
  'lang': 'en',
  'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
  'place': None,
  'possibly_sensitive': False,
  'retweet_count': 0,
  'retweeted': False,
  'source': '<a href="https://www.socialoomph.com" rel="nofollow">SocialOomph</a>',
  'text': "Berlin lorry attack: Angela Merkel

In [102]:
# Follow the search cursor for up to 5 more batches of results, appending each
# batch to `statuses`. The list is extended in place, so re-running this cell
# without re-running the search cell keeps growing it.
for _ in range(5):
    print("Length of statuses", len(statuses))
    try:
        next_results = search_results['search_metadata']['next_results']
    except KeyError: # No more results when next_results doesn't exist
        break
    # next_results is a URL query string of the form
    #   ?max_id=313519052523986943&q=NCAA&include_entities=1
    # Its values are percent-encoded (e.g. a space in the query), so they must
    # be decoded before being handed back to the client as keyword arguments;
    # forwarding them still encoded risks encoding them a second time and
    # searching for the wrong text. parse_qsl splits the pairs and decodes
    # them, and also copes with '=' characters inside a value.
    kwargs = dict(parse_qsl(next_results.lstrip('?'), keep_blank_values=True))
    search_results = twitter_api.search.tweets(**kwargs)
    statuses += search_results['statuses']

# Show one sample search result by slicing the list...
print(json.dumps(statuses[0], indent=1))

Length of statuses 4
Length of statuses 6
{
 "id": 811225872216768512,
 "place": null,
 "truncated": false,
 "user": {
  "id": 2808698093,
  "followers_count": 1115,
  "location": "",
  "utc_offset": null,
  "following": false,
  "time_zone": null,
  "profile_sidebar_border_color": "C0DEED",
  "screen_name": "easy_viral",
  "listed_count": 485,
  "profile_use_background_image": true,
  "profile_image_url_https": "https://pbs.twimg.com/profile_images/518832414898933760/H8yrGTYc_normal.jpeg",
  "is_translation_enabled": false,
  "translator_type": "none",
  "friends_count": 1972,
  "geo_enabled": false,
  "notifications": false,
  "profile_image_url": "http://pbs.twimg.com/profile_images/518832414898933760/H8yrGTYc_normal.jpeg",
  "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
  "protected": false,
  "favourites_count": 2,
  "url": null,
  "profile_banner_url": "https://pbs.twimg.com/profile_banners/2808698093/1412533961",
  "name": "Easy Viral

In [170]:
import json
# Load a previously saved batch of tweets. This REPLACES the `statuses` list
# built by the search cells. The context manager closes the file handle, and
# the explicit encoding keeps non-ASCII tweet text readable regardless of the
# platform's default encoding (assumes the file is UTF-8 JSON).
with open('MentionSomeoneImportantForYou.json', encoding='utf-8') as tweets_file:
    statuses = json.load(tweets_file)

# The list comprehension yields a list with a single element (the tweet with
# the given id), which is picked by index and bound to t. An IndexError here
# means the id is not present in the file.
t = [ status
      for status in statuses
          if status['id'] == 316948241264549888 ][0]

# Explore the variable t to get familiar with the data structure...
#print(t)

### Retweet information
# t['retweeted'] -- TODO: clarify whether this tells an original tweet apart
# from a retweet.
print (t['retweet_count'])
# t['retweeted_status'] is worth inspecting as well: per the original note,
# tweets are sometimes modified when retweeted (users add reactions or edit
# the text).
#print (t['retweeted_status'])

### Favorite count of the tweet
print(t['favorite_count'])

### Content of the tweet
print(t['text'])

### Entities found in the tweet text -- VERY IMPORTANT for any NLP on the data
t['entities']

23
0
RT @hassanmusician: #MentionSomeoneImportantForYou God.


{'hashtags': [{'indices': [20, 50], 'text': 'MentionSomeoneImportantForYou'}],
 'urls': [],
 'user_mentions': [{'id': 56259379,
   'id_str': '56259379',
   'indices': [3, 18],
   'name': 'Download the NEW LP!',
   'screen_name': 'hassanmusician'}]}

In [72]:
### Goal: get back to the same structure as the tweet `t` above.
## Open question: can this be achieved starting from FR_trends?
# Find the most retweeted tweet among the France trending topics.
#FR_trends

# Extracting different elements from a list of tweets

In [171]:
# NOTE: `statuses` is whichever list was assigned last. If the previous cell
# was executed it holds the MentionSomeoneImportantForYou tweets, otherwise it
# holds the results of the 'Angela Merkel' search.

# Text of every tweet.
status_texts = [tweet['text'] for tweet in statuses]
print()

# ---- User mentions, tweet by tweet ----
for status in statuses:
    print("Examination of a new status...")
    for user_mention in status['entities']['user_mentions']:
        print("#####")
        print(user_mention)
        print("-----")
        print(user_mention["screen_name"])

# Flat list of every mentioned screen name across all tweets.
screen_names = [
    mention['screen_name']
    for tweet in statuses
    for mention in tweet['entities']['user_mentions']
]
print("+++++++++", "+++++++++", sep="\n")
print("screen_names",screen_names)

# ---- Hashtags, tweet by tweet ----
for status in statuses:
    print("Examination of a status...")
    print(status['entities']['hashtags'])
    print("--")
    print("End of the current status")

# Flat list of every hashtag text across all tweets.
hashtags = [
    tag['text']
    for tweet in statuses
    for tag in tweet['entities']['hashtags']
]
print("hashtags=",hashtags)

# FIXME (note from the original author): there is a problem with this part.
# Whitespace-split words from all tweet texts, in order.
words = [word for text in status_texts for word in text.split()]
print(words)

# Peek at the first 5 tweet texts and the first 5 words
# (screen_names[:5] and hashtags[:5] can be dumped the same way).
print("For the five first items")
print(json.dumps(status_texts[:5], indent=1))
print(json.dumps(words[:5], indent=1))


Examination of a new status...
#####
{'id': 335803883, 'id_str': '335803883', 'indices': [1, 17], 'screen_name': 'KathleenMariee_', 'name': 'can you not? '}
-----
KathleenMariee_
#####
{'id': 445699886, 'id_str': '445699886', 'indices': [50, 65], 'screen_name': 'AhhlicksCruise', 'name': 'alejandro cruz'}
-----
AhhlicksCruise
#####
{'id': 337517759, 'id_str': '337517759', 'indices': [68, 82], 'screen_name': 'itsravennn_cx', 'name': '#BirdGang. ~(^.^~)'}
-----
itsravennn_cx
#####
{'id': 770810791, 'id_str': '770810791', 'indices': [84, 99], 'screen_name': 'kandykisses_13', 'name': '♫ Kelanie Nicole★'}
-----
kandykisses_13
#####
{'id': 632452596, 'id_str': '632452596', 'indices': [102, 110], 'screen_name': 'BMOLOGY', 'name': 'Charlemagne Demamp'}
-----
BMOLOGY
Examination of a new status...
#####
{'id': 582670707, 'id_str': '582670707', 'indices': [37, 52], 'screen_name': 'Linkin_Sunrise', 'name': 'Soldier Roxy '}
-----
Linkin_Sunrise
Examination of a new status...
#####
{'id': 56259379,

# Analyzing Tweets and Tweet Entities with Frequency Analysis

In [172]:
from collections import Counter
# Print the ten most frequent entries of each collection: words, then
# mentioned screen names, then hashtags.
for tokens in (words, screen_names, hashtags):
    frequencies = Counter(tokens)
    print (frequencies.most_common(10))

[('#MentionSomeoneImportantForYou', 92), ('RT', 34), ('my', 10), (',', 6), ('@justinbieber', 6), ('&lt;3', 6), ('My', 5), ('I', 4), ('and', 4), ('me', 3)]
[('justinbieber', 6), ('Kid_Charliej', 2), ('Cavillafuerte', 2), ('LosAlejandro_', 1), ('hassanmusician', 1), ('BigTittieAngel', 1), ('apettyfer', 1), ('imdanielpadilla', 1), ('needmalik', 1), ('cassandrasleee', 1)]
[('MentionSomeoneImportantForYou', 94), ('mentionsomeoneimportantforyou', 3), ('NoHomo', 1), ('Love', 1), ('bebesito', 1), ('MentionSomeOneImportantForYou', 1), ('MyHeart', 1)]


In [184]:
from prettytable import PrettyTable
# Render one two-column table (item, count) per collection, limited to the
# ten most frequent entries.
for label, data in (('Word', words),('Screen Name', screen_names),('Hashtag', hashtags)):
    pt = PrettyTable(field_names=[label, 'Count'])
    for row in Counter(data).most_common(10):
        pt.add_row(row)
    # Column alignment is left at the default; to left-align the labels and
    # right-align the counts, enable:
    #     pt.align[label], pt.align['Count'] = 'l', 'r'
    print (pt)

+--------------------------------+-------+
|              Word              | Count |
+--------------------------------+-------+
| #MentionSomeoneImportantForYou |   92  |
|               RT               |   34  |
|               my               |   10  |
|               ,                |   6   |
|         @justinbieber          |   6   |
|             &lt;3              |   6   |
|               My               |   5   |
|               I                |   4   |
|              and               |   4   |
|               me               |   3   |
+--------------------------------+-------+
+-----------------+-------+
|   Screen Name   | Count |
+-----------------+-------+
|   justinbieber  |   6   |
|   Kid_Charliej  |   2   |
|  Cavillafuerte  |   2   |
|  LosAlejandro_  |   1   |
|  hassanmusician |   1   |
|  BigTittieAngel |   1   |
|    apettyfer    |   1   |
| imdanielpadilla |   1   |
|    needmalik    |   1   |
|  cassandrasleee |   1   |
+-----------------+-------+
+-----