In [1]:
import json


## Test of the search function

In [51]:
# Import the Twython class
from twython import Twython
import json

# Read the API credentials from the local JSON file
with open("twitter_credentials.json") as file:
    creds = json.load(file)

# Build the client from the consumer key/secret pair
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])

# Search parameters, later unpacked into python_tweets.search(**query)
query = dict(
    q='learn python',
    result_type='popular',
    count=10,
    lang='en',
)


In [3]:
import pandas as pd

# Run the search and collect the fields of interest, one list per column
dict_ = {column: [] for column in ('user', 'date', 'text', 'favorite_count')}
for status in python_tweets.search(**query)['statuses']:
    dict_['user'].append(status['user']['screen_name'])
    dict_['date'].append(status['created_at'])
    dict_['text'].append(status['text'])
    dict_['favorite_count'].append(status['favorite_count'])

# Tabulate the columns and rank the tweets from most to least favorited
df = pd.DataFrame(dict_).sort_values(by='favorite_count', ascending=False)
df.head(5)


Unnamed: 0,user,date,text,favorite_count
1,GCPcloud,Wed Nov 06 17:17:03 +0000 2019,Announcing Python 3 in GA &amp; streaming supp...,64
2,cziscience,Thu Oct 31 20:34:33 +0000 2019,“We want napari to help not just Python practi...,54
0,GCPcloud,Wed Nov 06 21:58:50 +0000 2019,"DYK that devs can now also use Nodejs 12, Go 1...",49
3,KirkDBorne,Wed Nov 06 01:49:08 +0000 2019,[FREE e-Book] Learn Classification &amp; Regre...,44


In [6]:
# Look up the 'text' cell of the row labelled 0 in a single .loc call
df.loc[0, 'text']

'DYK that devs can now also use Nodejs 12, Go 1.13, PHP 7.3 and Python 3.8 in addition to Java 11 on App Engine? Lea… https://t.co/mq7A89v8kn'

## Test of the streaming API

In [7]:
from twython import TwythonStreamer
import csv

# Filter out unwanted data
def process_tweet(tweet):
    """Reduce a raw tweet payload to the few fields that get saved.

    When the payload is flagged as truncated and carries an 'extended_tweet'
    entry, the text (and the entities, if present there) are taken from that
    entry so the saved text is not cut off. A payload that has 'full_text'
    instead of 'text' at the top level is accepted as well.

    Args:
        tweet (dict): Tweet payload containing 'user' (with 'screen_name' and
            'location'), 'entities' (with 'hashtags') and 'text' or 'full_text'.

    Returns:
        dict: The keys 'hashtags' (list of hashtag texts), 'text', 'user' and
        'user_loc', inserted in that order (save_to_csv relies on the order
        of the values).

    Raises:
        KeyError: If one of the required entries is missing from the payload.
    """
    # Only trust 'extended_tweet' when the payload says its own text is truncated
    extended = tweet.get('extended_tweet') if tweet.get('truncated') else None
    if extended:
        text = extended['full_text']
        # Hashtags beyond the cut-off point are only listed in the extended entities
        entities = extended['entities'] if 'entities' in extended else tweet['entities']
    else:
        text = tweet['full_text'] if 'full_text' in tweet else tweet['text']
        entities = tweet['entities']

    user = tweet['user']
    return {
        'hashtags': [hashtag['text'] for hashtag in entities['hashtags']],
        'text': text,
        'user': user['screen_name'],
        'user_loc': user['location'],
    }
    
    
# Create a class that inherits TwythonStreamer
class MyStreamer(TwythonStreamer):
    """Streamer that appends a reduced version of each received tweet to a CSV file."""

    def on_success(self, data):
        """Handle one message received from the stream.

        Args:
            data (dict): Decoded message delivered by the stream.
        """
        # The stream can also deliver messages that are not tweets (for example
        # delete or limit notices); they have no 'text' entry and would make
        # process_tweet raise KeyError, so they are skipped.
        if 'text' not in data:
            return

        tweet_data = process_tweet(data)
        self.save_to_csv(tweet_data)

    def on_error(self, status_code, data):
        """Report an API problem and stop streaming.

        Args:
            status_code: Status code reported for the failed request.
            data: Error payload that came with it.
        """
        print(status_code, data)
        self.disconnect()

    def save_to_csv(self, tweet):
        """Append the values of one processed tweet as a row of saved_tweets.csv.

        No header row is written; the columns follow the order of the dict's
        values.

        Args:
            tweet (dict): Processed tweet, as returned by process_tweet.
        """
        # newline='' is what the csv module asks for (avoids blank rows on
        # Windows); an explicit UTF-8 encoding keeps emoji and accented
        # characters writable whatever the platform default is.
        with open(r'saved_tweets.csv', 'a', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(list(tweet.values()))


In [18]:
# Build the streamer with the consumer key/secret and the access token/secret
stream = MyStreamer(
    creds['CONSUMER_KEY'],
    creds['CONSUMER_SECRET'],
    creds['ACCESS_TOKEN'],
    creds['ACCESS_SECRET'],
)
# Start streaming the statuses of one account; 838657240231989250 is the
# numeric id of the GBR_Data user (it appears as user.id in the raw timeline
# payload printed further down)
stream.statuses.filter(follow=838657240231989250)

KeyboardInterrupt: 

In [19]:
# MyStreamer.save_to_csv writes data rows only (no header line), so the column
# names are supplied here, in the order of the keys built by process_tweet.
# Without them the first saved tweet would be consumed as the header.
df = pd.read_csv('saved_tweets.csv', header=None,
                 names=['hashtags', 'text', 'user', 'user_loc'])

FileNotFoundError: File b'saved_tweets.csv' does not exist

## Test of the 'follow' function (fetching a user's timeline)

In [52]:
# Account whose timeline is fetched
username = 'GBR_Data'
# Request up to 200 statuses of that account, retweets included
get_tweets = python_tweets.get_user_timeline(
    screen_name=username,
    count=200,
    include_rts=True,
)


In [22]:
# Display the raw list of statuses returned by get_user_timeline
get_tweets

[{'created_at': 'Thu Nov 07 10:11:59 +0000 2019',
  'id': 1192384190089715713,
  'id_str': '1192384190089715713',
  'text': 'RT @timnitGebru: This never ends. This year, so far, 15 out of 44 people to attend @black_in_ai workshop at @NeurIPSConf (which is still in…',
  'truncated': False,
  'entities': {'hashtags': [],
   'symbols': [],
   'user_mentions': [{'screen_name': 'timnitGebru',
     'name': 'Timnit Gebru',
     'id': 359831209,
     'id_str': '359831209',
     'indices': [3, 15]},
    {'screen_name': 'black_in_ai',
     'name': 'Black in AI',
     'id': 929791330519322624,
     'id_str': '929791330519322624',
     'indices': [83, 95]},
    {'screen_name': 'NeurIPSConf',
     'name': 'NeurIPS Conference',
     'id': 138840988,
     'id_str': '138840988',
     'indices': [108, 120]}],
   'urls': []},
  'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
  'in_reply_to_status_id': None,
  'in_reply_to_status_id_str': None,
  'in_repl

In [58]:
import time
# Collect the timeline of `username` field by field, one list per future column
dict_ = {column: [] for column in (
    'user', 'date', 'text', 'favorite_count',
    'user_mentions', 'urls', 'geo', 'retweet_count', 'retweeted_from')}
for status in python_tweets.get_user_timeline(screen_name=username, count=200,
                                              include_rts=True, tweet_mode='extended'):
    dict_['user'].append(status['user']['screen_name'])

    # Re-format 'created_at' (e.g. 'Thu Nov 07 10:11:59 +0000 2019') as 'YYYY-MM-DD HH:MM:SS'
    ts = time.strftime('%Y-%m-%d %H:%M:%S',
                       time.strptime(status['created_at'], '%a %b %d %H:%M:%S +0000 %Y'))
    dict_['date'].append(ts)

    # Text of the status itself; replaced below by the original text for retweets
    full_text = (status['extended_tweet']['full_text'] if status['truncated']
                 else status['full_text'])

    dict_['favorite_count'].append(status['favorite_count'])
    dict_['user_mentions'].append([user['screen_name'] for user in status['entities']['user_mentions']])
    dict_['urls'].append([url['url'] for url in status['entities']['urls']])
    dict_['geo'].append(status['geo'])
    dict_['retweet_count'].append(status['retweet_count'])

    if 'retweeted_status' in status:
        # For a retweet, record the original author and keep the original tweet's text
        dict_['retweeted_from'].append(status['retweeted_status']['user']['screen_name'])
        full_text = (status['retweeted_status']['extended_tweet']['full_text']
                     if status['retweeted_status']['truncated']
                     else status['retweeted_status']['full_text'])
    else:
        dict_['retweeted_from'].append(None)
    dict_['text'].append(full_text)

# Tabulate the columns and rank the tweets from most to least favorited
df = pd.DataFrame(dict_).sort_values(by='favorite_count', ascending=False)
df.head(5)


Unnamed: 0,user,date,text,favorite_count,user_mentions,urls,geo,retweet_count,retweeted_from
69,GBR_Data,2018-04-16 16:10:06,"New release of Graphexp, v0.8.0. More flexible...",39,[],[https://t.co/XIM8z4XbC6],,13,
23,GBR_Data,2019-05-15 02:07:58,I will present our work on anomaly detection w...,32,"[mizvladimir, KirellBenzi, trekkinglemon]",[https://t.co/Coio6IOQ8a],,11,
22,GBR_Data,2019-05-15 02:11:46,I had the chance to present our work with @nas...,10,"[naspert, mizvladimir, wikiworkshop]",[],,3,
49,GBR_Data,2018-10-21 11:13:33,New improvement in GraphExp: https://t.co/kEwO...,8,[],[https://t.co/kEwOwJjXPV],,4,
25,GBR_Data,2019-04-27 17:37:04,C'est important de communiquer sur l 'IA et se...,8,[],[https://t.co/FIlPpy8x51],,0,


In [48]:
# Re-order the tweets from newest to oldest and preview the first rows
df = df.sort_values(by='date', ascending=False)
df.head(5)

Unnamed: 0,user,date,text,favorite_count,user_mentions,urls,geo,retweet_count,retweeted_from
0,GBR_Data,2019-11-07 10:11:59,"RT @timnitGebru: This never ends. This year, s...",0,"[timnitGebru, black_in_ai, NeurIPSConf]",[],,512,timnitGebru
1,GBR_Data,2019-10-10 06:31:23,RT @SGeantes: Bon on va parler de Vera Rubin (...,0,[SGeantes],[],,10,SGeantes
2,GBR_Data,2019-10-03 15:46:45,RT @countcarbon: 150 years ago we lived in a w...,0,[countcarbon],[],,2593,countcarbon
3,GBR_Data,2019-10-01 19:58:29,Our guest next week. https://t.co/OD3wWkUGiy,1,[],[https://t.co/OD3wWkUGiy],,0,
4,GBR_Data,2019-09-26 18:30:00,"RT @MazetteBD: ""Quand Greta montre la catastro...",0,"[MazetteBD, Unpied]",[https://t.co/IlJYugWlO6],,3001,MazetteBD


In [43]:
# Inspect the second raw status of the timeline stored in get_tweets
get_tweets[1]

{'created_at': 'Thu Oct 10 06:31:23 +0000 2019',
 'id': 1182181817450991616,
 'id_str': '1182181817450991616',
 'text': 'RT @SGeantes: Bon on va parler de Vera Rubin (1928-2016) et de pourquoi l’annonce des Prix Nobel de physique d’hier donne l’impression qu’o…',
 'truncated': False,
 'entities': {'hashtags': [],
  'symbols': [],
  'user_mentions': [{'screen_name': 'SGeantes',
    'name': 'Sur les épaules de géantes',
    'id': 1035525787171139584,
    'id_str': '1035525787171139584',
    'indices': [3, 12]}],
  'urls': []},
 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 838657240231989250,
  'id_str': '838657240231989250',
  'name': 'Benjamin Ricaud',
  'screen_name': 'GBR_Data',
  'location': 'Evian-les-Bains, France',
  'description': 'Data, science, in

In [55]:
# `status` is the loop variable left behind by the timeline-collection loop;
# display it to inspect one raw payload fetched with tweet_mode='extended'
status

{'created_at': 'Thu Nov 07 10:11:59 +0000 2019',
 'id': 1192384190089715713,
 'id_str': '1192384190089715713',
 'full_text': 'RT @timnitGebru: This never ends. This year, so far, 15 out of 44 people to attend @black_in_ai workshop at @NeurIPSConf (which is still in…',
 'truncated': False,
 'display_text_range': [0, 140],
 'entities': {'hashtags': [],
  'symbols': [],
  'user_mentions': [{'screen_name': 'timnitGebru',
    'name': 'Timnit Gebru',
    'id': 359831209,
    'id_str': '359831209',
    'indices': [3, 15]},
   {'screen_name': 'black_in_ai',
    'name': 'Black in AI',
    'id': 929791330519322624,
    'id_str': '929791330519322624',
    'indices': [83, 95]},
   {'screen_name': 'NeurIPSConf',
    'name': 'NeurIPS Conference',
    'id': 138840988,
    'id_str': '138840988',
    'indices': [108, 120]}],
  'urls': []},
 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None

In [60]:
# Print every tweet text in full (the DataFrame preview shortens long texts)
for text in df['text'].tolist():
    print(text)

New release of Graphexp, v0.8.0. More flexible configuration for easier graph explorations! https://t.co/XIM8z4XbC6 #graphDB https://t.co/JkSvVwjr6y
I will present our work on anomaly detection with application to Wikipedia, done with @mizvladimir @KirellBenzi and @trekkinglemon, tomorrow Wednesday 16:24, session Web mining and content analysis II at #TheWebConf, https://t.co/Coio6IOQ8a https://t.co/FQvz7UkqBE
I had the chance to present our work with @naspert @mizvladimir at the #wikiworkshop2019. Great workshop with many interesting research topics. Thanks @wikiworkshop https://t.co/diqNqaUsDt
New improvement in GraphExp: https://t.co/kEwOwJjXPV, the ability to display multiple edges between 2 nodes + curved edges!🙂 https://t.co/yyzMna8OmQ
C'est important de communiquer sur l 'IA et ses limites et sur la science en général. Je vais faire plus d'efforts dans ce sens https://t.co/FIlPpy8x51
My first impressions on the #CosmosDB graph database, using @apachetinkerpop https://t.co/F0UgPj