In [6]:
import pandas as pd
import json
import os
import time


In [2]:
# Import the Twython class
from twython import Twython
import json

# Read the consumer key/secret pair from the local JSON credentials file
with open("twitter_credentials.json", mode="r") as file:
    creds = json.loads(file.read())

# Build the Twython client from the consumer key and secret
python_tweets = Twython(app_key=creds['CONSUMER_KEY'], app_secret=creds['CONSUMER_SECRET'])


In [14]:
# Seed account whose mention network is crawled.
#username = 'templivs'
username = 'GilbertCollard'
# Every CSV / graph file produced for this seed is written under a directory named after it.
data_path = username + '/'
# exist_ok=True replaces the check-then-create pair and keeps the cell safely re-runnable.
os.makedirs(data_path, exist_ok=True)

In [15]:
def fill_retweet_info(tweet_dic,raw_retweet):
    """Record the original author of a retweet and return the retweet's text.

    Parameters
    ----------
    tweet_dic : dict
        Accumulator holding a 'retweeted_from' list; the screen name of the
        original author is appended to it (the dict is mutated in place).
    raw_retweet : dict
        The 'retweeted_status' payload of a tweet. Must contain
        ['user']['screen_name'].

    Returns
    -------
    tuple
        (tweet_dic, full_text), where full_text is the best available text of
        the original tweet, or None if the payload carries no text field.
    """
    tweet_dic['retweeted_from'].append(raw_retweet['user']['screen_name'])
    # A payload flagged as truncated does not necessarily carry an
    # 'extended_tweet' object, so only use it when it is really there and
    # otherwise fall back to 'full_text', then to the plain 'text' field.
    extended = raw_retweet.get('extended_tweet') or {}
    if raw_retweet.get('truncated') and 'full_text' in extended:
        full_text = extended['full_text']
    elif 'full_text' in raw_retweet:
        full_text = raw_retweet['full_text']
    else:
        full_text = raw_retweet.get('text')
    return tweet_dic, full_text

In [16]:
def _extract_full_text(raw_tweet):
    """Return the best available text of a tweet payload, or None if it has no text field."""
    extended = raw_tweet.get('extended_tweet') or {}
    # Only trust 'extended_tweet' when it is actually present; a payload can be
    # flagged as truncated without carrying that object.
    if raw_tweet.get('truncated') and 'full_text' in extended:
        return extended['full_text']
    if 'full_text' in raw_tweet:
        return raw_tweet['full_text']
    return raw_tweet.get('text')


def get_user_tweets(tweet_handle, username,count=200):
    """Fetch a user's timeline and flatten it into a dict of parallel lists.

    Parameters
    ----------
    tweet_handle : object
        Client exposing get_user_timeline(screen_name=..., count=...,
        include_rts=..., tweet_mode=...) and returning an iterable of tweet dicts.
    username : str
        Screen name whose timeline is requested.
    count : int, optional
        Value passed through as the `count` argument of the timeline request.

    Returns
    -------
    dict
        Keys 'user', 'date', 'text', 'favorite_count', 'retweet_count',
        'user_mentions', 'urls', 'geo' and 'retweeted_from'; each maps to a
        list with one entry per tweet. 'date' is formatted as
        'YYYY-MM-DD HH:MM:SS'. 'retweeted_from' is the original author's screen
        name for retweets and None otherwise. For retweets, 'text' is the text
        of the original tweet.
    """
    tweets_dic = {'user': [], 'date': [], 'text': [], 'favorite_count': [], 'retweet_count': [],
        'user_mentions': [], 'urls': [], 'geo': [], 'retweeted_from': []}

    timeline = tweet_handle.get_user_timeline(screen_name = username,
                                              count = count, include_rts = True, tweet_mode='extended')
    for raw_tweet in timeline:
        # Meta data
        tweets_dic['user'].append(raw_tweet['user']['screen_name'])
        # The input format expects a literal '+0000' offset in 'created_at'.
        ts = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(raw_tweet['created_at'],'%a %b %d %H:%M:%S +0000 %Y'))
        tweets_dic['date'].append(ts)
        tweets_dic['favorite_count'].append(raw_tweet['favorite_count'])
        tweets_dic['retweet_count'].append(raw_tweet['retweet_count'])
        tweets_dic['user_mentions'].append([user['screen_name'] for user in raw_tweet['entities']['user_mentions']])
        tweets_dic['urls'].append([url['url'] for url in raw_tweet['entities']['urls']])
        tweets_dic['geo'].append(raw_tweet['geo'])

        # Handle text and retweet data
        if 'retweeted_status' in raw_tweet:
            # For a retweet the stored text is the original tweet's text, so the
            # wrapper tweet's own text is never needed.
            tweets_dic, full_text = fill_retweet_info(tweets_dic,raw_tweet['retweeted_status'])
        else:
            tweets_dic['retweeted_from'].append(None)
            full_text = _extract_full_text(raw_tweet)
        tweets_dic['text'].append(full_text)
    return tweets_dic
    

In [17]:
def get_mentions(tweet_df):
    """Count how often each screen name is mentioned across all tweets.

    Parameters
    ----------
    tweet_df : pandas.DataFrame
        One row per tweet, with a 'user_mentions' column holding an iterable
        of mentioned screen names.

    Returns
    -------
    dict
        Maps each mentioned screen name to its number of occurrences, in order
        of first appearance.
    """
    counts = {}
    for _, row in tweet_df.iterrows():
        for name in row['user_mentions']:
            counts[name] = counts.get(name, 0) + 1
    return counts

In [18]:
def get_mentions_graph(tweet_df):
    """Aggregate tweets into a weighted (user -> mention) edge list.

    Parameters
    ----------
    tweet_df : pandas.DataFrame
        One row per tweet, with a 'user' column (author screen name) and a
        'user_mentions' column holding an iterable of mentioned screen names.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'user', 'mention' and 'weight', one row per distinct
        (user, mention) pair, where 'weight' is the number of mentions.
        None when no tweet mentions anyone.
    """
    # One record per individual mention; duplicates are summed below.
    edge_records = [
        {'user': tweet['user'], 'mention': m, 'weight': 1}
        for _, tweet in tweet_df.iterrows()
        for m in tweet['user_mentions']
    ]
    if not edge_records:
        return None
    mention_df = pd.DataFrame(edge_records)
    # as_index=False keeps 'user' and 'mention' as ordinary columns, so no
    # in-place reset_index is needed afterwards.
    return mention_df.groupby(['user','mention'], as_index=False)['weight'].sum()

In [19]:
# Seed user: fetch the timeline and build its mention edge list
tweets_dic = get_user_tweets(python_tweets,username,count=200)
tweet_df = pd.DataFrame(tweets_dic)
mention_grouped = get_mentions_graph(tweet_df)
if mention_grouped is None:
    # get_mentions_graph returns None when no tweet mentions anyone; fail with a
    # clear message instead of an AttributeError on None.to_csv below.
    raise ValueError('No mentions found for ' + username + '; nothing to crawl')
# NOTE(review): this file name has no '_t<thres>' suffix, so the later
# glob over '*_mentions_t<thres>.csv' does not pick up the seed user's own
# edges -- confirm whether that is intended.
mention_grouped.to_csv(data_path + username + '_mentions.csv')
print('First user done')

# Threshold for number of mentions: only accounts the seed user mentioned at
# least this many times are crawled in turn
thres = 3
print('Using threshold:',thres)


for idx,row in mention_grouped.iterrows():
    print('processing mention',idx)
    mention_name = row['mention']
    if row['weight'] < thres:
        continue
    # Use separate names for the per-account results so the seed user's
    # frames (being iterated over) are not rebound inside the loop.
    neighbor_tweet_df = pd.DataFrame(get_user_tweets(python_tweets,mention_name,count=200))
    neighbor_mentions = get_mentions_graph(neighbor_tweet_df)
    if neighbor_mentions is not None:
        mentionfilename = data_path + mention_name + '_mentions' +'_t' +str(thres)+'.csv'
        print('Writing',mentionfilename)
        neighbor_mentions.to_csv(mentionfilename)

First user done
Using threshold: 3
processing mention 0
processing mention 1
processing mention 2
processing mention 3
processing mention 4
processing mention 5
processing mention 6
processing mention 7
processing mention 8
processing mention 9
Writing GilbertCollard/ERichoufftz_mentions_t3.csv
processing mention 10
processing mention 11
processing mention 12
processing mention 13
processing mention 14
processing mention 15
processing mention 16
processing mention 17
processing mention 18
processing mention 19
Writing GilbertCollard/LaMatinaleLCI_mentions_t3.csv
processing mention 20
processing mention 21
processing mention 22
Writing GilbertCollard/MLP_officiel_mentions_t3.csv
processing mention 23
processing mention 24
processing mention 25
Writing GilbertCollard/NMeizonnet_mentions_t3.csv
processing mention 26
processing mention 27
processing mention 28
Writing GilbertCollard/RNational_off_mentions_t3.csv
processing mention 29
processing mention 30
Writing GilbertCollard/SoMabrouk_m

In [20]:
import glob

# Load every per-account edge list written with the current threshold.
# glob does not guarantee an order, so sort the paths to make the row order of
# edge_df (and anything derived from it) independent of the file system.
edge_frames = []
for filename in sorted(glob.glob(data_path + '*_mentions' +'_t' +str(thres)+ '.csv')):
    print(filename)
    edge_frames.append(pd.read_csv(filename))
# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing the frame file by file. pd.concat rejects an empty list, so keep the
# original "empty frame when nothing matched" result explicitly.
edge_df = pd.concat(edge_frames) if edge_frames else pd.DataFrame()
    

GilbertCollard/SoMabrouk_mentions_t3.csv
GilbertCollard/MLP_officiel_mentions_t3.csv
GilbertCollard/LaMatinaleLCI_mentions_t3.csv
GilbertCollard/RNational_off_mentions_t3.csv
GilbertCollard/NMeizonnet_mentions_t3.csv
GilbertCollard/ERichoufftz_mentions_t3.csv


In [21]:
import networkx as nx
# Build the mention graph: one edge per (user, mention) row, carrying its weight
G = nx.from_pandas_edgelist(edge_df, source='user', target='mention', edge_attr='weight')
print('Nb of nodes:',G.number_of_nodes())
# Prune weakly connected nodes (degree below 2)
remove = [name for name, deg in G.degree() if deg < 2]
G.remove_nodes_from(remove)
print('Nb of nodes after removing less connected nodes:',G.number_of_nodes())

Nb of nodes: 283
Nb of nodes after removing less connected nodes: 62


In [22]:
# Write the pruned graph as GEXF inside the seed user's data directory
graphfilename = ''.join([data_path, username, '_t', str(thres), '_graph.gexf'])
nx.write_gexf(G, graphfilename)
print('Wrote',graphfilename)

Wrote GilbertCollard/GilbertCollard_t3_graph.gexf
