In [None]:
import pandas as pd
import json
import os
import time

In [None]:
# Twython is the Twitter API client used by every download in this notebook
import json

from twython import Twython

# The consumer key/secret pair is read from a local JSON file so that no
# secret is written in the notebook itself.
with open("twitter_credentials.json", mode="r") as credentials_file:
    creds = json.load(credentials_file)

# Client built from the consumer key and secret only
python_tweets = Twython(creds['CONSUMER_KEY'], creds['CONSUMER_SECRET'])


In [None]:
# Seed accounts: each one is crawled first, then the accounts it mentions
# often enough are crawled in turn.
username_list = [
    'GilbertCollard', 'dav_dec', 'Carbongate', 'bcassoret',
    'Electroversenet', 'thinkfree55', 'KlassLib', 'sauvonsleclimat',
]

# Output directory for the CSV edge lists and the exported graph.
# Keep the trailing slash: file names are built by plain string concatenation.
data_path = 'multiusers/'

# makedirs(exist_ok=True) makes this cell safe to re-run, creates missing
# parent directories, and avoids the check-then-create race of isdir + mkdir.
os.makedirs(data_path, exist_ok=True)

In [None]:
def fill_retweet_info(tweet_dic,raw_retweet):
    """Record who was retweeted and return the text of the retweeted status.

    Parameters
    ----------
    tweet_dic : dict
        Column-oriented accumulator. The screen name of the retweeted
        status' author is appended, in place, to its 'retweeted_from' list.
    raw_retweet : dict
        The retweeted status. Must provide 'user' -> 'screen_name',
        'truncated', and the text under 'extended_tweet' -> 'full_text'
        when 'truncated' is truthy, or under 'full_text' otherwise.

    Returns
    -------
    tuple
        ``(tweet_dic, full_text)``: the same accumulator object that was
        passed in, and the text of the retweeted status.
    """
    original_author = raw_retweet['user']['screen_name']
    tweet_dic['retweeted_from'].append(original_author)

    # Truncated payloads keep the complete text one level down.
    text_holder = raw_retweet['extended_tweet'] if raw_retweet['truncated'] else raw_retweet
    return tweet_dic, text_holder['full_text']

In [None]:
def get_user_tweets(tweet_handle, username,count=200):
    """Download a user's timeline and return it as parallel column lists.

    Parameters
    ----------
    tweet_handle : object
        Client exposing ``get_user_timeline(screen_name=..., count=...,
        include_rts=..., tweet_mode=...)`` and returning an iterable of
        tweet dicts.
    username : str
        Screen name whose timeline is requested.
    count : int, optional
        Number of tweets asked from the API (default 200).

    Returns
    -------
    dict
        Maps each of 'user', 'date', 'text', 'favorite_count',
        'retweet_count', 'user_mentions', 'urls', 'hashtags', 'geo' and
        'retweeted_from' to a list with one entry per tweet. For retweets,
        'text' holds the text of the retweeted status and 'retweeted_from'
        its author's screen name; otherwise 'retweeted_from' is None.
    """
    column_names = ('user', 'date', 'text', 'favorite_count', 'retweet_count',
                    'user_mentions', 'urls', 'hashtags', 'geo', 'retweeted_from')
    tweets_dic = {column: [] for column in column_names}

    timeline = tweet_handle.get_user_timeline(screen_name=username, count=count,
                                              include_rts=True, tweet_mode='extended')
    for raw_tweet in timeline:
        # Meta data
        tweets_dic['user'].append(raw_tweet['user']['screen_name'])
        # Re-format the API timestamp (fixed '+0000' offset) as 'YYYY-MM-DD HH:MM:SS'
        created = time.strptime(raw_tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
        tweets_dic['date'].append(time.strftime('%Y-%m-%d %H:%M:%S', created))
        tweets_dic['favorite_count'].append(raw_tweet['favorite_count'])
        tweets_dic['retweet_count'].append(raw_tweet['retweet_count'])

        # Entities: keep only the identifying field of each item
        entities = raw_tweet['entities']
        tweets_dic['user_mentions'].append([mention['screen_name'] for mention in entities['user_mentions']])
        tweets_dic['urls'].append([link['url'] for link in entities['urls']])
        tweets_dic['hashtags'].append([tag['text'] for tag in entities['hashtags']])
        tweets_dic['geo'].append(raw_tweet['geo'])

        # Text of the tweet itself; truncated payloads keep it one level down
        text_holder = raw_tweet['extended_tweet'] if raw_tweet['truncated'] else raw_tweet
        full_text = text_holder['full_text']

        # A retweet is stored with the text of the status it retweets
        if 'retweeted_status' not in raw_tweet:
            tweets_dic['retweeted_from'].append(None)
        else:
            tweets_dic, full_text = fill_retweet_info(tweets_dic, raw_tweet['retweeted_status'])
        tweets_dic['text'].append(full_text)
    return tweets_dic
    

In [None]:
def get_mentions_edges(tweet_df):
    """Build the weighted (user -> mention) edge list of a tweet table.

    Parameters
    ----------
    tweet_df : pandas.DataFrame
        One row per tweet with at least the columns 'user' (author),
        'user_mentions' (list of mentioned screen names) and 'hashtags'
        (list of hashtag strings).

    Returns
    -------
    tuple or None
        ``None`` when no tweet contains a mention. Note that this is a bare
        ``None`` and not a pair, so callers that unpack the result must
        handle it. Otherwise ``(mention_grouped, mention_g_list)`` where

        * ``mention_grouped`` is a DataFrame with columns 'user', 'mention',
          'weight' (number of tweets in which the user mentions that
          account) and 'hashtags' (the hashtags of all those tweets,
          concatenated into one flat list);
        * ``mention_g_list`` is a Series indexed by (user, mention) holding
          the hashtags as one list per tweet.
    """
    # One record per (tweet, mentioned account) pair
    row_list = []
    for _, tweet in tweet_df.iterrows():
        user = tweet['user']
        hashtags = tweet['hashtags']
        for m in tweet['user_mentions']:
            row_list.append({'user': user, 'mention': m, 'weight': 1, 'hashtags': hashtags})
    mention_df = pd.DataFrame(row_list)
    if mention_df.empty:
        return None

    def _concat_tag_lists(tag_lists):
        # Flatten the per-tweet hashtag lists of one (user, mention) group
        return [tag for tags in tag_lists for tag in tags]

    by_pair = mention_df.groupby(['user', 'mention'])
    # this agg only works with pandas version >= 0.25
    # The builtin `sum` is deliberately not passed here: it only worked
    # because pandas aliased it to GroupBy.sum (deprecated behaviour), and
    # the real builtin cannot add lists to its integer start value.
    mention_grouped = by_pair.agg(weight=('weight', 'sum'),
                                  hashtags=('hashtags', _concat_tag_lists)).reset_index()
    mention_g_list = by_pair['hashtags'].apply(list)
    return mention_grouped, mention_g_list

In [None]:
def collect_user_mention(username,python_tweets,data_path):
    """Download one user's timeline and derive its mention edges.

    Parameters
    ----------
    username : str
        Screen name to download.
    python_tweets : object
        API client handed to ``get_user_tweets``.
    data_path : str
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    tuple
        The two values produced by ``get_mentions_edges`` for the user's
        last 200 tweets.

    Raises
    ------
    TypeError
        When ``get_mentions_edges`` returns ``None`` (no mention found),
        because the result is unpacked as a pair.
    """
    timeline_df = pd.DataFrame(get_user_tweets(python_tweets, username, count=200))
    edges, hashtags_per_tweet = get_mentions_edges(timeline_df)
    return edges, hashtags_per_tweet

In [None]:
def create_user_edgelist(python_tweets, data_path, username, thres=3):
    """Crawl a seed user and the accounts they mention often, writing CSV edge lists.

    The seed user's mention edges are written to
    ``<data_path><username>_mentions.csv``. Then, for every account the seed
    user mentions at least ``thres`` times, that account's own mention edges
    are downloaded and written to
    ``<data_path><mention>_mentions_t<thres>.csv``.

    Parameters
    ----------
    python_tweets : object
        API client handed to ``collect_user_mention``.
    data_path : str
        Output directory prefix; concatenated as-is with the file names, so
        it must end with a path separator.
    username : str
        Screen name of the seed user.
    thres : int, optional
        Minimum mention count for a mentioned account to be crawled
        (default 3).

    Returns
    -------
    None
        Failures are reported on stdout and the affected user is skipped.
    """
    # initial user
    print('Processing',username)
    try:
        seed_edges, _ = collect_user_mention(username,python_tweets,data_path)
    except Exception as exc:
        # Best-effort crawl: report and give up on this seed user, but let
        # KeyboardInterrupt / SystemExit propagate (a bare except would not).
        print('exception catched on user {} !!!!!!!!!!!!'.format(username), repr(exc))
        return
    if seed_edges is None:
        print('No mentions found for user', username)
        return
    # NOTE(review): this file name has no '_t<thres>' suffix, so a reader
    # globbing for '*_mentions_t<thres>.csv' will not pick it up - confirm
    # whether the seed user's own edges are meant to be left out.
    seed_edges.to_csv(data_path + username + '_mentions.csv')
    print('First user done')

    # Threshold for number of mentions
    print('Using threshold:',thres)

    for idx,row in seed_edges.iterrows():
        print('processing mention',idx)
        mention_name = row['mention']
        if row['weight'] < thres:
            continue
        try:
            mention_edges, _ = collect_user_mention(mention_name,python_tweets,data_path)
        except Exception as exc:
            # Report the account that actually failed (the mentioned one).
            print('exception catched on user {} !!!!!!!!!!!!'.format(mention_name), repr(exc))
            continue
        if mention_edges is not None:
            mentionfilename = data_path + mention_name + '_mentions' +'_t' +str(thres)+'.csv'
            print('Writing',mentionfilename)
            mention_edges.to_csv(mentionfilename)

In [None]:
# Alternative single-account configuration:
#data_path = 'GBR_data/'
#username_list = ['GBR_Data']

# Minimum number of mentions for a mentioned account to be crawled as well;
# the value is also part of the output file names.
thres = 3
for seed_user in username_list:
    create_user_edgelist(python_tweets, data_path, seed_user, thres=thres)

In [None]:
import glob

# Read back every thresholded per-user edge list written for this `thres`.
# Sorting the matches makes the row order reproducible between runs.
edge_frames = []
for filename in sorted(glob.glob(data_path + '*_mentions' +'_t' +str(thres)+ '.csv')):
    print(filename)
    edge_frames.append(pd.read_csv(filename))

# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on every
# iteration); concatenate once instead. pd.concat rejects an empty list, so
# fall back to an empty frame when no file matched.
edge_df = pd.concat(edge_frames) if edge_frames else pd.DataFrame()
    

In [None]:
# Edges whose 'hashtags' cell splits into more than one whitespace-separated token
hashtag_token_counts = edge_df['hashtags'].apply(lambda serialized: len(serialized.split()))
edge_df[hashtag_token_counts > 1]

In [None]:
import networkx as nx

# Build the graph from the edge table, keeping weight and hashtags as edge attributes
G = nx.from_pandas_edgelist(edge_df, source='user', target='mention',
                            edge_attr=['weight', 'hashtags'])
print('Nb of nodes:',G.number_of_nodes())

# Drop weakly connected nodes. Degrees are computed once, before any removal,
# so this is a single pass and not an iterative pruning.
min_degree = 4
low_degree_nodes = [node for node, degree in G.degree() if degree < min_degree]
G.remove_nodes_from(low_degree_nodes)
print('Nb of nodes after removing less connected nodes:',G.number_of_nodes())

In [None]:
# Base name of the exported graph file; the data directory, the threshold
# suffix and the extension are added when the file name is built.
graphname = 'multiusersgraph'
# Alternative base name:
#graphname = 'GBRgraph'

In [None]:
# Export the pruned graph as GEXF; the mention threshold is part of the file name
graphfilename = '{}{}_t{}_graph.gexf'.format(data_path, graphname, thres)
nx.write_gexf(G, graphfilename)
print('Wrote',graphfilename)

In [None]:
# Display the combined edge list read back from the per-user CSV files
edge_df