Tweets are saved in JSON format ([JavaScript Object Notation](https://www.w3schools.com/js/js_json_intro.asp)).
JSON is plain text, written using JavaScript object notation.

The `json` Python module makes it easy to load JSON data into Python [dictionaries](https://docs.python.org/3/tutorial/datastructures.html#dictionaries).



In [None]:
#load tweets 

import json

# path of the file holding the tweets, one JSON document per line
filename = 'trumpTweets.txt'

tweet_list = []

# tweets routinely contain non-ASCII text (emoji, accented letters), so the
# file is read as UTF-8 explicitly instead of the platform default encoding
with open(filename, 'r', encoding='utf-8') as fopen:
    # each line corresponds to a tweet
    for line in fopen:
        # skip blank lines (e.g. a trailing newline), which json.loads rejects
        if line.strip():
            tweet_list.append(json.loads(line))
        

Let's look at the information contained in a tweet.

In [None]:
# take one tweet of the list as a running example
# (index 2 is the third tweet of the list, not the first)
tweet = tweet_list[2]

In [None]:
# each tweet is a Python dictionary (json.loads turned the JSON object into a dict)
type(tweet)

In [None]:
# all the keys ('entries') of the tweet dictionary
tweet.keys()

You can find a description of the fields in the Twitter API documentation: https://dev.twitter.com/overview/api/tweets

In [None]:
# creation time of the tweet, as stored under the 'created_at' key
tweet['created_at']

In [None]:
# text of the tweet, as stored under the 'text' key
print(tweet['text'])

In [None]:
# information about the user, as stored under the 'user' key
tweet['user']

In [None]:
# the 'user' entry is itself a dict
print(type(tweet['user']))

# so its fields are reached with a second key, e.g. the user's name
tweet['user']['name']

In [None]:
# unique id of the user who posted the tweet
tweet['user']['id']

In [None]:
# is the tweet a retweet? (tested through the presence of a 'retweeted_status' key)
'retweeted_status' in tweet

In [None]:
# when the tweet is a retweet, print what is stored under 'retweeted_status'
if 'retweeted_status' in tweet:
    print(tweet['retweeted_status'])
# the `retweeted_status` is also a tweet dictionary

In [None]:
# user id and name of the retweeted user
# (only printed when the tweet is a retweet)
if 'retweeted_status' in tweet:
    print(tweet['retweeted_status']['user']['id'])
    print(tweet['retweeted_status']['user']['name'])

In [None]:
# is the tweet a reply?
# a missing key and a None value both mean "not a reply"
tweet.get('in_reply_to_user_id') is not None

In [None]:
# the 'entities' entry contains the hashtags, urls and usernames used in the tweet
tweet['entities']

In [None]:
# print the user id of every user mentioned in the tweet
for mention in tweet['entities']['user_mentions']:
    print(mention['id'])

In [None]:
# is the tweet a quote? (tested through the presence of a 'quoted_status' key)
'quoted_status' in tweet

# Building the network of interactions

We will use the python module [`NetworkX`](https://networkx.readthedocs.io/en/stable/index.html) to construct and analyze the social network.

A short introduction to networkx: https://github.com/networkx/notebooks


Four types of interactions:
- Retweet
- Quote
- Reply
- Mention

In [None]:
# let's define some functions to extract the interactions from tweets

def getTweetID(tweet):
    """Return the value stored under the tweet's 'id' key.

    The result is None when the tweet dictionary has no 'id' entry.
    """
    tweet_id = tweet.get('id')
    return tweet_id
    
def getUserIDandScreenName(tweet):
    """Return the (user id, screen name) pair of the tweet's author.

    Both values are read from the dictionary stored under the tweet's
    'user' key. A field that is absent comes back as None, and a tweet
    without a 'user' entry yields (None, None).
    """
    author = tweet.get('user')
    if author is None:
        return (None, None)
    return author.get('id'), author.get('screen_name')

def getRetweetedUserIDandSreenName(tweet):
    """Return the (user id, screen name) pair of the retweeted user.

    The pair is extracted from the tweet stored under 'retweeted_status'.
    A tweet without that entry yields (None, None).
    """
    original_tweet = tweet.get('retweeted_status')
    if original_tweet is None:
        return (None, None)
    return getUserIDandScreenName(original_tweet)
    
def getRepliedUserIDandScreenName(tweet):
    """Return the (user id, screen name) pair of the user being replied to.

    The values come from the tweet's 'in_reply_to_user_id' and
    'in_reply_to_screen_name' entries; each is None when absent.
    """
    return (
        tweet.get('in_reply_to_user_id'),
        tweet.get('in_reply_to_screen_name'),
    )
    
def getUserMentionsIDandScreenName(tweet):
    """Return the (user id, screen name) pairs of all users mentioned in the tweet.

    The mentions are read from tweet['entities']['user_mentions'].
    An empty list is returned when either level is missing or None,
    instead of failing while iterating over None. Within a mention,
    a missing 'id' or 'screen_name' is reported as None.
    """
    # `or` falls back to an empty container for both a missing key and an
    # explicit None value
    entities = tweet.get('entities') or {}
    user_mentions = entities.get('user_mentions') or []

    return [
        (mention.get('id'), mention.get('screen_name'))
        for mention in user_mentions
    ]

    
def getQuotedUserIDandScreenName(tweet):
    """Return the (user id, screen name) pair of the quoted user.

    The pair is extracted from the tweet stored under 'quoted_status'.
    A tweet without that entry yields (None, None).
    """
    quoted_tweet = tweet.get('quoted_status')
    if quoted_tweet is None:
        return (None, None)
    return getUserIDandScreenName(quoted_tweet)
    
def getAllInteractions(tweet):
    """Get all the interactions from this tweet.

    Collects the replied-to, retweeted, quoted and mentioned users.

    returns : (tweeter_id, tweeter_screenname), list of (interacting_id, interacting_screenname)

    Each interacting user id appears once in the list. The tweeter itself
    and entries without a user id are left out. When the tweeter cannot be
    determined, the result is ((None, None), []).
    """
    # Get the tweeter
    tweeter = getUserIDandScreenName(tweet)
    tweeter_id = tweeter[0]

    # Nothing to do if we couldn't get the tweeter
    if tweeter_id is None:
        return (None, None), []

    # every (id, screen name) pair the tweet interacts with: reply target,
    # retweeted user, quoted user, then the mentions
    candidates = [
        getRepliedUserIDandScreenName(tweet),
        getRetweetedUserIDandSreenName(tweet),
        getQuotedUserIDandScreenName(tweet),
    ]
    candidates.extend(getUserMentionsIDandScreenName(tweet))

    # Keyed by user id so that each id is kept once, even if the same user
    # shows up with and without a screen name
    interacting_users = {}
    for user_id, screen_name in candidates:
        # skip entries without an id (they cannot be used as graph nodes)
        # and the tweeter itself
        if user_id is None or user_id == tweeter_id:
            continue
        # keep the first known screen name for this id
        if interacting_users.get(user_id) is None:
            interacting_users[user_id] = screen_name

    # Return our tweeter and their influencers
    return tweeter, list(interacting_users.items())
    


In [None]:
# try the helper on the example tweet: prints the (user id, screen name) pair
print(getUserIDandScreenName(tweet))

In [None]:
# does the example tweet carry a 'user' entry?
# (`user` only exists as a local variable inside getUserIDandScreenName,
# so at notebook level it has to be looked up on the tweet)
tweet.get('user') is not None

Let's build the network

In [None]:
import networkx as nx

# define an empty Directed Graph
G = nx.DiGraph()

# loop over all the tweets and add edges if the tweet includes some interactions
for tweet in tweet_list:
    # find all influencers in the tweet
    (tweeter_id, tweeter_name), interactions = getAllInteractions(tweet)

    # add an edge to the Graph for each influencer
    for interact_id, interact_name in interactions:
        # store the screen name as a 'name' attribute on each node;
        # add_node creates the node if needed and otherwise updates its
        # attributes, and does not depend on the `G.node` mapping, which
        # newer networkx versions no longer provide
        G.add_node(tweeter_id, name=tweeter_name)
        G.add_node(interact_id, name=interact_name)
        # add a directed edge from the tweeter to the user it interacts with
        G.add_edge(tweeter_id, interact_id)
        

In [None]:
# number of nodes (distinct user ids) in the interaction graph
G.number_of_nodes()

In [None]:
# attributes stored on one node, looked up by user id
# dict(G.nodes(data=True)) maps every node to its attribute dict and works
# whether networkx exposes node data as `G.node` (1.x) or `G.nodes` (2.x+)
dict(G.nodes(data=True))[472286193]

In [None]:
# degree of that same node in G
G.degree(472286193)

In [None]:
# average degree
# G is directed, so edges / nodes is the average in-degree (which equals the
# average out-degree); the average of G.degree (in + out) would be twice this
G.number_of_edges()/G.number_of_nodes()

In [None]:
# maximum in-degree (largest number of incoming edges on any node)
# dict(G.in_degree()) gives a node -> in-degree mapping on both old and new
# networkx versions; `in_degree_iter` only exists in networkx 1.x
max(dict(G.in_degree()).values())

In [None]:
# (node id, screen name, degree) for every node of the graph
# get_node_attributes / dict(G.degree()) work on both old and new networkx
# versions, unlike `nodes_iter` and `G.node`, which only exist in 1.x
names = nx.get_node_attributes(G, 'name')
degrees = dict(G.degree())
degree_node_list = [(node, names[node], degrees[node]) for node in G.nodes()]

print(degree_node_list[:10])

# sort the list according to the degree in descending order
degree_node_list = sorted(degree_node_list, key=lambda x: x[2], reverse=True)
print(degree_node_list[:10])

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# weakly connected components of G (connectivity ignoring edge direction)
# NOTE(review): presumably a one-shot iterable in the networkx version used
# here, since it is computed again further down before being reused - confirm
components = nx.weakly_connected_components(G)

In [None]:
# display the object returned by weakly_connected_components
components

In [None]:
# size (number of nodes) of every weakly connected component
# the components are recomputed here rather than read from `components`, so
# that re-running this cell never iterates over an already exhausted generator
comp_sizes = [len(comp) for comp in nx.weakly_connected_components(G)]

In [None]:
# histogram of the component sizes, using 100 bins
hist = plt.hist(comp_sizes, bins=100)

In [None]:
# same histogram with log=True, i.e. with the count axis on a log scale
hist = plt.hist(comp_sizes, bins=100, log=True)

In [None]:
# recompute the components and keep the largest one
components = nx.weakly_connected_components(G)
# max() with key=len picks the biggest component in a single pass; there is
# no need to sort all the components just to take the first one
largest_comp = max(components, key=len)

In [None]:
# number of nodes in the largest weakly connected component
len(largest_comp)

In [None]:
# subgraph of G restricted to the nodes of the largest component
LCC = G.subgraph(largest_comp)

In [None]:
# number of nodes in the full graph, for comparison with the largest component
G.number_of_nodes()

In [None]:
# number of nodes in the largest component's subgraph
LCC.number_of_nodes()

In [None]:
# draw the largest component with a circular layout
# (tiny, semi-transparent nodes and thin edges)
nx.draw_circular(LCC, node_size=1, alpha=0.7, linewidths=None, width=0.2)


### Exercise: do the same for the graphs comprising only retweets, only replies, only quotes, or only mentions