In [1]:
import json
import os
import re

import tweepy
from py2neo import Graph

# Neo4j

## Connexion

In [2]:
# Connection settings are read from the environment so credentials do not have
# to live in the notebook; the previous literals remain as fallbacks so the
# notebook still runs unchanged in the original docker setup.
uri = os.environ.get("NEO4J_URI", "http://neo4j:7474")
password = os.environ.get("NEO4J_PASSWORD", "esgi_password")
graph = Graph(uri, password=password)
graph

<Graph database=<Database uri='http://neo4j:7474' secure=False user_agent='py2neo/4.2.0 urllib3/1.24.2 Python/3.7.3-final-0 (linux)'> name='data'>

## Suppression du travail précédent

In [3]:
# Remove every node (and its relationships) left over from a previous run.
wipe_all_query = "MATCH (n) DETACH DELETE n"
graph.run(wipe_all_query)

<py2neo.database.Cursor at 0x7fa4ee6d0588>

In [4]:
# Sanity check: list up to 25 remaining nodes (an empty list means the wipe worked).
remaining_nodes = graph.run("MATCH (n) RETURN n LIMIT 25")
remaining_nodes.data()

[]

# Twitter api

### Token

In [5]:
# Twitter credentials are taken from the environment so real secrets never get
# committed with the notebook. The placeholder values are kept as fallbacks so
# the cell still defines all four names when the variables are not set.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "XXX")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "XXXX")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "XXXX")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "XXXX")


### Authentification

In [6]:
# Build the OAuth handler from the application (consumer) credentials,
# then attach the user-level access token pair to it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)


In [7]:
# API client bound to the authenticated handler; every Twitter helper below
# goes through this object. Both rate-limit flags are switched on --
# presumably tweepy then waits (and prints a notice) instead of failing when a
# rate limit is hit; confirm against the installed tweepy version.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

### Fonctions twitter

In [8]:
def user_info(id_or_name, links=True):
    """Look up a Twitter account and, optionally, its follower/friend ids.

    Parameters
    ----------
    id_or_name
        Value forwarded to ``api.get_user`` (a user id or a screen name).
    links : bool, default True
        When true, the follower ids and friend ids are fetched as well.

    Returns
    -------
    dict or (dict, list, list)
        ``direct_info`` with the keys ``screen_name``, ``name`` and ``id``.
        With ``links=True`` the result is the tuple
        ``(direct_info, folower_ids, friend_ids)``.

    Notes
    -----
    If the lookup raises ``tweepy.TweepError`` every field of ``direct_info``
    is set to ``'unknown'``. In that case there is no user to query links for,
    so both id lists are returned empty (previously this path crashed because
    ``user`` was never bound).
    """
    try:
        user = api.get_user(id_or_name)
    except tweepy.TweepError:
        unknown_info = {
            "screen_name": 'unknown',
            "name": 'unknown',
            "id": 'unknown'}
        # Keep the return shape the caller asked for, just without any links.
        if links:
            return unknown_info, [], []
        return unknown_info

    direct_info = {
        "screen_name": user.screen_name,
        "name": user.name,
        "id": user.id}

    if not links:
        return direct_info

    folower_ids = api.followers_ids(user.id)
    friend_ids = api.friends_ids(user.id)
    return direct_info, folower_ids, friend_ids

#direct_info, folower_ids, friend_ids = user_info("dataswati", links=True)

In [10]:
def tweet_info(tweet, text=True):
    """Flatten a tweet object into a plain dictionary.

    Parameters
    ----------
    tweet
        Object exposing ``id`` and ``user`` (with ``id``, ``name`` and
        ``screen_name``); when ``text`` is true it must also expose
        ``full_text`` and an ``entities`` mapping with the keys
        ``'hashtags'`` and ``'user_mentions'``.
    text : bool, default True
        Include the tweet text, hashtags and mentions in the result.

    Returns
    -------
    dict
        Always ``id``, ``user_id``, ``user_name`` and ``user_screen_name``;
        additionally ``text``, ``hashtags`` and ``mentions`` when ``text`` is
        true.
    """
    author = tweet.user

    # Short form: identity of the tweet and of its author only.
    if not text:
        return {
            "id": tweet.id,
            "user_id": author.id,
            "user_name": author.name,
            "user_screen_name": author.screen_name}

    entities = tweet.entities
    return {
        "id": tweet.id,
        "text": tweet.full_text,
        "user_id": author.id,
        "user_name": author.name,
        "user_screen_name": author.screen_name,
        "hashtags": entities['hashtags'],
        "mentions": entities['user_mentions']
    }


In [11]:
def retweets(tweet):
    """Return the short ``tweet_info`` summary of every retweet of ``tweet``.

    ``tweet.retweets()`` supplies the retweets; each one is summarised with
    ``tweet_info(..., text=False)``, i.e. ids and author identity only.
    """
    summaries = []
    for retweet in tweet.retweets():
        summaries.append(tweet_info(retweet, text=False))
    return summaries


In [12]:
def user_timeline(id, look_retweet=False):
    """Fetch a user's timeline and summarise each tweet.

    Parameters
    ----------
    id
        Forwarded as first argument to ``api.user_timeline``.
    look_retweet : bool, default False
        Also collect the retweets of every timeline tweet.

    Returns
    -------
    list or (list, list)
        The ``tweet_info`` dictionaries of the timeline (requested with
        ``count=100`` and ``tweet_mode="extended"``). With
        ``look_retweet=True`` a second list is returned alongside, holding
        the ``retweets(...)`` result for each tweet in the same order.
    """
    statuses = api.user_timeline(id, count=100, tweet_mode="extended")
    summaries = list(map(tweet_info, statuses))
    if not look_retweet:
        return summaries
    return summaries, list(map(retweets, statuses))

#user_timeline(direct_info["id"], look_retweet=False)

In [13]:
def user_mentioned(id, look_retweet=False, query="Dataswati"):
    """Search recent tweets matching ``query`` and summarise each of them.

    Parameters
    ----------
    id
        Kept for signature compatibility; it is not used, because the lookup
        is a text search rather than a per-user endpoint.
    look_retweet : bool, default False
        Also collect the retweets of every tweet found.
    query : str, default "Dataswati"
        Search term passed to ``api.search``. The default reproduces the
        previously hardcoded term, so existing calls behave as before.

    Returns
    -------
    list or (list, list)
        The ``tweet_info`` dictionaries of the search results (requested with
        ``count=100`` and ``tweet_mode="extended"``). With
        ``look_retweet=True`` a second list is returned alongside, holding
        the ``retweets(...)`` result for each tweet in the same order.
    """
    tweets = api.search(q=query, count=100, tweet_mode="extended")
    tweet_summaries = [tweet_info(tweet) for tweet in tweets]
    if look_retweet:
        return tweet_summaries, [retweets(tweet) for tweet in tweets]
    return tweet_summaries


### Fonction construction graphe

In [14]:
def _cypher_escape(value):
    """Escape ``value`` so it can sit inside a quoted Cypher string literal."""
    # Backslashes first, otherwise the escapes added for quotes get doubled.
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace('"', '\\"')
    )


def _tweet_entity_clauses(tweet):
    """Build the Cypher clauses linking tweet node ``t`` to its hashtags and mentions."""
    clauses = ""
    for i, hashtag in enumerate(tweet["hashtags"]):
        clauses += "\n MERGE (h%s:HASHTAG {name: '%s'})" % (
            i, _cypher_escape(hashtag['text']))
        clauses += "\n MERGE (t)-[:HASHTAG]-(h%s)" % i
    for i, mention in enumerate(tweet["mentions"]):
        clauses += "\n MERGE (u%s:USER {screen_name:'%s', name:\"%s\", id:%s})" % (
            i, _cypher_escape(mention['screen_name']),
            _cypher_escape(mention['name']), mention['id'])
        clauses += "\n MERGE (t)-[:MENTION]-(u%s)" % i
    return clauses


def _follow_request(user_node, other_id, other_info, other_follows_user):
    """Build the Cypher statement for one FOLLOW edge between ``u`` and another account."""
    request = "MATCH %s \n" % user_node
    request += "MERGE (u1:USER {id:%s, screen_name:'%s', name:\"%s\"})" % (
        other_id, _cypher_escape(other_info['screen_name']),
        _cypher_escape(other_info['name']))
    if other_follows_user:
        request += "\n MERGE (u1)-[:FOLLOW]->(u)"
    else:
        request += "\n MERGE (u)-[:FOLLOW]->(u1)"
    return request


def graph_from_user(user):
    """Build the Neo4j graph around one Twitter account.

    The following is written through the module-level ``graph``:

    * a ``USER`` node for the account (merged on its name);
    * one ``TWEET`` node per timeline tweet, linked to the account, with its
      ``HASHTAG`` nodes and mentioned ``USER`` nodes;
    * the same for every tweet returned by ``user_mentioned``, linked to the
      tweet's own author;
    * a ``FOLLOW`` edge from each follower to the account, and from the
      account to each friend (account it follows);
    * finally, every node whose name is ``'unknown'`` (accounts that could
      not be looked up) is deleted again.

    Parameters
    ----------
    user
        Screen name or id of the account, forwarded to ``user_info``.

    Returns
    -------
    list of str
        Every Cypher statement that was executed, in execution order, so the
        graph can be replayed later without calling Twitter again. Statements
        are therefore complete strings (values are escaped and inlined rather
        than passed as query parameters).

    Raises
    ------
    ValueError
        If ``user_info`` reports the root account as ``'unknown'``; no valid
        statement can be generated for it.
    """
    list_request = []

    def _run(request):
        # Execute a statement and record it for later replay.
        graph.run(request)
        list_request.append(request)

    # User info
    direct_info, folower_ids, friend_ids = user_info(user)
    if direct_info["id"] == 'unknown':
        raise ValueError("Twitter user %r could not be resolved" % (user,))

    user_node = "(u:USER{name:'%s'})" % _cypher_escape(direct_info["name"])
    request = "MERGE %s" % user_node
    request += "\n SET u.id=%s" % direct_info["id"]
    request += "\n SET u.screen_name='%s'" % _cypher_escape(direct_info["screen_name"])
    _run(request)

    # Fetch the timeline and the tweets returned by the mention search
    timeline_tweets = user_timeline(direct_info["id"], look_retweet=False)
    mentioned_tweets = user_mentioned(direct_info["id"], look_retweet=False)

    # One statement per timeline tweet: link it to the account, then to its
    # hashtags and mentioned users.
    for tweet in timeline_tweets:
        request = "MATCH %s" % user_node
        request += "\n MERGE (t:TWEET {id:%s})" % tweet["id"]
        request += "\n MERGE (u)-[:TWEET]->(t)"
        request += "\n SET t.text=\"%s\"" % _cypher_escape(tweet["text"])
        request += _tweet_entity_clauses(tweet)
        _run(request)

    # Same process for the searched tweets; here ``u`` is the tweet's author.
    for tweet in mentioned_tweets:
        request = " MERGE (u:USER {id:%s, name:'%s', screen_name:'%s'})\n MERGE (t:TWEET {id:%s})" % (
            tweet["user_id"], _cypher_escape(tweet["user_name"]),
            _cypher_escape(tweet["user_screen_name"]), tweet["id"])
        request += "\n MERGE (u)-[:TWEET]-(t)"
        request += "\n SET t.text=\"%s\"" % _cypher_escape(tweet["text"])
        request += _tweet_entity_clauses(tweet)
        _run(request)

    # Followers point at the account ...
    for folower_id in folower_ids:
        direct_info_temp = user_info(folower_id, links=False)
        _run(_follow_request(user_node, folower_id, direct_info_temp,
                             other_follows_user=True))

    # ... while the account points at its friends (the accounts it follows).
    for friend_id in friend_ids:
        direct_info_temp = user_info(friend_id, links=False)
        _run(_follow_request(user_node, friend_id, direct_info_temp,
                             other_follows_user=False))

    # Drop the placeholder nodes created for accounts that failed to resolve.
    _run("MATCH (n) where n.name='unknown' DETACH DELETE (n)")

    return list_request
    

In [None]:
#graph.run("MATCH (n) DETACH DELETE n")

# Build the graph for the "dataswati" account and keep the list of executed
# Cypher statements returned by graph_from_user.
list_request = graph_from_user("dataswati")


In [None]:
# Save the executed Cypher statements as JSON so they can be replayed later.
with open('../data/list_request_neo4j.txt', 'w') as outfile:
    outfile.write(json.dumps(list_request))


In [None]:
# Read the saved Cypher statements back from the JSON file.
with open('../data/list_request_neo4j.txt') as json_data:
    list_request = json.loads(json_data.read())


In [None]:
# Start from an empty database, then replay the recorded statements in order.
reset_query = "MATCH (n) DETACH DELETE n"
graph.run(reset_query)
for request in list_request:
    graph.run(request)