# Almacenando datos de Twitter en Neo4j

![png](http://d20tdhwx2i89n1.cloudfront.net/image/upload/t_next_gen_article_large_767/btp7c4imyevfdt9icxlo.jpg)

In [None]:
%load_ext cypher

In [None]:
%%cypher
MATCH (n)
OPTIONAL MATCH (n)-[r]-()
DELETE n,r

In [None]:
%cypher CREATE CONSTRAINT ON (t:Tweet) ASSERT t.id IS UNIQUE

In [None]:
%cypher CREATE CONSTRAINT ON (u:User) ASSERT u.username IS UNIQUE

In [None]:
%cypher CREATE CONSTRAINT ON (h:HashTag) ASSERT h.hashtag IS UNIQUE

![png](./images/Model.png)

In [None]:
from pprintpp import pprint as pp


In [None]:
from py2neo import Graph, Relationship
import json

# Shared py2neo connection used by parse_user / parse_tweet further down.
# Called with no arguments, so py2neo picks its default server URI
# (presumably the local Neo4j instance — TODO confirm for your setup).
graph = Graph()

In [None]:
def add_property(obj, json, name):
    """Copy ``json[name]`` into ``obj[name]`` if the key is present.

    ``obj`` is left untouched when ``name`` is not a key of ``json``.
    Note that the ``json`` parameter is a mapping and shadows the ``json``
    module inside this function only.
    """
    if name not in json:
        return
    obj[name] = json[name]

In [None]:
def parse_user(user_json):
    """Merge a ``User`` node for a Twitter user payload and return it.

    The node is merged on ``username``, whose value is the payload's
    ``screen_name``. Each of the profile fields listed below that is
    present in ``user_json`` is copied onto the node, and the node is then
    pushed to the graph.
    """
    node = graph.merge_one("User", "username", user_json['screen_name'])

    profile_fields = (
        'created_at',
        'description',
        'favourites_count',
        'followers_count',
        'friends_count',
        'statuses_count',
        'time_zone',
        'name',
        'profile_image_url',
    )
    for field in profile_fields:
        add_property(node, user_json, field)

    node.push()
    return node

In [None]:
def _tweet_hashtags_and_mentions(tweet_json):
    """Return ``(hashtags, mentions)`` extracted from a tweet payload."""
    entities = tweet_json.get('entities') or {}
    # Top-level 'user_mentions' is the shape the loader originally expected.
    mentions = list(tweet_json.get('user_mentions') or [])

    if isinstance(entities, dict):
        # Standard Twitter payload: 'entities' is an object whose 'hashtags'
        # entries carry the tag in 'text' and whose 'user_mentions' entries
        # are user stubs. Iterating the object itself would only yield its
        # keys ("hashtags", "urls", ...), which are not hashtags.
        hashtags = [
            tag['text'] if isinstance(tag, dict) else tag
            for tag in entities.get('hashtags') or []
        ]
        mentions.extend(entities.get('user_mentions') or [])
    else:
        # Pre-flattened payload: 'entities' is already a list of hashtags.
        hashtags = list(entities)

    return hashtags, mentions


def parse_tweet(tweet_json):
    """Store one tweet and everything it references in the graph.

    Creates or updates, in this order:

    * the author ``User`` node (via ``parse_user``) and the ``Tweet`` node,
      merged on ``id``, with whichever of ``created_at``, ``lang``,
      ``retweet_count``, ``source`` and ``text`` are present;
    * ``(user)-[:TWEETED]->(tweet)``;
    * ``(tweet)-[:MENCIONED]->(user)`` for every mentioned user;
    * ``(tweet)-[:HASHTAG]->(hashtag)`` for every hashtag, with ``HashTag``
      nodes merged on ``hashtag``;
    * for a retweet, ``(tweet)-[:RETWEET_OF]->(original author)``, after
      which the embedded original tweet is stored recursively.

    Hashtags and mentions are read from the standard Twitter ``entities``
    object as well as from the flat shape (``entities`` as a list of tags,
    top-level ``user_mentions``).

    ``tweet_json`` must contain ``user`` and ``id``; a missing key raises
    ``KeyError``. Nothing is returned.
    """
    user = parse_user(tweet_json['user'])

    tweet = graph.merge_one("Tweet", "id", tweet_json['id'])
    for field in ('created_at', 'lang', 'retweet_count', 'source', 'text'):
        add_property(tweet, tweet_json, field)
    tweet.push()

    graph.create_unique(Relationship(user, "TWEETED", tweet))

    hashtags, mentions = _tweet_hashtags_and_mentions(tweet_json)

    for user_mention_json in mentions:
        user_mencioned = parse_user(user_mention_json)
        # "MENCIONED" is kept as spelled: it is the relationship type
        # already stored in the graph.
        graph.create_unique(Relationship(tweet, "MENCIONED", user_mencioned))

    for tag in hashtags:
        hashtag = graph.merge_one("HashTag", "hashtag", tag)
        graph.create_unique(Relationship(tweet, "HASHTAG", hashtag))

    if 'retweeted_status' in tweet_json:
        original = tweet_json['retweeted_status']
        user_retweeted = parse_user(original['user'])
        graph.create_unique(Relationship(tweet, "RETWEET_OF", user_retweeted))

        parse_tweet(original)

In [None]:
def load_file(tweets_data_path):
    """Load a file holding one JSON tweet per line into the graph.

    Each non-blank line is decoded with ``json.loads`` and handed to
    ``parse_tweet``. Blank lines are skipped instead of being passed to the
    JSON decoder, where they would raise. The file is closed when loading
    finishes or fails.

    :param tweets_data_path: path of the line-delimited JSON file.
    :raises IOError: if the file cannot be opened.
    :raises ValueError: if a non-blank line is not valid JSON.
    """
    with open(tweets_data_path, "r") as tweets_file:
        for line in tweets_file:
            line = line.strip()
            if not line:
                continue
            parse_tweet(json.loads(line))

In [None]:
# Import the sample tweets into the graph; load_file reads the file line by
# line and decodes each line as one JSON tweet.
load_file('../data/tweets.json')

In [None]:
%%cypher
MATCH p = ((u:User {username : 'couchbase'})-[r:TWEETED]->t)
RETURN u.username, t.text, type(r)
LIMIT 10

In [None]:
%%cypher match (n:HashTag)-[r]-() 
return n.hashtag, count(r) as degree 
order by degree desc
limit 10

In [None]:
%matplotlib inline

In [None]:
results = %%cypher match (n:HashTag)-[r]-()  return n.hashtag as HashTag, count(r) as Degree order by Degree desc limit 10

In [None]:
results.get_dataframe()

In [None]:
results.pie()

In [None]:
results.plot()

In [None]:
results.bar()

In [None]:
results = %cypher match (n)-[r]-() return n, r limit 10
results.draw()

In [None]:
# Re-create the py2neo connection (again with no arguments) and keep a
# handle to its `cypher` attribute, which the following cells use to run
# raw Cypher queries from Python.
from py2neo import Graph
graph = Graph()
cypher = graph.cypher

In [None]:
# Hashtags that most often share a tweet with #neo4j.
# The target node needs the label colon (:HashTag): without it, `HashTag`
# is just a variable name and the pattern matches on the property alone.
query = """
    MATCH (h:HashTag)<-[:HASHTAG]-(:Tweet)-[:HASHTAG]->(:HashTag {hashtag:"neo4j"})
    WHERE h.hashtag <> "neo4j"
    RETURN h.hashtag AS hashtag, COUNT(*) AS count
    ORDER BY count DESC
    LIMIT 10
"""

results = cypher.execute(query)
# Function-call form of print works on both Python 2 and Python 3.
print(results)



In [None]:
# Ten users with the highest follower count, restricted to users that
# actually have a followers_count property.
results = cypher.execute(
"""
MATCH (u:User)
WHERE exists(u.followers_count)
return distinct u.username, u.followers_count
order by u.followers_count DESC LIMIT 10
""")

# Function-call form of print works on both Python 2 and Python 3.
print(results)

In [None]:
type(results)

In [None]:
%%cypher
MATCH (u:User)
WHERE exists(u.followers_count)
return distinct u.username, u.followers_count
order by u.followers_count DESC LIMIT 10

In [None]:
%%cypher
match n
return distinct labels(n)

In [None]:

result = %cypher MATCH (hashtag:HashTag)<-[:HASHTAG]-(tweet:Tweet) \
                 RETURN hashtag.name AS hashtag, COUNT(tweet) AS tweets \
                 ORDER BY tweets DESC LIMIT 5
        
df = result.get_dataframe()
df.head()