# TP1 - Redis

## Conexión a nuestra Base de Datos Redis

In [1]:
import redis

In [2]:
# Client for the local Redis server. Responses are left as raw bytes
# (no decode_responses), so the rest of the notebook decodes them explicitly.
redis_db = redis.Redis(
    host='127.0.0.1',
    port=6379,
    password='',
)

In [3]:
# Start from an empty database so the notebook can be re-run from scratch.
# WARNING: FLUSHDB removes every key of the currently selected Redis database;
# only run this against a throw-away instance.
redis_db.flushdb()

True

## Implementación

### 1. Diseño de la base de datos

Funciones de apoyo

In [4]:
def get_user_id_or_name(user_name_or_id):
    """Look up one field of the ``users`` hash.

    ``nuevo_usuario`` stores both ``username -> id`` and ``id -> username``
    in that hash, so the same lookup translates in either direction.

    Args:
        user_name_or_id: A username or a user id.

    Returns:
        Whatever ``redis_db.hget`` returns for that field: the stored value
        (raw bytes with the client configured in this notebook) or ``None``
        when the field does not exist.
    """
    users_hash = 'users'
    stored_value = redis_db.hget(users_hash, user_name_or_id)
    return stored_value

Funciones requeridas por el ejercicio

In [5]:
def nuevo_usuario(username):
    """Register ``username`` unless it is already present in ``users``.

    A fresh id is taken from the ``user_id`` counter and the pair is written
    to the ``users`` hash in both directions (``username -> id`` and
    ``id -> username``) so either one can be resolved with a single lookup.

    Args:
        username: Name of the user to create.

    Returns:
        None, whether or not a user was created.
    """
    # Guard clause: nothing to do when the name is already registered.
    if get_user_id_or_name(username) is not None:
        return

    new_id = redis_db.incr('user_id')
    bidirectional_entry = {
        username: new_id,
        new_id: username
    }
    redis_db.hmset('users', bidirectional_entry)

In [6]:
def nuevo_follower(followed_username, follower_username, timestamp):
    """Record that ``follower_username`` follows ``followed_username``.

    The follower's id is added to the sorted set
    ``user:<followed id>:followers`` with ``timestamp`` as its score.

    Args:
        followed_username: User who gains the follower.
        follower_username: User who starts following.
        timestamp: Score stored for the relation.

    Returns:
        True when the relation was written, False when either username is
        not registered.
    """
    # Both names are always resolved, followed user first.
    raw_ids = [
        get_user_id_or_name(name)
        for name in (followed_username, follower_username)
    ]
    if any(raw_id is None for raw_id in raw_ids):
        return False

    followed_key, follower_member = [raw_id.decode('utf-8') for raw_id in raw_ids]
    followers_set = 'user:' + followed_key + ':followers'
    redis_db.zadd(followers_set, {follower_member: timestamp})
    return True

In [7]:
def nuevo_following(follower_username, followed_username, timestamp):
    """Record that ``follower_username`` is now following ``followed_username``.

    The followed user's id is added to the sorted set
    ``user:<follower id>:following`` with ``timestamp`` as its score.

    Args:
        follower_username: User who starts following.
        followed_username: User being followed.
        timestamp: Score stored for the relation.

    Returns:
        True when the relation was written, False when either username is
        not registered.
    """
    raw_followed = get_user_id_or_name(followed_username)
    raw_follower = get_user_id_or_name(follower_username)

    if not (raw_followed is not None and raw_follower is not None):
        return False

    followed_member = raw_followed.decode('utf-8')
    following_set = 'user:%s:following' % raw_follower.decode('utf-8')
    redis_db.zadd(following_set, {followed_member: timestamp})
    return True

In [8]:
def seguir(follower, followed, timestamp):
    """Make ``follower`` follow ``followed`` at ``timestamp``.

    Writes both sides of the relation: first the follower entry of
    ``followed``, then the following entry of ``follower``. The second write
    is only attempted when the first one succeeded.

    Returns:
        The result of the last step that ran (False as soon as one fails).
    """
    follower_side_ok = nuevo_follower(followed, follower, timestamp)
    if not follower_side_ok:
        return follower_side_ok
    return nuevo_following(follower, followed, timestamp)

In [9]:
def nuevo_post(username, message, timestamp):
    """Store a post and add it to the timelines of the author and followers.

    Keys written for a new post with id ``<pid>`` by user ``<uid>``:

    * ``post:<pid>`` (hash): ``timestamp``, ``userid`` and ``message``.
    * ``post_<pid>_timeline`` (string): the timestamp, used as the external
      sort key when a timeline is read with ``SORT ... BY post_*_timeline``.
    * ``user:<uid>:posts`` (set) and ``user:<follower id>:posts`` for every
      member of ``user:<uid>:followers``: the post id.

    Args:
        username: Author of the post; must already be registered.
        message: Text of the post.
        timestamp: Publication time stored with the post.

    Returns:
        True when the post was stored, False when ``username`` is unknown
        (in which case nothing is written and no post id is consumed).
    """
    userid = get_user_id_or_name(username)
    if userid is None:
        return False
    userid = userid.decode('utf-8')

    # Allocate the id only after the author has been validated, so posts from
    # unknown users do not leave gaps in the ``post_id`` sequence.
    postid = str(redis_db.incr('post_id'))

    redis_db.hmset('post:' + postid, {
        'timestamp': timestamp,
        'userid': userid,
        'message': message
    })

    redis_db.set('post_' + postid + '_timeline', timestamp)

    # The author sees their own post too.
    redis_db.sadd('user:' + userid + ':posts', postid)

    # Fan out to everyone who currently follows the author.
    for follower_id in redis_db.zrange('user:' + userid + ':followers', 0, -1):
        redis_db.sadd('user:' + follower_id.decode('utf-8') + ':posts', postid)

    return True
    
    

### 2. Conjunto de datos

In [10]:
import pandas as pd
import numpy as np
import os

In [11]:
# Sample tweets: the fields are quoted because the tweet text can itself
# contain commas and line breaks.
tweets = pd.read_csv(
    os.path.join('..', 'data', 'twitter_sample.csv'),
    sep=',',
    header=0,
    quotechar='"',
    encoding='utf-8',
)

# Follower relations between the users.
relations = pd.read_csv(
    os.path.join('..', 'data', 'relations.csv'),
    sep=',',
    header=0,
    encoding='utf-8',
)

En primera instancia, vamos a crear todos los usuarios que aparecen en los tweets, puesto que asumimos que todos ya existían antes de publicar o de seguirse entre sí.

In [12]:
tweets['User'].unique()

array(['andyglittle', 'afparron', 'drshahrul80', 'karin_stowell',
       'cathcooney', 'dkalnow', 'alkhalilkouma', 'seers_helen',
       'hanyshita', nan, 'roxanefeller', 'animalhealthEU', 'charleskod'],
      dtype=object)

In [13]:
tweets['User'].unique().size

13

In [14]:
tweets[tweets['User'].isna()]

Unnamed: 0,User,Post_Time,Tweet_Content,Unnamed: 3
14,,02 Jul 2019 20:56:39,@stemagno74 @wcrfint @macmillancancer @NIHRres...,


In [15]:
# Discard tweets with no author. Filtering on the missing value (instead of
# dropping the hard-coded row label 14 in place) keeps this cell idempotent:
# it can be re-run without raising KeyError and still works if the CSV changes.
tweets = tweets.dropna(subset=['User'])

In [16]:
def create_users_from_tweets(tweets):
    """Register every distinct author found in the ``User`` column.

    Rows whose ``User`` is missing are skipped so that a NaN value is never
    registered as if it were a username.

    Args:
        tweets: DataFrame with a ``User`` column.
    """
    for user in tweets['User'].dropna().unique():
        nuevo_usuario(user)

In [17]:
create_users_from_tweets(tweets)

In [18]:
redis_db.hget('users', 'karin_stowell').decode('utf-8')

'4'

In [19]:
relations['User'].unique().size

12

In [20]:
relations['Follows'].unique().size

12

In [21]:
from datetime import datetime as dt

date_format = '%d %b %Y %H:%M:%S'

In [22]:
def from_date_to_timestamp(date):
    """Convert a date string in ``date_format`` to a whole-second timestamp.

    The parsed datetime carries no timezone, so ``datetime.timestamp()``
    interprets it as local time.

    Args:
        date: Text such as ``'01 Aug 2019 12:17:59'``.

    Returns:
        The POSIX timestamp truncated to an ``int``.
    """
    parsed = dt.strptime(date, date_format)
    seconds = parsed.timestamp()
    return int(seconds)

In [23]:
def from_timestamp_to_date(timestamp):
    """Render a POSIX timestamp as text using ``date_format``.

    This is the inverse of ``from_date_to_timestamp``; the timestamp is
    converted with ``datetime.fromtimestamp`` (local time).

    Args:
        timestamp: Seconds since the epoch (int or float).

    Returns:
        The formatted date string.
    """
    moment = dt.fromtimestamp(timestamp)
    return moment.strftime(date_format)

In [24]:
def create_relations(relations):
    """Replay every row of ``relations`` as a follow action.

    Args:
        relations: DataFrame with the columns ``User`` (who follows),
            ``Follows`` (who is followed) and ``Following_Time`` (date text
            understood by ``from_date_to_timestamp``).
    """
    rows = zip(relations['User'], relations['Follows'], relations['Following_Time'])
    for follower, followed, followed_at in rows:
        seguir(follower, followed, from_date_to_timestamp(followed_at))

In [25]:
create_relations(relations)

In [26]:
def create_posts(tweets):
    """Publish every row of ``tweets`` as a post.

    Args:
        tweets: DataFrame with the columns ``User`` (author),
            ``Tweet_Content`` (message) and ``Post_Time`` (date text
            understood by ``from_date_to_timestamp``).
    """
    rows = zip(tweets['User'], tweets['Tweet_Content'], tweets['Post_Time'])
    for author, content, posted_at in rows:
        nuevo_post(author, content, from_date_to_timestamp(posted_at))

In [27]:
create_posts(tweets)

### 3. Pruebas

In [28]:
def obtener_followers(username):
    """Print the followers of ``username`` and when each one started following.

    The whole ``user:<id>:followers`` sorted set is read with ``ZRANGE ...
    WITHSCORES``, so the list is complete and ordered by follow time. A single
    ``ZSCAN`` call only returns one cursor page and gives no ordering
    guarantee, which is why it is not used here.

    Args:
        username: Name of the user whose followers are listed.

    Returns:
        None. A short notice is printed when ``username`` is not registered.
    """
    iduser = get_user_id_or_name(username)
    if iduser is None:
        print('El usuario ' + username + ' no existe')
        return

    # Each entry is a (member, score) pair: follower id and follow timestamp.
    followers = redis_db.zrange('user:' + iduser.decode('utf-8') + ':followers', 0, -1, withscores=True)
    print('El usuario ' + username + ' tiene ' + str(len(followers)) + ' followers:\n')
    for follower_id, followed_at in followers:
        follower_name = get_user_id_or_name(follower_id.decode('utf-8')).decode('utf-8')
        print(follower_name, 'empezó a seguirle el', from_timestamp_to_date(followed_at))
    

In [29]:
obtener_followers('drshahrul80')

El usuario drshahrul80 tiene 3 followers:

animalhealthEU empezó a seguirle el 19 Jul 2019 14:59:55
alkhalilkouma empezó a seguirle el 01 Aug 2019 12:17:59
karin_stowell empezó a seguirle el 01 Aug 2019 21:58:25


In [30]:
def obtener_following(username):
    """Print the users that ``username`` follows and since when.

    The whole ``user:<id>:following`` sorted set is read with ``ZRANGE ...
    WITHSCORES``, so the list is complete and ordered by follow time. A single
    ``ZSCAN`` call only returns one cursor page and gives no ordering
    guarantee, which is why it is not used here.

    Args:
        username: Name of the user whose followed accounts are listed.

    Returns:
        None. A short notice is printed when ``username`` is not registered.
    """
    iduser = get_user_id_or_name(username)
    if iduser is None:
        print('El usuario ' + username + ' no existe')
        return

    # Each entry is a (member, score) pair: followed id and follow timestamp.
    following = redis_db.zrange('user:' + iduser.decode('utf-8') + ':following', 0, -1, withscores=True)
    print('El usuario ' + username + ' sigue a ' + str(len(following)) + ' usuarios:\n')
    for followed_id, followed_at in following:
        followed_name = get_user_id_or_name(followed_id.decode('utf-8')).decode('utf-8')
        print('A', followed_name, 'empezó a seguirle el', from_timestamp_to_date(followed_at))
    

In [31]:
obtener_following('alkhalilkouma')

El usuario alkhalilkouma sigue a 2 usuarios:

A animalhealthEU empezó a seguirle el 01 Jul 2019 19:25:03
A drshahrul80 empezó a seguirle el 01 Aug 2019 12:17:59


In [64]:
def obtener_timeline(username, tweets_propios=True):
    """Print the timeline of ``username``, newest post first.

    Post ids are read from ``user:<id>:posts`` and ordered by the value of
    the matching ``post_<pid>_timeline`` key (descending).

    Args:
        username: Name of the user whose timeline is shown.
        tweets_propios: When True (default) only posts written by
            ``username`` are printed; when False, posts received from
            followed users are printed as well.

    Returns:
        None. A short notice is printed when ``username`` is not registered.
    """
    iduser = get_user_id_or_name(username)
    if iduser is None:
        print('El usuario ' + username + ' no existe')
        return
    iduser = iduser.decode('utf-8')

    post_ids = redis_db.sort('user:' + iduser + ':posts', by='post_*_timeline', desc=True)
    for raw_post_id in post_ids:
        post_key = 'post:' + raw_post_id.decode('utf-8')

        # One HMGET instead of three HGETs: a single round trip per post.
        author_id, posted_at, message = [
            value.decode('utf-8')
            for value in redis_db.hmget(post_key, ['userid', 'timestamp', 'message'])
        ]

        # Filter before resolving the author name and formatting the date.
        if tweets_propios and author_id != iduser:
            continue

        author_name = get_user_id_or_name(author_id).decode('utf-8')
        print('- [' + from_timestamp_to_date(int(posted_at)) + ']' +
              ' El usuario ' + author_name +
              ' twitteó: ' + message)
    

In [65]:
obtener_timeline('alkhalilkouma', True)

- [28 Aug 2019 12:17:59] El usuario alkhalilkouma twitteó: Zoonosen sind ein Risiko für Mensch und Tier❗️💉🐶🐱🐷🐮🐔
#Impfung #Tiergesundheit #AnimalHealthMatters #PetsareFamily #MorethanMedicine #zoonoses #Zoonosen https://t.co/uM5e2Asd5K
- [22 Aug 2019 12:12:50] El usuario alkhalilkouma twitteó: @somedocs - Sharing experiences &amp; medical knowledge which should be available to all #FOAMed #MedEd 
- Being a good human (ie. Conveying values of respect, trust &amp; honesty) #morethanmedicine 
- Tips &amp; tricks in #education to better help whatever your profession is

@EMinMiami (👈 this guy)
- [25 Jul 2019 09:35:25] El usuario alkhalilkouma twitteó: Proud carer of three stray #dogs who are now part of the family, Uwe Mucke from @Bayer4Animals, shares his story of more than 30 years working in #animalhealth.
#MorethanMedicine:
https://t.co/aAO0qzLtUq
- [21 Jul 2019 13:40:46] El usuario alkhalilkouma twitteó: Some diseases that animals are affected by, can be a risk for people too ⚠️

➡️ Ke