# Marking interesting news


In [1]:
%load_ext autoreload
%autoreload 2
from mongoengine import connect
from tweepyrate import create_apps
from hate_collector.models import Tweet

# Name the database once: the same identifier is used to open the mongoengine
# connection and to pick the database handle out of the returned client.
# `db` is the handle later cells use for raw collection calls (db.tweet...).
DB_NAME = "hatespeech-news"
client = connect(DB_NAME)
db = client[DB_NAME]

We first collect the screen names of the tracked news accounts, and then look for the tweets from those accounts that are not replies.

In [2]:
# Distinct values of the `query` field, skipping documents where it is None.
stored_queries = (query for query in db.tweet.distinct('query') if query is not None)
# Drop the first character of each query to obtain the bare screen name.
# NOTE(review): presumably that character is always a leading "@" — confirm.
screen_names = [query[1:] for query in stored_queries]
screen_names

['LANACION',
 'abc_es',
 'clarincom',
 'cronica',
 'elmundoes',
 'elpaisuy',
 'infobae',
 'latercera',
 'lavanguardia',
 'perfilcom']

In [3]:
# Tweets with no in_reply_to_status_id whose author is one of the news accounts.
tweets = Tweet.objects(
    in_reply_to_status_id=None,
    user__screen_name__in=screen_names,
)

# Report the count in thousands together with the "@"-prefixed account handles.
thousands_of_tweets = tweets.count() / 1e3
account_handles = ['@' + name for name in screen_names]
print(f"There are {thousands_of_tweets:.2f}K that are not replies and come from the official news accounts = {account_handles}")

There are 15.37K that are not replies and come from the official news accounts = ['@LANACION', '@abc_es', '@clarincom', '@cronica', '@elmundoes', '@elpaisuy', '@infobae', '@latercera', '@lavanguardia', '@perfilcom']


## News and its replies

In [64]:
# Stage 1: tweets from the news accounts that are not replies and have been
# neither checked nor marked as interesting yet.
pending_news_stage = {
    "$match": {
        "checked": False,
        "interesting": False,
        "user.screen_name": {"$in": screen_names},
        "in_reply_to_status_id": None,
    }
}

# Stage 2: self-join on the same collection, attaching as `replies` every
# tweet whose in_reply_to_status_id equals this tweet's _id.
attach_replies_stage = {
    "$lookup": {
        "from": "tweet",
        "localField": "_id",
        "foreignField": "in_reply_to_status_id",
        "as": "replies",
    }
}

# Stage 3: keep only the tweets that gathered more than 5 replies.
enough_replies_stage = {
    "$match": {
        "$expr": {"$gt": [{"$size": "$replies"}, 5]},
    }
}

# Stage 4: draw a random batch of 100 documents to review.
random_batch_stage = {
    "$sample": {
        "size": 100,
    }
}

tweets_and_replies = db.tweet.aggregate([
    pending_news_stage,
    attach_replies_stage,
    enough_replies_stage,
    random_batch_stage,
])


In [65]:
# Position of the tweet being reviewed; the review starts at the first one.
current_idx = 0
# Turn the aggregation result into a list so it can be indexed and measured.
tweets_and_replies = [*tweets_and_replies]

In [66]:
from pprint import pprint as ppr

def show_tweet(tweet):
    """Print a tweet and the replies attached to it.

    Args:
        tweet: mapping with the keys ``_id``, ``interesting``, ``checked``,
            ``created_at``, ``user`` (a mapping holding ``screen_name``),
            ``text`` and ``replies`` (an iterable of mappings with ``_id``
            and ``text``).

    Returns:
        None. Everything is written to standard output.
    """
    # Four rows of asterisks visually separate one tweet from the previous one.
    separator = "*" * 80
    print(f"{separator} \n" * 4)

    # Identifier and labelling status, one "<label> <value>" line each.
    header_fields = (
        ("Tweet id:", "_id"),
        ("Interesting?", "interesting"),
        ("Checked ?", "checked"),
    )
    for label, key in header_fields:
        print(f"{label} {tweet[key]}")
    print(tweet["created_at"], "\n\n")

    # Author handle followed by the tweet body.
    author = tweet["user"]["screen_name"]
    print(f"@{author}")
    print(tweet["text"])

    # Replies, numbered from 1, with the reply id left-aligned in 14 columns.
    print("\nRespuestas:")
    for position, reply in enumerate(tweet["replies"], start=1):
        reply_id, reply_text = reply["_id"], reply["text"]
        print(f"{reply_id:<14} -- {position:<3}- {reply_text}")
        
def set_interesting(tweet):
    """Label a tweet as reviewed and interesting, in memory and in MongoDB.

    Args:
        tweet: mapping with an ``_id`` key; its ``interesting`` and
            ``checked`` entries are overwritten in place.

    Returns:
        None.
    """
    labels = {"interesting": True, "checked": True}

    # Update the in-memory copy first, then the stored document with that _id.
    tweet.update(labels)
    db.tweet.update_one({"_id": tweet["_id"]}, {"$set": labels})
    
def check(tweet):
    """Label a tweet as reviewed but not interesting, in memory and in MongoDB.

    Args:
        tweet: mapping with an ``_id`` key; its ``interesting`` and
            ``checked`` entries are overwritten in place.

    Returns:
        None.
    """
    labels = {"interesting": False, "checked": True}

    # Update the in-memory copy first, then the stored document with that _id.
    tweet.update(labels)
    db.tweet.update_one({"_id": tweet["_id"]}, {"$set": labels})
    

In [67]:
# Report how many tweets were drawn for this review session.
pending_count = len(tweets_and_replies)
print(f"Hay {pending_count} tweets para chequear")

Hay 100 tweets para chequear


In [68]:
from IPython.display import display, clear_output
import ipywidgets as widgets

# Review UI: one button per label plus an Output area that shows the tweet
# being reviewed. Clicking a button stores the label and moves to the next tweet.
interesting = widgets.Button(description="Guardar")
not_interesting = widgets.Button(description="No guardar")
output = widgets.Output()
buttons = widgets.HBox([interesting, not_interesting])

# Tweet currently on screen. It is reset on every run of this cell so that a
# value left in the kernel by an earlier run can never be labelled by mistake.
tweet = None


def _render_current():
    """Print the tweet at `current_idx`, or close the session if none is left.

    Call it inside `with output:` so the text goes to the Output widget.
    Updates the global `tweet` to the tweet shown (None when finished).
    """
    global tweet
    if current_idx < len(tweets_and_replies):
        tweet = tweets_and_replies[current_idx]
        show_tweet(tweet)
    else:
        # Nothing left to review: disable the buttons instead of letting the
        # next click index past the end of the list.
        tweet = None
        interesting.disabled = True
        not_interesting.disabled = True
        print("No quedan tweets para chequear")


def _label_and_advance(label):
    """Apply `label` to the tweet on screen and show the next one.

    Args:
        label: callable taking the tweet mapping (`set_interesting` or `check`).
    """
    global current_idx
    if tweet is None:
        # No tweet on screen (empty batch or session finished): nothing to label.
        return
    output.clear_output()
    with output:
        label(tweet)
        # Only reached when `label` did not raise, so a failed write never
        # skips the tweet.
        current_idx += 1
        _render_current()


def save_tweet(b):
    """Click handler for "Guardar": mark the shown tweet as interesting."""
    _label_and_advance(set_interesting)


def skip_tweet(b):
    """Click handler for "No guardar": mark the shown tweet as checked only."""
    _label_and_advance(check)


def first_show(b):
    """Show the tweet at the current position when the UI is first displayed.

    `b` is unused; it is kept so the function still has the callback signature.
    """
    with output:
        _render_current()


interesting.on_click(save_tweet)
not_interesting.on_click(skip_tweet)

display(buttons, output)

# Show the first tweet explicitly instead of registering `first_show` with
# `buttons.on_displayed`.
# NOTE(review): `on_displayed` appears to be deprecated/removed in ipywidgets 8
# — confirm against the installed version; a direct call works either way.
first_show(buttons)




HBox(children=(Button(description='Guardar', style=ButtonStyle()), Button(description='No guardar', style=Butt…

Output()