In [13]:
from scipy.linalg import norm
from numba import jit
import random
import multiprocessing
import re
from operator import itemgetter

import logging
from collections import defaultdict
from functools import lru_cache
from pathlib import Path
from numba import jit
import numpy as np

import spacy
from bson.objectid import ObjectId
from gensim.models import KeyedVectors
from nltk.corpus import stopwords
from pymongo import MongoClient
from tqdm import tqdm

logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.INFO)
_info = logging.info

client = MongoClient('mongodb://localhost:27017')
db = client.twitter_news
nlp = spacy.load('en_core_web_sm', tagger=False, entity=False, matcher=False)

def hashtag_pipe(doc):
    merged_hashtag = False
    while True:
        for token_index, token in enumerate(doc):
            if token.text == '#':
                if token.head is not None:
                    start_index = token.idx
                    end_index = start_index + len(token.head.text) + 1
                    if doc.merge(start_index, end_index) is not None:
                        merged_hashtag = True
                        break
        if not merged_hashtag:
            break
        merged_hashtag = False
    return doc


nlp.add_pipe(hashtag_pipe)

In [14]:
total_events = 3


@lru_cache(maxsize=total_events)
def get_representatives(event_id):
    _info("getting representatives")
    representatives = db.representatives.find({'event': ObjectId(event_id)})
    return list(representatives)


@lru_cache(maxsize=total_events)
def get_topics(event_id):
    _info("getting topics")
    topics = list(db.topics.find({'event': ObjectId(event_id)}))
    comodin = None
    for t in topics:
        if t['topic_name'] == "Non relevant":
            comodin = t
            topics.remove(t)
            break
    return topics, comodin


@lru_cache(maxsize=1)
def get_events():
    _info("getting events")
    events = db.events.find()
    return list(events)


@lru_cache(maxsize=1)
def get_tweets(a=None):
    _info('getting all tweets')
    all_tweets = db.tweets.find()
    return list(all_tweets)


@lru_cache(maxsize=3)
def get_vectors(path):
    _info(f"loading fasttext vectors from {path}")
    word_vectors = KeyedVectors.load_word2vec_format(path)
    return word_vectors


@lru_cache(maxsize=2**30)
def sim(tokens_a, tokens_b):
    return ft_comp.n_similarity(tokens_a, tokens_b)


def mmr(docs, query, l):
    def mmr_score(tweet):
        return l * sim(docs[tweet], query) - \
               (1 - l) * max([sim(docs[tweet], docs[y]) for y in set(selected) - {tweet}] or [0])

    L = np.array([[l, 0], [0, l - 1]])

    def score(tweet):
        s1 = sim(docs[tweet], query)
        s2 = np.max(np.array([sim(docs[tweet], docs[y]) for y in set(selected) - {tweet}] or [0]))

        return L.dot(np.array([s1, s2])).sum()

    selected = set()
    while selected != set(docs):
        remaining = list(set(docs) - selected)
        next_selected = max(remaining, key=mmr_score)
        # next_selected = remaining[np.argmax([score(t) for t in remaining])]

        # next_selected = None
        # max_score = 0
        #
        # for _t in remaining:
        #     score = l * sim(docs[_t], query) - \
        #             (1 - l) * max([sim(docs[_t], docs[y]) for y in set(selected) - {_t}] or [0])
        #     if score > max_score:
        #         max_score = score
        #         next_selected = _t

        selected.add(next_selected)
        yield next_selected, ' '.join(list(docs[next_selected]))


@lru_cache(maxsize=total_events)
def process_tweets(event_id):
    all_tweets = get_tweets()
    representatives = get_representatives(event_id)

    _info("processing tweets")

    # rep_tweet: repr_id => tweet
    rep_tweet = dict()
    for t in tqdm(all_tweets):
        rep_tweet[t['representative']] = t

    # repr_ids: {repr_id} // this event
    repr_ids = set([r['_id'] for r in representatives])

    # tweets_this_event: [tweet]
    tweets_this_event_ = [t for r, t in rep_tweet.items() if r in repr_ids]
    tweets_this_event = []
    
    # filter out tweets wo expanded urls
    for t in tweets_this_event_:
        e_u = t['expanded_urls']
        if all(u is not None for u in e_u):
            tweets_this_event.append(t)    

    tweets_tokens = dict()
    all_tokens = set()
    for tweet, doc in tqdm(zip(tweets_this_event, nlp.pipe([_t['text'] for _t in tweets_this_event],
                                                           n_threads=8)),
                           total=len(tweets_this_event)):

        tokens = frozenset([token.lower_
                            for token in doc
                            if token.lower_ not in stopwords.words('english') and token.lower_ in ft_comp])

        if tokens and tokens not in all_tokens:
            tweets_tokens[str(tweet['_id'])] = tokens
            all_tokens.add(tokens)

    return tweets_tokens


def expand_query(topics):
    # query expansion 
    topics_this_event = []
    for topic in topics:
        new_words = set()
        for keyword in topic:
            new_words |= set([word for word, _ in ft_comp.most_similar(keyword, topn=25)])
        new_topic = set(topic) | new_words
        topics_this_event.append(new_topic)
    return topics_this_event

In [15]:
path = Path('/home/mquezada/anchor-text-twitter/data/ft_alltweets_model.vec')
ft_comp = get_vectors(path.as_posix())
data_path = Path('/home/mquezada/tweet_topics/')

2018-07-06 16:11:54,528 : loading fasttext vectors from /home/mquezada/anchor-text-twitter/data/ft_alltweets_model.vec
2018-07-06 16:11:54,532 : loading projection weights from /home/mquezada/anchor-text-twitter/data/ft_alltweets_model.vec
2018-07-06 16:12:55,043 : loaded (1076139, 100) matrix from /home/mquezada/anchor-text-twitter/data/ft_alltweets_model.vec


In [16]:
#### BM25 RELEVANCE RANKING

events = get_events()
topics = list()

for event in events:
    topics.append([frozenset(topic['keywords'])
                   for topic in get_topics(event['_id'])[0]])

for EVENT_NO in (0, 1, 2):
    event = events[EVENT_NO]
    print(event['event_name'])
    event_id = event['_id']
        
    _info("expanding queries")
    
    # query expansion        
    topics_this_event = expand_query(topics[EVENT_NO])
    
    _info("processing tweets")
    tweet_tokens = process_tweets(event_id)

    _info("dictionaries for bm25")
    query_terms = set()
    for topic in topics_this_event:
        for term in topic:
            query_terms.add(term)
            
    ##### f(q_i, D)
    query_freqs = defaultdict(dict)

    ##### avgdl
    avgdl = 0

    ###### n(q_i)
    n_q_i = defaultdict(int)

    ###### |D|
    sizeof_D = len(tweet_tokens)

    for tweet_id, tokens in tqdm(tweet_tokens.items(), total=sizeof_D):
        for term in query_terms:
            if term in tokens:
                query_freqs[tweet_id][term] = 1
                n_q_i[term] += 1
        avgdl += len(tokens)

    avgdl = avgdl / sizeof_D
    
    k_1 = 1.2
    b = 0.75
    C1 = k_1 * (1 - b + b * (sizeof_D / avgdl))  # 634.7016801700094
    C2 = k_1 + 1  # 2.2

    def IDF(term):
        return np.log((sizeof_D - n_q_i[term] + .5) / (n_q_i[term] + .5))

    def BM25(query, tweet_id):
        score = 0
        for q_i in query:
            f_q_i = query_freqs[tweet_id].get(q_i, 0)

            score += IDF(q_i) * (f_q_i * C2) / (f_q_i + C1)
        return score
    
    def compute_bm25(query):
        scores_q_i = dict()
        for tweet_id, doc in tqdm(tweet_tokens.items(), total=sizeof_D):
            score = BM25(query, tweet_id)
            if score > 0:
                scores_q_i[tweet_id] = score

        return scores_q_i
    
    n_topics = len(topics_this_event)

    print("computing bm25 for", n_topics, "topics")
    pool = multiprocessing.Pool(n_topics)
    topic_scores = pool.map(compute_bm25, topics_this_event)
    
    print("saving data")    
    mongo_topics = [str(t['_id']) for t in get_topics(event_id)[0]]
    for t_id, ts in zip(mongo_topics, topic_scores):
        with (data_path / Path(f'event_{event_id}-topic_{t_id}-bm25_tweetid_score.txt')).open('w') as f:
            for tweet_id, score in ts.items():
                f.write(f"{tweet_id}\t{score}\n")
                
    print("done")

2018-07-06 16:13:00,389 : getting events
2018-07-06 16:13:00,395 : getting topics
2018-07-06 16:13:00,398 : getting topics
2018-07-06 16:13:00,401 : getting topics
2018-07-06 16:13:00,406 : expanding queries
2018-07-06 16:13:00,407 : precomputing L2-norms of word weight vectors


libya_hotel_tweets.tsv


2018-07-06 16:13:01,460 : processing tweets
2018-07-06 16:13:01,461 : getting all tweets
2018-07-06 16:13:06,667 : getting representatives
2018-07-06 16:13:06,755 : processing tweets
100%|██████████| 642251/642251 [00:00<00:00, 1299408.49it/s]
100%|██████████| 11168/11168 [00:34<00:00, 322.53it/s]
2018-07-06 16:13:41,957 : dictionaries for bm25
100%|██████████| 5932/5932 [00:00<00:00, 22936.05it/s]

computing bm25 for 6 topics



100%|██████████| 5932/5932 [00:00<00:00, 6732.43it/s]
100%|██████████| 5932/5932 [00:00<00:00, 6190.32it/s]
100%|██████████| 5932/5932 [00:01<00:00, 4566.80it/s]
100%|██████████| 5932/5932 [00:01<00:00, 3435.37it/s]
100%|██████████| 5932/5932 [00:01<00:00, 3350.61it/s]
100%|██████████| 5932/5932 [00:01<00:00, 3097.14it/s]
2018-07-06 16:13:44,401 : expanding queries


saving data
done
oscar_pistorius_tweets.tsv


2018-07-06 16:13:45,418 : processing tweets
2018-07-06 16:13:45,419 : getting representatives
2018-07-06 16:13:45,687 : processing tweets
100%|██████████| 642251/642251 [00:00<00:00, 1073502.62it/s]
100%|██████████| 64506/64506 [03:25<00:00, 314.33it/s]
2018-07-06 16:17:11,694 : dictionaries for bm25
100%|██████████| 34275/34275 [00:02<00:00, 15064.57it/s]

computing bm25 for 11 topics



100%|██████████| 34275/34275 [00:04<00:00, 7446.43it/s]
100%|██████████| 34275/34275 [00:06<00:00, 5398.38it/s]
100%|██████████| 34275/34275 [00:06<00:00, 5223.90it/s]
100%|██████████| 34275/34275 [00:08<00:00, 3950.39it/s]
100%|██████████| 34275/34275 [00:08<00:00, 3885.49it/s]
100%|██████████| 34275/34275 [00:09<00:00, 3727.64it/s]
100%|██████████| 34275/34275 [00:12<00:00, 2828.14it/s]
100%|██████████| 34275/34275 [00:12<00:00, 2761.98it/s]
100%|██████████| 34275/34275 [00:12<00:00, 2716.88it/s]
100%|██████████| 34275/34275 [00:13<00:00, 2513.71it/s]
100%|██████████| 34275/34275 [00:13<00:00, 2455.03it/s]
2018-07-06 16:17:28,576 : expanding queries


saving data
done
nepal_tweets.tsv


2018-07-06 16:17:29,449 : processing tweets
2018-07-06 16:17:29,450 : getting representatives
2018-07-06 16:17:29,869 : processing tweets
100%|██████████| 642251/642251 [00:00<00:00, 1107062.78it/s]
100%|██████████| 123964/123964 [06:14<00:00, 330.84it/s]
2018-07-06 16:23:45,457 : dictionaries for bm25
100%|██████████| 76533/76533 [00:03<00:00, 24254.68it/s]

computing bm25 for 10 topics



100%|██████████| 76533/76533 [00:13<00:00, 5688.39it/s]
100%|██████████| 76533/76533 [00:13<00:00, 5512.61it/s]
100%|██████████| 76533/76533 [00:14<00:00, 5324.70it/s]
100%|██████████| 76533/76533 [00:17<00:00, 4374.68it/s]
100%|██████████| 76533/76533 [00:17<00:00, 4252.37it/s]
100%|██████████| 76533/76533 [00:18<00:00, 4190.38it/s]
100%|██████████| 76533/76533 [00:18<00:00, 4067.44it/s]
100%|██████████| 76533/76533 [00:19<00:00, 3970.41it/s]
100%|██████████| 76533/76533 [00:22<00:00, 3364.78it/s]
100%|██████████| 76533/76533 [00:27<00:00, 2780.80it/s]


saving data
done


In [17]:
@lru_cache(maxsize=total_events)
def process_tweets_subset(tweet_ids, event_id):
    all_tweets = get_tweets()
    sub_tweets = [t for t in all_tweets if str(t['_id']) in tweet_ids]
    
    representatives = get_representatives(event_id)

    _info("processing tweets")

    # rep_tweet: repr_id => tweet
    rep_tweet = dict()
    for t in tqdm(sub_tweets):
        rep_tweet[t['representative']] = t

    # repr_ids: {repr_id} // this event
    repr_ids = set([r['_id'] for r in representatives])

    # tweets_this_event: [tweet]
    tweets_this_event_ = [t for r, t in rep_tweet.items() if r in repr_ids]
    tweets_this_event = []
    
    # filter out tweets wo expanded urls
    for t in tweets_this_event_:
        e_u = t['expanded_urls']
        if all(u is not None for u in e_u):
            tweets_this_event.append(t)    

    tweets_tokens = dict()
    all_tokens = set()
    for tweet, doc in tqdm(zip(tweets_this_event, nlp.pipe([_t['text'] for _t in tweets_this_event],
                                                           n_threads=8,
                                                           batch_size=1024)),
                           total=len(tweets_this_event)):

        tokens = frozenset([token.lower_
                            for token in doc
                            if token.lower_ not in stopwords.words('english') and token.lower_ in ft_comp])

        if tokens and tokens not in all_tokens:
            tweets_tokens[str(tweet['_id'])] = tokens
            all_tokens.add(tokens)

    return tweets_tokens

In [None]:
#### MMR

lambda_ = 0.6
_1_lambda_ = 1 - lambda_

files = list(data_path.glob('event_*-topic_*-bm25_tweetid_score.txt'))

for f_0 in files:
    # f_0 = files[1]

    _, e_id, t_id, _, _ = f_0.name.split('_')
    event_id = e_id.split('-')[0]
    topic_id = t_id.split('-')[0]

    with f_0.open('r') as f:
        ranked_tweets_ = list((line[:-1].split('\t')[0], float(line[:-1].split('\t')[1])) for line in f)

    ranked_tweets = sorted(ranked_tweets_, key=itemgetter(1), reverse=True)    
    tweets = [t for t, _ in ranked_tweets]
    tweets_tokens = process_tweets_subset(frozenset(tweets), event_id)

    n_docs = len(tweets_tokens)
    tweet_vectors = np.array([np.mean([ft_comp[token] for token in tokens], axis=0) for _, tokens in tweets_tokens.items()])
    norm_vectors = tweet_vectors / norm(tweet_vectors)

    all_topics_this_event, _ = get_topics(event_id)
    this_topic = None
    for t in all_topics_this_event:
        if str(t['_id']) == topic_id:
            this_topic = t
            break

    query_keywords = frozenset(expand_query([t['keywords']])[0])
    query = np.mean([ft_comp[token] for token in query_keywords if token in ft_comp], axis=0)
    doc_q_sim = (norm_vectors).dot(query) * lambda_

    @lru_cache(maxsize=2**20)
    @jit
    def sim_c(doc_x, doc_y):
        return norm_vectors[doc_x].dot(norm_vectors[doc_y].T)

    @jit
    def score(doc_x, selected):
        max_sim = 0
        if selected - {doc_x}:
            for doc_y in selected - {doc_x}:
                sim = sim_c(doc_x, doc_y)
                if sim > max_sim:
                    max_sim = sim

        return doc_q_sim[doc_x] - _1_lambda_ * max_sim

    def mmr2(docs):
        selected = set()

        while selected != set(docs):
            remaining = set(docs) - selected

            max_score = 0
            next_selected = None
            for r in remaining:
                sc = score(r, frozenset(selected))
                if sc > max_score:
                    max_score = sc
                    next_selected = r

            selected.add(next_selected)
            yield next_selected

    tweets = [tweet_id for tweet_id, rank in ranked_tweets][:min(len(ranked_tweets), 1000)]

    print(this_topic['topic_name'])
    f_name = f'event_{event_id}-topic_{topic_id}-tweet_ids_sorted_mmr.txt'
    with (data_path / Path(f_name)).open('w') as f:
        for cnt, i in enumerate(mmr2(range(len(tweets)))):
            _id = tweets[i]
            # text = ' '.join(sorted(tweets_tokens[_id]))
            # print(cnt, text, sep="\t")
            f.write(f'{_id}\n')

2018-07-06 16:26:40,964 : getting representatives
2018-07-06 16:26:41,210 : processing tweets
100%|██████████| 6993/6993 [00:00<00:00, 1172292.88it/s]
100%|██████████| 6993/6993 [00:26<00:00, 266.15it/s]
2018-07-06 16:27:07,761 : getting topics


Police under investigation


2018-07-06 16:28:44,524 : getting representatives
2018-07-06 16:28:44,921 : processing tweets
100%|██████████| 232/232 [00:00<00:00, 542709.72it/s]
100%|██████████| 232/232 [00:00<00:00, 280.84it/s]
2018-07-06 16:28:45,802 : getting topics


Humanitarian crisis


2018-07-06 16:28:48,124 : processing tweets
100%|██████████| 22075/22075 [00:00<00:00, 1260644.02it/s]
100%|██████████| 22075/22075 [01:17<00:00, 285.28it/s]


Rescue of people


2018-07-06 16:31:42,910 : processing tweets
100%|██████████| 621/621 [00:00<00:00, 771523.34it/s]
100%|██████████| 621/621 [00:02<00:00, 270.93it/s]


Oscar Pistorius removes his prosthesis


2018-07-06 16:32:09,115 : processing tweets
100%|██████████| 28876/28876 [00:00<00:00, 1269280.26it/s]
100%|██████████| 28876/28876 [01:35<00:00, 301.70it/s]


Death tolls


2018-07-06 16:35:26,190 : processing tweets
100%|██████████| 4316/4316 [00:00<00:00, 1210715.36it/s]
100%|██████████| 4316/4316 [00:15<00:00, 277.32it/s]


Pistorius pledges innocence


2018-07-06 16:37:22,368 : processing tweets
100%|██████████| 19569/19569 [00:00<00:00, 1273657.88it/s]
100%|██████████| 19569/19569 [01:04<00:00, 304.72it/s]


Reports on the magnitude of the earthquake


2018-07-06 16:40:03,206 : processing tweets
100%|██████████| 6848/6848 [00:00<00:00, 1215462.48it/s]
100%|██████████| 6848/6848 [00:22<00:00, 303.22it/s]


Avalanche in Mount Everest


2018-07-06 16:42:02,763 : processing tweets
100%|██████████| 2705/2705 [00:00<00:00, 833070.88it/s]
100%|██████████| 2705/2705 [00:10<00:00, 270.20it/s]


Shooting in a restaurant


2018-07-06 16:43:46,501 : getting representatives
2018-07-06 16:43:46,620 : processing tweets
100%|██████████| 953/953 [00:00<00:00, 1037418.04it/s]
100%|██████████| 953/953 [00:03<00:00, 289.72it/s]
2018-07-06 16:43:49,957 : getting topics


Hostages are taken


2018-07-06 16:45:13,789 : processing tweets
100%|██████████| 9383/9383 [00:00<00:00, 1164215.90it/s]
100%|██████████| 9383/9383 [00:33<00:00, 276.59it/s]


Interrogatory


2018-07-06 16:47:31,378 : processing tweets
100%|██████████| 434/434 [00:00<00:00, 747261.06it/s]
100%|██████████| 434/434 [00:01<00:00, 291.16it/s]


Confrontation with security forces


2018-07-06 16:47:42,651 : processing tweets
100%|██████████| 3872/3872 [00:00<00:00, 1225408.97it/s]
100%|██████████| 3872/3872 [00:12<00:00, 300.59it/s]


Psychiatric evaluation


2018-07-06 16:49:30,874 : processing tweets
100%|██████████| 402/402 [00:00<00:00, 590168.08it/s]
100%|██████████| 402/402 [00:01<00:00, 290.39it/s]


Paddy Powers


2018-07-06 16:49:40,003 : processing tweets
100%|██████████| 8764/8764 [00:00<00:00, 970710.90it/s]
100%|██████████| 8764/8764 [00:30<00:00, 285.11it/s]


International aid


2018-07-06 16:51:48,062 : processing tweets
100%|██████████| 18970/18970 [00:00<00:00, 1265281.26it/s]
100%|██████████| 18970/18970 [01:06<00:00, 285.68it/s]


Ways to help


2018-07-06 16:54:33,115 : processing tweets
100%|██████████| 457/457 [00:00<00:00, 998331.73it/s]
100%|██████████| 457/457 [00:01<00:00, 298.97it/s]


Car bomb explodes


2018-07-06 16:54:45,281 : processing tweets
100%|██████████| 1484/1484 [00:00<00:00, 1126986.63it/s]
100%|██████████| 1484/1484 [00:04<00:00, 304.93it/s]


ISIS adjudicates attack


2018-07-06 16:56:26,738 : processing tweets
100%|██████████| 17806/17806 [00:00<00:00, 1300137.13it/s]
100%|██████████| 17806/17806 [01:01<00:00, 290.53it/s]


Final arguments


In [88]:
tweets_tokens

{'5b17172fda870923dcb0e617': frozenset({"'s",
            'accident',
            'breaks',
            'day',
            'devastating',
            'one',
            'pistorius',
            'reeva',
            'shooting',
            'silence',
            'steenkamp',
            'valentine',
            'year'}),
 '5b17172fda870923dcb0e61c': frozenset({'amp',
            'broken',
            'killed',
            'message',
            'oscar',
            'pistorius',
            'posted',
            'reeva',
            'shot',
            'silence',
            'website',
            'year'}),
 '5b17172fda870923dcb0e620': frozenset({"'s",
            'day',
            'pistorius',
            'reeva',
            'remembers',
            'valentine'}),
 '5b17172fda870923dcb0e62a': frozenset({"'s",
            '4',
            'anniversary',
            'channel',
            'first',
            'google',
            'news',
            'pistorius',
            'reeva',
  

In [123]:
files = list(data_path.glob('event_*-topic_*-tweet_ids_sorted_mmr.txt'))

all_tweets = get_tweets()
all_tweets_d = dict()
for t in all_tweets:
    all_tweets_d[str(t['_id'])] = t

    
for f_0 in files:
    _, ev, top, _, _, _ = f_0.name.split("_")
    event_id = ev.split("-")[0]
    topic_id = top.split("-")[0]
    
    with f_0.open() as f:
        for i, line in enumerate(f):
            r_id = line[:-1]
            tweet = all_tweets_d[r_id]
            print(tweet['text'])
            
            if i == 10:
                break
                
        print()
        print()

Homes and offices collapse as massive earthquake rocks Nepal destroying historic 19th-century tower : At least... http://t.co/Zn7jsueOUD
ZH: Hundreds Dead After Devastating Nepal Earthquake Topples Structures, Starts Avalanches: A little over four... http://t.co/5E1nJl9Zy1
More than 1,300 dead after devastating earthquake: Nepals worst earthquake in 81 years has toppled structures... http://t.co/3tPIwBoMME
Earthquake kills more than 800 in Nepal. turns two historic Kathmandu landmarks into rubble
A man is freed from the rubble of a destroyed building in Katmandu. Earthquake kills hundreds http://t.co/av00lYkQY5 http://t.co/8lSWSXmYf6
More than 900 killed after the 7.8 magnitude earthquake in Nepal. Historical sites were devastated. Let's send our prayers for them
#earthquake  Media reports from Nepal say that historical tower #Dharahara (similar to Delhis #Qutb Minar) in... http://t.co/AP5h2wwe48
Deadly! Destructive 7.9 EARTHQUAKE Shakes NEPAL; 600+ Dead!; Historical Blds. Dstryd: Chao

In [10]:
files = list(data_path.glob('event_*-topic_*-tweet_ids_sorted_mmr.txt'))

all_tweets = get_tweets()
all_tweets_d = dict()
for t in all_tweets:
    all_tweets_d[str(t['_id'])] = t

    
for f_0 in files:
    _, ev, top, _, _, _ = f_0.name.split("_")
    event_id = ev.split("-")[0]
    topic_id = top.split("-")[0]
    
    with f_0.open() as f:
        for i, line in enumerate(f):
            r_id = line[:-1]
            tweet = all_tweets_d[r_id]
            print(tweet['text'])
            
            if i == 10:
                break
                
        print()
        print()

2018-07-06 15:54:26,139 : getting all tweets


Homes and offices collapse as massive earthquake rocks Nepal destroying historic 19th-century tower : At least... http://t.co/Zn7jsueOUD
ZH: Hundreds Dead After Devastating Nepal Earthquake Topples Structures, Starts Avalanches: A little over four... http://t.co/5E1nJl9Zy1
More than 1,300 dead after devastating earthquake: Nepals worst earthquake in 81 years has toppled structures... http://t.co/3tPIwBoMME
Earthquake kills more than 800 in Nepal. turns two historic Kathmandu landmarks into rubble
A man is freed from the rubble of a destroyed building in Katmandu. Earthquake kills hundreds http://t.co/av00lYkQY5 http://t.co/8lSWSXmYf6
More than 900 killed after the 7.8 magnitude earthquake in Nepal. Historical sites were devastated. Let's send our prayers for them
#earthquake  Media reports from Nepal say that historical tower #Dharahara (similar to Delhis #Qutb Minar) in... http://t.co/AP5h2wwe48
Deadly! Destructive 7.9 EARTHQUAKE Shakes NEPAL; 600+ Dead!; Historical Blds. Dstryd: Chao