# Setup

In [3]:
# Download needed packages
!pip install --quiet google-cloud-pubsub
!pip install --quiet fsspec gcsfs

In [6]:
from google.colab import auth

In [7]:
from google.cloud import pubsub
from google.api_core import exceptions as gexc

In [8]:
# { display-mode: "form" }
project_id = "starthack-workshop" #@param {type:"string"}

In [9]:
auth.authenticate_user()

# Publisher


In [26]:
import json
import time
from typing import Dict
from concurrent.futures import Future


import numpy as np
import pandas as pd
from google.cloud import storage

# Read the testing dataset from google cloud bucket into a DataFrame
df_test = pd.read_csv("gs://starthack-workshop-data/test.csv")

# Tranform each row into a JSON
jsons = [json.dumps(x) for x in df_test.to_dict(orient="records")]

In [27]:
# Initialize the Pub/Sub client
pub_client = pubsub.PublisherClient()
topic_path = pub_client.topic_path(project_id, "tweets-in")

In [33]:
# When we push a message to a topic we get a future, we store them to
# make sure we wait for all of them to finish before ending the code
# execution
futures = dict()

def get_callback(f: Future, data: Dict):
    """Get a callback function called when the client is done sending a 
    message"""

    def callback(f: Future):
        """Callback called when future is complete"""
        try:
            print(f"Future: {f.result()}, data: {data[:70]} ...")
            futures.pop(data)
        except Exception as err:
            print(err)
            print("Please handle {} for {}.".format(f.exception(), data))

    return callback

# We send 100 Tweets. We sample an exponential distribution for our waiting
# time between events (this is not important)
for i, wait_time in enumerate(np.random.exponential(1, 100)):
    # Readt the ith json
    data = jsons[i]
    # We set the future corresponding to this data to None
    futures.update({data: None})
    # We publish and get a Future
    future = pub_client.publish(topic_path, data.encode("utf-8"))
    futures[data] = future
    # Publish failures shall be handled in the callback function.
    future.add_done_callback(get_callback(future, data))
    time.sleep(wait_time)



# Wait for all the publish futures to resolve before exiting.
while futures:
    time.sleep(5)

print(f"Published messages with error handler to {topic_path}.")

Future: 2169611729020673, data: {"id": 0, "keyword": NaN, "location": NaN, "text": "Just happened a te ...
Future: 2169575979334650, data: {"id": 2, "keyword": NaN, "location": NaN, "text": "Heard about #earth ...
Future: 2169592659903786, data: {"id": 3, "keyword": NaN, "location": NaN, "text": "there is a forest  ...
Future: 2169592398088145, data: {"id": 9, "keyword": NaN, "location": NaN, "text": "Apocalypse lightin ...
Future: 2169602577894871, data: {"id": 11, "keyword": NaN, "location": NaN, "text": "Typhoon Soudelor  ...
Future: 2169619107025850, data: {"id": 12, "keyword": NaN, "location": NaN, "text": "We're shaking...I ...
Future: 2169601054512350, data: {"id": 21, "keyword": NaN, "location": NaN, "text": "They'd probably s ...
Future: 2169600516797586, data: {"id": 22, "keyword": NaN, "location": NaN, "text": "Hey! How are you? ...
Future: 2169608980569740, data: {"id": 27, "keyword": NaN, "location": NaN, "text": "What a nice hat?" ...
Future: 2169634064559037, data: {"id"

KeyboardInterrupt: ignored