# Kafka Producer

In [None]:
from kafka import KafkaProducer
from json import dumps
import time
 
# Producer whose message values are JSON-encoded and sent as UTF-8 bytes.
producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
          value_serializer=lambda x: dumps(x).encode('utf-8'))

data = {'hello' : 'world', 'time': time.time()}

# send() only queues the record and returns a future. Wait on that future
# (as the Model Producer cell does) so that delivery failures are raised in
# this cell instead of being dropped silently; the timeout keeps the cell
# from hanging if the broker never acknowledges the record.
producer.send('dsp', data).get(timeout=10)


# Kafka Consumer

In [None]:
from kafka import KafkaConsumer
from json import loads
 
# Subscribe to the 'dsp' topic; every raw message value is decoded as UTF-8
# and parsed from JSON before it is handed to the loop below.
consumer = KafkaConsumer(
    'dsp',
    bootstrap_servers=['localhost:9092'],
    value_deserializer=lambda raw: loads(raw.decode('utf-8')),
)

# Print the deserialized value of each record as it is consumed.
for message in consumer:
    print(message.value)


# Model Producer

In [1]:

from kafka import KafkaProducer
from json import dumps
import time
import uuid

def _to_json_bytes(payload):
    """Serialize a message value to JSON and encode it as UTF-8 bytes."""
    return dumps(payload).encode('utf-8')


producer = KafkaProducer(
    bootstrap_servers=['54.166.148.190:9092'],
    value_serializer=_to_json_bytes,
)

# Feature flags G1..G10 for one user (only G1 is set), followed by a freshly
# generated identifier for that user.
data = {
    'G1': 1,
    'G2': 0,
    'G3': 0,
    'G4': 0,
    'G5': 0,
    'G6': 0,
    'G7': 0,
    'G8': 0,
    'G9': 0,
    'G10': 0,
}
data['User_ID'] = str(uuid.uuid1())

# Send the record to the 'dsp' topic and wait for the result, which is
# displayed as the cell output.
result = producer.send('dsp', data)
result.get()


RecordMetadata(topic='dsp', partition=0, topic_partition=TopicPartition(topic='dsp', partition=0), offset=109, timestamp=1576709681368, checksum=None, serialized_key_size=-1, serialized_value_size=142, serialized_header_size=-1)

# Streaming Pipeline 

In [None]:

from pyspark.sql.types import StringType
import json 
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Build a logistic regression model: the first ten columns of the games
# data set are the inputs and the 'label' column is the target.
gamesDF = pd.read_csv(
    "https://github.com/bgweber/Twitch/raw/master/Recommendations/games-expand.csv"
)
model = LogisticRegression().fit(gamesDF.iloc[:, 0:10], gamesDF['label'])

# Open a streaming DataFrame subscribed to the 'dsp' Kafka topic.
df = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "54.166.148.190:9092")
    .option("subscribe", "dsp")
    .load()
)

# Scoring function that is wrapped as a Spark UDF further down.
def score(row):
    """Score one JSON-encoded message with the fitted model.

    Args:
        row: JSON string holding the feature values and a 'User_ID' entry.

    Returns:
        A JSON string with the message's 'User_ID' and the model output
        under 'pred'.
    """
    payload = json.loads(row)

    # One-row frame with one column per JSON key. The first ten columns are
    # taken by position, so the features are assumed to come first in the
    # message -- TODO confirm against the producer.
    frame = pd.DataFrame.from_dict(payload, orient = "index").transpose()
    probabilities = model.predict_proba(frame.iloc[:, 0:10])

    # NOTE(review): [0][0] is the first probability column for the single
    # row, which presumably corresponds to the model's first class --
    # confirm this is the class that should be reported as 'pred'.
    scored = {'User_ID': payload['User_ID'], 'pred': probabilities[0][0]}
    return str(json.dumps(scored))
    
# select the value field and apply the UDF
# `udf` is imported explicitly here: the notebook never imports it, so the
# cell would otherwise only run where the environment pre-defines the name.
from pyspark.sql.functions import udf

# Kafka delivers the payload as binary; cast it to a string for JSON parsing.
df = df.selectExpr("CAST(value AS STRING)")

# Wrap score() so it runs on each row and returns a string column, then
# replace 'value' with the scored JSON for writing back to Kafka.
score_udf = udf(score, StringType())
df = df.select(score_udf("value").alias("value"))

# Start the streaming query that writes the scored records to the 'preds'
# Kafka topic, using "/temp" as the checkpoint location.
query = (
    df.writeStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "54.166.148.190:9092")
    .option("topic", "preds")
    .option("checkpointLocation", "/temp")
    .start()
)


# Model Consumer

In [None]:
from kafka import KafkaConsumer
from json import loads

# Subscribe to the 'preds' topic; each raw message value is passed straight
# to json.loads.
consumer = KafkaConsumer(
    'preds',
    bootstrap_servers=['54.166.148.190:9092'],
    value_deserializer=loads,
)

# Print each parsed prediction record as it is consumed.
for message in consumer:
    print(message.value)