In [None]:
import json

from kafka import KafkaConsumer
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegressionModel
from pyspark.sql import SparkSession

# Base HDFS location used for the saved models, and the symbols that have one
HDFS_PATH = 'hdfs://10.84.129.52:9000/trab/g05'
TICKERS = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'V']

# Reuse the active SparkSession, or start one if none exists yet
spark = SparkSession.builder.getOrCreate()

# Subscribe to the input topic; every message value is decoded from UTF-8
# bytes and parsed as JSON before it reaches the consuming loop
consumer = KafkaConsumer(
    'g05in',
    bootstrap_servers='10.204.131.11:9092',
    value_deserializer=lambda raw: json.loads(raw.decode('utf-8')),
)

# One saved LinearRegressionModel per ticker, keyed by ticker symbol
models = {
    symbol: LinearRegressionModel.load(f'{HDFS_PATH}/models/{symbol}_model')
    for symbol in TICKERS
}

# Streaming loop: for every Kafka message, run each ticker's values through
# its regression model and print the real-time close next to the prediction.

# Order of the values packed into the feature vector handed to each model.
# NOTE(review): assumed to match the column order used at training time -- confirm.
FEATURE_COLUMNS = ["Low", "Open", "Volume", "High", "Close"]

try:
    # Consume messages until the process is interrupted
    for message in consumer:
        # The consumer's value_deserializer has already applied json.loads once.
        # Decode again only when the value is still text (double-encoded
        # payload); json.loads on an already-parsed dict raises TypeError.
        payload = message.value
        if isinstance(payload, (str, bytes, bytearray)):
            message_dict = json.loads(payload)
        else:
            message_dict = payload

        # Each top-level key is a ticker mapped to its price/volume fields
        for ticker, data in message_dict.items():
            model = models.get(ticker)
            if model is None:
                # A symbol without a loaded model must not stop the whole stream
                print(f"No model loaded for {ticker}; skipping")
                continue

            # Spark ML regression models read a single vector-typed features
            # column, so pack the values into a dense vector stored under the
            # column name this model is configured to read.
            features = Vectors.dense([data[column] for column in FEATURE_COLUMNS])
            features_df = spark.createDataFrame([(features,)], [model.getFeaturesCol()])

            # Use the model to make a prediction
            prediction = model.transform(features_df).select("prediction").first()[0]

            # Compare the predicted value with the real-time value
            real_time_value = data['Close']
            difference = real_time_value - prediction

            # Print the result
            print(
                f"For {ticker}, the real-time value is {real_time_value}, the predicted value is {prediction}, and the difference is {difference}")
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop the consumer; nothing to report
    pass
finally:
    # Release the Kafka connection and the Spark session on every exit path,
    # not only on Ctrl-C
    consumer.close()
    spark.stop()