In [1]:
import requests
from bs4 import BeautifulSoup
from kafka import KafkaProducer, KafkaConsumer
import json
import time
from flask import Flask, jsonify
import threading

In [2]:
def get_product(timeout=10):
    """Scrape the scrapeme.live shop listing and each listed product's detail page.

    The listing page supplies the title and price; the description and stock
    text are only present on the individual product pages, so one extra request
    is made per product.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up. Without
            a timeout a stalled server would block the caller indefinitely.

    Returns:
        A list of dicts with the string keys "title", "price", "description"
        and "stock". Listing entries lacking a title or price element are
        skipped. When a product page cannot be fetched, or lacks the relevant
        element, the description/stock fall back to placeholder strings.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched or answers with an HTTP error status.
    """
    url = "https://scrapeme.live/shop/"
    products = []

    # A single session lets the listing request and all detail requests reuse
    # the same connection instead of reconnecting for every product.
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        # Fail loudly rather than parsing an error page as if it were the shop.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        for product in soup.select(".product"):
            title_tag = product.select_one(".woocommerce-loop-product__title")
            price_tag = product.select_one(".price")
            if title_tag is None or price_tag is None:
                # Not a complete product card; nothing useful to record.
                continue
            title = title_tag.text.strip()
            price = price_tag.text.strip()

            # Placeholders used whenever the detail page is missing or incomplete.
            description = "No description available"
            stock = "Stock information not available"

            link_tag = product.select_one("a")
            product_link = link_tag.get("href") if link_tag is not None else None
            if product_link:
                try:
                    product_response = session.get(product_link, timeout=timeout)
                    product_response.raise_for_status()
                except requests.RequestException as e:
                    # One unreachable product page should not discard the whole scrape.
                    print(f"Could not fetch details for {title}: {e}")
                else:
                    product_soup = BeautifulSoup(product_response.content, "html.parser")

                    description_tag = product_soup.select_one(
                        ".woocommerce-product-details__short-description"
                    )
                    if description_tag is not None:
                        description = description_tag.text.strip()

                    stock_tag = product_soup.select_one(".stock")
                    if stock_tag is not None:
                        stock = stock_tag.text.strip()

            products.append({
                "title": title,
                "price": price,
                "description": description,
                "stock": stock
            })

    return products

# Run the scraper once and print the raw list of product dicts as a sanity check.
# Note: get_product() performs live HTTP requests (the listing page plus one per product).
data = get_product()
print(data)

[{'title': 'Bulbasaur', 'price': '£63.00', 'description': 'Bulbasaur can be seen napping in bright sunlight. There is a seed on its back. By soaking up the sun’s rays, the seed grows progressively larger.', 'stock': '45 in stock'}, {'title': 'Ivysaur', 'price': '£87.00', 'description': 'There is a bud on this Pokémon’s back. To support its weight, Ivysaur’s legs and trunk grow thick and strong. If it starts spending more time lying in the sunlight, it’s a sign that the bud will bloom into a large flower soon.', 'stock': '142 in stock'}, {'title': 'Venusaur', 'price': '£105.00', 'description': 'There is a large flower on Venusaur’s back. The flower is said to take on vivid colors if it gets plenty of nutrition and sunlight. The flower’s aroma soothes the emotions of people.', 'stock': '30 in stock'}, {'title': 'Charmander', 'price': '£48.00', 'description': 'The flame that burns at the tip of its tail is an indication of its emotions. The flame wavers when Charmander is enjoying itself.

In [3]:
def create_producer():
    """Build a KafkaProducer for the broker at localhost:9093.

    Message values are serialized to UTF-8 encoded JSON before being sent.

    Returns:
        The KafkaProducer instance, or None when construction fails for any
        reason (the error is printed rather than raised).
    """
    def encode_as_json(payload):
        # JSON text -> bytes, which is what the producer hands to the broker
        return json.dumps(payload).encode('utf-8')

    try:
        return KafkaProducer(
            bootstrap_servers='localhost:9093',
            value_serializer=encode_as_json,
        )
    except Exception as error:
        print(f"Producer creating error: {error}")

    return None


def produce_messages(producer, data, topic='my-topic', delay=1):
    """Send every item of ``data`` to a Kafka topic, one message per item.

    Args:
        producer: Object exposing ``send(topic, value=...)`` and ``flush()``,
            e.g. the producer returned by ``create_producer``.
        data: Iterable of items to publish; each is passed as the message value.
        topic: Name of the topic to publish to. Defaults to 'my-topic'.
        delay: Seconds to pause after each message. Pass 0 (or None) to send
            without pausing. Defaults to 1.

    Returns:
        None. Any exception raised while sending or flushing is printed, not
        propagated, so a failure here does not stop the caller.
    """
    try:
        for item in data:
            producer.send(topic, value=item)
            print(f"Sent message: {item}")  # echo so the sent payload can be inspected
            if delay:
                time.sleep(delay)  # throttle between consecutive messages
        producer.flush()  # block until buffered messages have been handed off
    except Exception as e:
        print(f"Sending error messages: {e}")

In [4]:
def _dump_json_atomically(payload, path, attempts=5, pause=0.05):
    """Write ``payload`` as JSON to ``path`` without exposing a half-written file.

    The data is written to a sibling temp file and then swapped into place, so a
    concurrent reader sees either the previous snapshot or the new one.
    """
    import os  # local import: only this helper needs it

    temp_path = f"{path}.tmp"
    with open(temp_path, 'w') as file:
        json.dump(payload, file)

    for attempt in range(1, attempts + 1):
        try:
            os.replace(temp_path, path)
            return
        except PermissionError:
            # On Windows the swap can be refused while a reader still has the
            # destination open; that window is short, so retry briefly.
            if attempt == attempts:
                raise
            time.sleep(pause)


def write_to_file():
    """Consume 'my-topic' and mirror everything received into kafka_data.json.

    Connects to the broker at localhost:9093 as group 'my-group', starting from
    the earliest offset when the group has no committed position. After every
    message the full list of messages received so far is rewritten to
    'kafka_data.json'. Runs until the consumer iterator ends or raises; errors
    inside the loop are printed, and the consumer is always closed.

    NOTE(review): each stored element is the decoded message text, not a parsed
    object. With the JSON serializer used by the producer in this file, that
    makes the file a JSON list of JSON strings. Confirm whether readers expect
    that before changing it.
    """
    # create a consumer to read messages from my-topic
    consumer = KafkaConsumer(
        'my-topic',
        bootstrap_servers='localhost:9093',  # listener
        auto_offset_reset='earliest',
        group_id='my-group',
        enable_auto_commit=True,
        auto_commit_interval_ms=5000
    )

    messages = []
    try:
        for message in consumer:
            text = message.value.decode('utf-8')  # decode once, reuse below
            print(f"Received message: {text}")  # echo so received payloads can be inspected
            messages.append(text)

            # Publish the snapshot atomically: the file is read by other code
            # while this loop keeps rewriting it.
            _dump_json_atomically(messages, 'kafka_data.json')
    except Exception as e:
        print(f"Consumer loop error: {e}")
    finally:
        consumer.close()

In [6]:
# Flask application that exposes the consumer's JSON snapshot over HTTP.
app = Flask(__name__)


@app.route('/data', methods=['GET'])
def get_data():
    """Serve the current contents of 'kafka_data.json' as a JSON response.

    Returns:
        The parsed file content wrapped by ``jsonify``. If the file cannot be
        opened, parsed, or serialized, the error is printed and a JSON error
        body is returned with HTTP status 500.
    """
    try:
        with open('kafka_data.json', 'r') as snapshot:
            return jsonify(json.load(snapshot))
    except Exception as error:
        print(f"Error reading kafka_data.json file: {error}")
        # signal the failure to the client with an HTTP 500 status
        return jsonify({"error": "Failed to read data"}), 500

#flask runner
def run_flask():
    """Start the Flask server on port 5000, listening on all interfaces (0.0.0.0)."""
    bind_host, bind_port = '0.0.0.0', 5000
    app.run(host=bind_host, port=bind_port)


if __name__ == '__main__':
    # Consume in the background. As a daemon thread it does not keep the
    # process alive once the main program exits.
    consumer_thread = threading.Thread(target=write_to_file, daemon=True)
    consumer_thread.start()

    # Scrape the shop and publish every product; failures are reported but do
    # not prevent the web server below from starting.
    try:
        producer = create_producer()
        if not producer:
            print("Producer could not be created.")
        else:
            data = get_product()
            produce_messages(producer, data)
    except Exception as exc:
        print(f"Error in producing messages: {exc}")

    # Serve the collected data over HTTP (blocks until the server stops).
    run_flask()

Sent message: {'title': 'Bulbasaur', 'price': '£63.00', 'description': 'Bulbasaur can be seen napping in bright sunlight. There is a seed on its back. By soaking up the sun’s rays, the seed grows progressively larger.', 'stock': '45 in stock'}
Sent message: {'title': 'Ivysaur', 'price': '£87.00', 'description': 'There is a bud on this Pokémon’s back. To support its weight, Ivysaur’s legs and trunk grow thick and strong. If it starts spending more time lying in the sunlight, it’s a sign that the bud will bloom into a large flower soon.', 'stock': '142 in stock'}
Sent message: {'title': 'Venusaur', 'price': '£105.00', 'description': 'There is a large flower on Venusaur’s back. The flower is said to take on vivid colors if it gets plenty of nutrition and sunlight. The flower’s aroma soothes the emotions of people.', 'stock': '30 in stock'}
Sent message: {'title': 'Charmander', 'price': '£48.00', 'description': 'The flame that burns at the tip of its tail is an indication of its emotions. 

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://10.5.52.123:5000
Press CTRL+C to quit
