In [1]:
from pymongo import MongoClient
import requests
import pandas as pd
import json
from kafka import KafkaConsumer
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K

In [2]:
def focal_loss(gamma=2., alpha=0.25):
    """Build a focal-loss function closed over ``gamma`` and ``alpha``.

    Args:
        gamma: Exponent applied to ``(1 - y_pred)``.
        alpha: Constant factor multiplied into every loss term.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` that computes the
        loss using the Keras backend ``K``.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep predictions away from 0 and 1 so the logarithm stays finite.
        eps = K.epsilon()
        clipped = K.clip(y_pred, eps, 1. - eps)

        # Per-element cross-entropy term.
        cross_entropy = -y_true * K.log(clipped)

        # Scale by alpha and the (1 - p)^gamma factor, then sum over the last axis.
        weighted = alpha * K.pow(1 - clipped, gamma) * cross_entropy
        return K.sum(weighted, axis=-1)

    return focal_loss_fixed

# Restore the saved network from disk. The name 'focal_loss_fixed' is mapped
# to a focal_loss() instance so the loader can resolve that custom object.
loaded_model = load_model(
    "model_ANNLSTM_week_3.h5",
    custom_objects=dict(focal_loss_fixed=focal_loss()),
)

# Re-compile with the Adam optimizer, categorical cross-entropy and accuracy.
# NOTE(review): this replaces the focal loss supplied at load time with
# categorical cross-entropy - confirm that is intended.
loaded_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



In [3]:
def create_consumer():
    """Create a KafkaConsumer subscribed to the 'student_data' topic.

    The consumer connects to ``localhost:9092`` as group ``DS317``, uses
    ``auto_offset_reset='earliest'``, enables auto-commit, and decodes each
    message value from UTF-8 JSON.

    Returns:
        The configured ``KafkaConsumer`` instance.
    """
    def _decode_json(raw_value):
        # Message payloads arrive as bytes holding UTF-8 encoded JSON.
        return json.loads(raw_value.decode('utf-8'))

    consumer_options = dict(
        bootstrap_servers=['localhost:9092'],
        group_id='DS317',
        value_deserializer=_decode_json,
        auto_offset_reset='earliest',
        enable_auto_commit=True,
    )
    return KafkaConsumer('student_data', **consumer_options)

In [4]:
import os
from urllib.parse import quote_plus

# Read the MongoDB credentials from the environment instead of embedding them
# in the notebook. Set MONGO_USERNAME and MONGO_PASSWORD before running this
# cell; a missing variable raises KeyError.
# NOTE(review): the password that was previously hard-coded here has been
# exposed in the notebook and should be rotated.
username = os.environ["MONGO_USERNAME"]
password = os.environ["MONGO_PASSWORD"]

# Percent-escape both parts: reserved characters such as `@` or `:` would
# otherwise break the connection URI.
encoded_username = quote_plus(username)
encoded_password = quote_plus(password)

# Build the connection URL and open the client.
mongo_url = f"mongodb+srv://{encoded_username}:{encoded_password}@ds317.o9qaf.mongodb.net/"
client = MongoClient(mongo_url)

# Select the database and collection that receive the prediction results.
db = client["DS317"]
collection = db["test"]

In [6]:
# Mapping from the numeric class index to its letter label.
label_mapping = {0: "A", 1: "B", 2: "C", 3: "D", 4: "E"}
def process_messages():
    """Consume Kafka messages, predict a class for each, and save it to MongoDB.

    For every message yielded by the consumer from ``create_consumer()``:
    the deserialized payload is turned into a one-row DataFrame, the
    identifier/label columns are dropped, ``loaded_model`` predicts the class,
    the letter label is written back under ``"classification_predict"``, and
    the payload is inserted into ``collection``.

    An error while handling a single message is printed and the loop moves on
    to the next message. The consumer is always closed when the loop ends,
    including when it is interrupted (e.g. a kernel interrupt) or when an
    error escapes the loop.
    """
    consumer = create_consumer()

    try:
        for message in consumer:
            try:
                data = message.value

                # Wrap the payload in a list to obtain a single-row DataFrame.
                data_df = pd.DataFrame([data])

                # Drop the columns that are not used for prediction; columns
                # absent from the payload are ignored.
                X_test = data_df.drop(
                    columns=["classification_encoded", "user_id", "name", "course_id", "school", "enroll_time", "classification"],
                    errors='ignore'
                )

                # Predict and take the highest-scoring class of the only row.
                y_pred = loaded_model.predict(X_test)
                y_pred_class = y_pred.argmax(axis=1)[0]

                # Store the predicted label on the payload itself.
                data["classification_predict"] = label_mapping.get(int(y_pred_class), "Unknown")
                # Persist the enriched payload to MongoDB.
                collection.insert_one(data)
                # Report the stored record.
                print(f"Processed and saved message to MongoDB: {data}")
            except Exception as e:
                # Best effort: report the failure and continue with the next message.
                print(f"Error processing message: {e}")
    finally:
        # Release the consumer however the loop ends, so the connection is
        # not leaked.
        consumer.close()

In [None]:
# Entry point: start consuming and processing messages when run as the main module.
# NOTE(review): process_messages() loops over the Kafka consumer, so this cell
# presumably keeps running until it is interrupted - confirm.
if __name__ == "__main__":
    process_messages()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 645ms/step
Processed and saved message to MongoDB: {'user_id': 'U_1001694', 'name': '高进姝', 'course_id': 'C_735164', 'gender': 0.0, 'school': '昆明理工大学', 'enroll_time': '2020-11-18 19:49:32', 'school_encoded': 476, 'course_id_encoded': 138, 'comment_count_week1': 0.0652310711, 'reply_count_week1': 0.0756756757, 'questions_done_week1': 0.1590995678, 'attempts_count_week1': 0.1630505181, 'correct_answers_week1': 0.2338800648, 'total_score_week1': 0.0938749839, 'user_watching_time_week1': 0.0674681185, 'comment_count_week2': 0.0424263675, 'reply_count_week2': 0.2, 'questions_done_week2': 0.5149501661, 'attempts_count_week2': 0.5046728972, 'correct_answers_week2': 0.5132450331, 'total_score_week2': 0.2708860759, 'user_watching_time_week2': 0.091789899, 'comment_count_week3': 0.0589553795, 'reply_count_week3': 0.125, 'questions_done_week3': 0.1951905916, 'attempts_count_week3': 0.2029904828, 'correct_answers_week3': 0.1886207422, 't