In [None]:
%pip install pandas paho-mqtt

In [4]:
# ==========================================
# PHASE 1: Initial Topic Analysis (TOP 50 UPDATE)
# Goal: Extract Top 50 Frequent ROOT Topics
# ==========================================

import pandas as pd
import glob
import os

# 1. Load Data
file_pattern = "*.pkl"
file_list = glob.glob(file_pattern)
print(f"Found {len(file_list)} files.")

dfs = []
for file in file_list:
    try:
        # Load assuming GZIP compression
        df = pd.read_pickle(file, compression='gzip')
        dfs.append(df)
    except Exception as e:
        print(f"Failed to load {file}: {e}")

if dfs:
    # Combine dataframes
    full_df = pd.concat(dfs, ignore_index=True)
    
    # 1. Get the column data and convert to string
    all_topics = full_df.iloc[:, 0].astype(str)
    
    # 2. Extract ONLY the Root (the part before the first '/')
    root_topics = all_topics.apply(lambda x: x.split('/')[0] if '/' in x else x)
    
    # 3. Count frequency and take TOP 50 (Updated request)
    # This automatically captures the most "common" and "popular" roots across all files
    top_roots_counts = root_topics.value_counts().head(50)
    
    print(f"\n--- Top {len(top_roots_counts)} Most Frequent ROOTS ---")
    print(top_roots_counts)

    # 4. Generate List with Wildcards
    target_topics = []
    for root in top_roots_counts.index.tolist():
        topic_str = str(root) + "/#"
        target_topics.append(topic_str)

    # Check if we have enough
    print(f"\nTotal topics generated: {len(target_topics)}")

    # 5. Save to File
    output_file = "final_topics_50.txt"
    with open(output_file, "w") as f:
        f.write("TOP 50 MQTT ROOT TOPICS (Source: Lab Dataset)\n")
        f.write("================================================\n")
        for t in target_topics:
            f.write(f"{t}\n")
    
    print(f"✅ DONE! File '{output_file}' created with 50 topics.")
    print("Send this file to your group.")

else:
    print("Error: No data loaded.")

Found 3 files.

--- Top 50 Most Frequent ROOTS ---
topic_list
{'                   521
{'ActiveMQ           280
{'gateway            146
{'tele                98
{'teslamate           33
{'zigbee2mqtt         29
{'abc                 25
{'sys                 24
{'eu868               23
{'homeassistant       18
{'device              18
{'pt:j1               17
{'gw                  13
{'home                13
{'shellies            12
{'mqtt                12
{'owntracks           12
{'EGOi                12
{'v1                  10
{'sensor              10
{'prop                10
{'weather             10
{'topic                9
{'esp32                9
{'as923                9
{'dev                  9
{'application          8
{'iot-2                8
{'domoticz             8
{'servicelocation      8
{'LedMatrix            7
{'SNR-ERD-4            7
{'au915_0              7
{'tasmota              7
{'dvswitch             6
{'status               6
{'whiteboard           6
{'test       

In [3]:
# ==========================================
# Export Results to File
# ==========================================

# 1. Save as a clean Text file (Easy to read)
with open("final_topics_for_group.txt", "w") as f:
    f.write("Here is the list of frequent ROOT topics extracted from the dataset:\n")
    f.write("============================================================\n")
    for topic in target_topics:
        f.write(f"{topic}\n")
    f.write("============================================================\n")
    f.write(f"Total topics found: {len(target_topics)}")

print("✅ Success! A file named 'final_topics_for_group.txt' has been created.")

✅ Success! A file named 'final_topics_for_group.txt' has been created.
