In [6]:
import pickle
from typing import List
from drain3 import TemplateMiner
from drain3.template_miner_config import TemplateMinerConfig
from transformers import pipeline
import torch
import pandas as pd

def load_combined_model(pkl_path: str):
    with open(pkl_path, "rb") as f:
        data = pickle.load(f)

    t5_model_path = data["t5_model_path"]
    drain_state = data["drain_state"]

    device = 0 if torch.cuda.is_available() else -1
    t5_pipe = pipeline(
        "text2text-generation",
        model=t5_model_path,
        tokenizer=t5_model_path,
        device=device
    )

    config = TemplateMinerConfig()
    miner = TemplateMiner(config=config)

    for c in drain_state.get("clusters", []):
        tmpl = str(c.get("template", ""))
        size = int(c.get("size", 1) or 1)
        for _ in range(size):
            miner.add_log_message(tmpl)

    class CombinedModel:
        def __init__(self, t5, dr):
            self.t5 = t5
            self.miner = dr

        def predict(self, logs: List[str]):
            outs = self.t5(
                list(logs),
                max_new_tokens=64,
                num_beams=4,
                do_sample=False
            )
            t5_templates = [o.get("generated_text", "").strip() for o in outs]
            cluster_list = list(self.miner.drain.clusters)
            results = []
            for raw, tmpl in zip(logs, t5_templates):
                res = self.miner.add_log_message(tmpl)
                cid = res.get("cluster_id")
                sim = res.get("similarity", 1.0)
                if cid is not None and cid < len(cluster_list):
                    cl = cluster_list[cid]
                    try:
                        ctmpl = cl.get_template()
                    except Exception:
                        ctmpl = getattr(cl, "template", "")
                    cid_str = f"T{cid:08d}"
                else:
                    ctmpl = ""
                    cid_str = "T-1"
                results.append(
                    {
                        "raw": raw,
                        "t5_template": tmpl,
                        "cluster_id": cid_str,
                        "cluster_template": ctmpl,
                        "drain_similarity": sim,
                    }
                )
            return results

    return CombinedModel(t5_pipe, miner)


if __name__ == "__main__":
    model = load_combined_model(r"C:\Users\User\KS\combined_model.pkl")

    new_logs = [
        "Dec 10 05:22:10 sshd[1234]: Failed password for root from 192.168.1.22 port 52210 ssh2",
        "Dec 10 06:41:55 api-worker-1 kernel: CPU0: Core temperature above threshold, cpu clock throttled",
    ]

    results = model.predict(new_logs)
    df = pd.DataFrame(results)
    print(df)


                                                 raw  \
0  Dec 10 05:22:10 sshd[1234]: Failed password fo...   
1  Dec 10 06:41:55 api-worker-1 kernel: CPU0: Cor...   

                                         t5_template cluster_id  \
0  1234: Failed password for root from 192.168.1....  T00013399   
1  api-worker-1 kernel: CPU0: Core temperature ab...        T-1   

                                    cluster_template  drain_similarity  
0  [25140]: Failed password for root from 183.62....               1.0  
1                                                                  1.0  


### T5 normalizes incoming logs into clean templates, and Drain3 then compares them against previously learned clusters. In our test, the SSH “Failed password” log was successfully matched to an existing Drain3 cluster, while the CPU temperature warning was flagged as a new, unseen pattern. This confirms that the model can both recognize known log behaviors and detect new ones