In [None]:
import pandas as pd
import os
import cv2
from ultralytics import YOLO
import yaml
from sklearn.model_selection import train_test_split
import shutil
import random


class LesionTzetlinTrainer:
    def __init__(
        self,
        base_images_dir="data/Dataset-pruned/base_images",
        lesion_data_path="data/Dataset-pruned/lesion_data.csv",
    ):
        self.base_images_dir = base_images_dir
        self.lesion_data_path = lesion_data_path
        self.dataset_dir = "yolo_dataset"
        self.lesion_df = None

    def load_data(self):
        print("Loading lesion data...")
        self.lesion_df = pd.read_csv(self.lesion_data_path)
        print(f"Loaded {len(self.lesion_df)} lesion annotations")

    def create_dataset_structure(self):
        print("Creating dataset structure...")

        for split in ["train", "val"]:
            os.makedirs(f"{self.dataset_dir}/{split}/images", exist_ok=True)
            os.makedirs(f"{self.dataset_dir}/{split}/labels", exist_ok=True)

    def convert_to_yolo_format(self, x, y, width, height, img_width, img_height):
        center_x = x + width / 2
        center_y = y + height / 2

        norm_center_x = center_x / img_width
        norm_center_y = center_y / img_height
        norm_width = width / img_width
        norm_height = height / img_height

        return norm_center_x, norm_center_y, norm_width, norm_height

    def process_annotations(self):
        print("Processing annotations...")

        grouped = self.lesion_df.groupby(["image_id", "frame"])

        image_annotations = {}

        for (image_id, frame), group in grouped:
            image_filename = f"{image_id}_{frame}.png"
            image_path = os.path.join(self.base_images_dir, image_filename)

            if not os.path.exists(image_path):
                print(f"WARNING: Image {image_filename} not found, skipping...")
                continue

            img = cv2.imread(image_path)
            if img is None:
                print(
                    f"WARNING: Image exists, but cannot be loaded {image_filename}, skipping..."
                )
                continue

            img_height, img_width = img.shape[:2]

            annotations = []
            for _, row in group.iterrows():
                x = row["lesion_x"]
                y = row["lesion_y"]
                width = row["lesion_width"]
                height = row["lesion_height"]

                if (
                    pd.isna(row["lesion_x"])
                    or pd.isna(row["lesion_y"])
                    or pd.isna(row["lesion_width"])
                    or pd.isna(row["lesion_height"])
                ):
                    continue

                norm_cx, norm_cy, norm_w, norm_h = self.convert_to_yolo_format(
                    x, y, width, height, img_width, img_height
                )

                if not (
                    0 <= norm_cx <= 1
                    and 0 <= norm_cy <= 1
                    and 0 <= norm_w <= 1
                    and 0 <= norm_h <= 1
                ):
                    print(
                        f"WARNING: Invalid normalized coordinates for {image_filename}: cx={norm_cx}, cy={norm_cy}, w={norm_w}, h={norm_h}"
                    )
                    continue

                annotations.append(
                    f"0 {norm_cx:.6f} {norm_cy:.6f} {norm_w:.6f} {norm_h:.6f}"
                )

            image_annotations[image_filename] = annotations

        return image_annotations

    def split_dataset(self, image_annotations, train_ratio=0.8):
        print("Splitting dataset...")

        image_files = list(image_annotations.keys())
        train_files, val_files = train_test_split(
            image_files, train_size=train_ratio, random_state=42
        )

        print(f"Training images: {len(train_files)}")
        print(f"Validation images: {len(val_files)}")

        return train_files, val_files

    def copy_files_and_create_labels(self, image_annotations, train_files, val_files):
        print("Copying files and creating labels...")

        for split, files in [("train", train_files), ("val", val_files)]:
            for image_file in files:
                src_image = os.path.join(self.base_images_dir, image_file)
                dst_image = os.path.join(self.dataset_dir, split, "images", image_file)
                shutil.copy2(src_image, dst_image)

                label_file = image_file.replace(".png", ".txt")
                label_path = os.path.join(self.dataset_dir, split, "labels", label_file)

                with open(label_path, "w") as f:
                    for annotation in image_annotations[image_file]:
                        f.write(annotation + "\n")

    def create_yaml_config(self):
        config = {
            "path": os.path.abspath(self.dataset_dir),
            "train": "train/images",
            "val": "val/images",
            "nc": 1,
            "names": ["lesion"],
        }

        config_path = os.path.join(self.dataset_dir, "dataset.yaml")
        with open(config_path, "w") as f:
            yaml.dump(config, f, default_flow_style=False)

        print(f"Created YAML config at {config_path}")
        return config_path

    def prepare_dataset(self):
        self.load_data()
        self.create_dataset_structure()

        image_annotations = self.process_annotations()
        train_files, val_files = self.split_dataset(image_annotations)
        self.train_files, self.val_files = train_files, val_files
        self.copy_files_and_create_labels(image_annotations, train_files, val_files)

        config_path = self.create_yaml_config()
        return config_path

    

    def train_model(self, config_path, model_size, epochs, imgsz=512):
        print(f"Starting YOLO training with {model_size}...")

        model = YOLO(f"{model_size}.pt")
        results = model.train(
            data=config_path,
            epochs=epochs,
            imgsz=imgsz,
            patience=50,
            save=True,
            # device="CUDA",
            device="0,1",
            # workers=4,
            batch=16,
            # batch=96,
            name="lesion_detection",
            exist_ok=True,
            single_cls=True,
            mosaic=0.0,
            mixup=0.0,
            degrees=15.0,
            flipud=0.5,
            fliplr=0.5,
        )

        print("Training completed!")
        return model, results

    def evaluate_model(self, model, dataset_yaml_path=None):
        print("Evaluating model...")

        metrics = model.val(data=dataset_yaml_path)

        print("\n=== Model Evaluation Results ===")
        print(f"mAP@0.5: {metrics.box.map50:.4f}")
        print(f"mAP@0.5:0.95: {metrics.box.map:.4f}")
        print(f"mAP@0.75: {metrics.box.map75:.4f}")
        print(f"Mean Precision: {metrics.box.mp:.4f}")
        print(f"Mean Recall: {metrics.box.mr:.4f}")

        return metrics

    def predict_sample(self, model, image_path, conf_threshold=0.25):
        print(f"Making prediction on {image_path}...")

        results = model.predict(
            image_path,
            conf=conf_threshold,
            save=True,
            show_labels=True,
            show_conf=True,
        )

        return results

    def predict_multiple_samples(self, model, num_samples=10, conf_threshold=0.1):
        print(f"Making predictions on {num_samples} sample images...")

        sample_images = os.listdir(self.base_images_dir)
        if not sample_images:
            print("No images found in base directory")
            return []

        random.seed(42)
        random.shuffle(sample_images)
        selected_samples = sample_images[: min(num_samples, len(sample_images))]

        image_paths = [
            os.path.join(self.base_images_dir, img) for img in selected_samples
        ]

        print(f"Selected images: {selected_samples}")

        try:
            results = model.predict(
                image_paths,
                conf=conf_threshold,
                save=True,
                show_conf=True,
                show_labels=True,
                visualize=True,
            )

            prediction_results = []
            for i, (image_file, result) in enumerate(zip(selected_samples, results)):
                num_detections = len(result.boxes) if result.boxes is not None else 0

                prediction_info = {
                    "image_file": image_file,
                    "image_path": image_paths[i],
                    "results": result,
                    "num_detections": num_detections,
                }

                prediction_results.append(prediction_info)
            return prediction_results

        except Exception as e:
            print(f"ERROR: batch prediction: {e}")
            return []


def evaluate_trained_model():
    trainer = LesionTzetlinTrainer(
        base_images_dir="./Dataset-pruned/base_images",
        lesion_data_path="./Dataset-pruned/lesion_data.csv",
    )

    trained_model_path = (
        "results/iteration-5/runs/detect/lesion_detection/weights/best.pt"
    )
    dataset_yaml_path = "results/iteration-5/yolo_dataset/dataset.yaml"

    if not os.path.exists(trained_model_path):
        print(f"ERROR: Trained model not found at {trained_model_path}")
        return

    if not os.path.exists(dataset_yaml_path):
        print(f"ERROR: Dataset YAML not found at {dataset_yaml_path}")
        return

    model = YOLO(trained_model_path)

    metrics = trainer.evaluate_model(model, dataset_yaml_path)
    print(f"Evaluation completed. mAP@0.5: {metrics.box.map50:.4f}")

    prediction_results = trainer.predict_multiple_samples(
        model, num_samples=30, conf_threshold=0.1
    )

    print(f"\nPredictions generated for {len(prediction_results)} images.")
    for i, pred_info in enumerate(prediction_results):
        print(
            f"  Image {i+1}: {pred_info['image_file']} - Detections: {pred_info['num_detections']}"
        )


def train_model():
    trainer = LesionTzetlinTrainer()
    config_path = trainer.prepare_dataset()

    # Models:
    # yolov8n -> nano version
    # yolov8s -> small version
    # yolov8m -> medium version
    # yolov8l -> large version
    # yolov8x -> extra large version
    model, results = trainer.train_model(
        config_path=config_path,
        model_size="yolo11l",
        epochs=300,
        imgsz=512,
    )

    metrics = trainer.evaluate_model(model)
    prediction_results = trainer.predict_multiple_samples(model, num_samples=10)

    print(f"Model saved")

In [None]:
tzetlin_trainer = LesionTzetlinTrainer()

In [None]:
tzetlin_trainer.train_model()

TypeError: LesionTzetlinTrainer.train_model() missing 3 required positional arguments: 'config_path', 'model_size', and 'epochs'

In [None]:
config_path = tzetlin_trainer.prepare_dataset()

Loading lesion data...
Loaded 1061 lesion annotations
Creating dataset structure...
Processing annotations...
Splitting dataset...
Training images: 663
Validation images: 166
Copying files and creating labels...
Created YAML config at yolo_dataset/dataset.yaml


In [None]:
config_path

'yolo_dataset/dataset.yaml'

In [None]:
from keras.datasets import mnist

2025-09-22 20:49:33.088113: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-22 20:49:33.088754: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-22 20:49:33.154876: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-22 20:49:35.060677: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation or

In [None]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [None]:
X_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(60000, 784))

In [None]:
from pyTsetlinMachine.tm import MultiClassTsetlinMachine
import numpy as np

In [None]:
np.where(X_train > 0)

(array([    0,     0,     0, ..., 59999, 59999, 59999], shape=(8994156,)),
 array([ 5,  5,  5, ..., 24, 24, 24], shape=(8994156,)),
 array([12, 13, 14, ...,  8,  9, 10], shape=(8994156,)))

In [None]:
X_train.shape

(60000, 28, 28)

In [None]:
X_train.reshape((X_train.shape[0], 28*28))

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(60000, 784), dtype=uint8)

In [None]:
np.where(X_train.reshape((X_train.shape[0], 28*28)) > 75, 1, 0) 

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(60000, 784))

In [None]:
X_train = np.where(X_train.reshape((X_train.shape[0], 28*28)) > 75, 1, 0) 
X_test = np.where(X_test.reshape((X_test.shape[0], 28*28)) > 75, 1, 0) 

tm = MultiClassTsetlinMachine(2000, 50, 10.0)
tm

<pyTsetlinMachine.tm.MultiClassTsetlinMachine at 0x7a6691f54110>

In [None]:
tm

<pyTsetlinMachine.tm.MultiClassTsetlinMachine at 0x7026b7db0440>

In [None]:
from pyTsetlinMachine.tm import MultiClassTsetlinMachine
import numpy as np
from time import time

from keras.datasets import mnist

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

X_train = np.where(X_train.reshape((X_train.shape[0], 28*28)) > 75, 1, 0) 
X_test = np.where(X_test.reshape((X_test.shape[0], 28*28)) > 75, 1, 0) 

tm = MultiClassTsetlinMachine(2000, 50, 10.0)

print("\nAccuracy over 250 epochs:\n")
for i in range(2):
	start_training = time()
	tm.fit(X_train, Y_train, epochs=1, incremental=True)
	stop_training = time()

	start_testing = time()
	result = 100*(tm.predict(X_test) == Y_test).mean()
	stop_testing = time()

	print("#%d Accuracy: %.2f%% Training: %.2fs Testing: %.2fs" % (i+1, result, stop_training-start_training, stop_testing-start_testing))


Accuracy over 250 epochs:

#1 Accuracy: 93.98% Training: 31.66s Testing: 3.14s
#2 Accuracy: 94.99% Training: 17.14s Testing: 3.08s


In [None]:
tm.predict(X_test[:10])

array([7, 2, 1, 0, 4, 1, 4, 9, 6, 9], dtype=uint32)

In [None]:
result

np.float64(94.99)

In [None]:
1*(tm.predict(X_test) == Y_test).mean()*100

np.float64(94.99)

In [None]:
import numpy as np

In [None]:
img0_path = filenames[0]
img0_path

'data/Dataset-pruned/base_images/131aedfhs6pnf1fvtvp49mldx9653nxv22_45.png'

In [None]:
# tsetlin_train.py
import os
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from pyTsetlinMachine.tm import MultiClassConvolutionalTsetlinMachine2D
from time import time

def load_image_folder(folder, size=(28,28), threshold=128):
    X = []
    y = []
    classes = sorted(os.listdir(folder))
    class_to_idx = {c:i for i,c in enumerate(classes)}
    for c in classes:
        class_dir = os.path.join(folder, c)
        if not os.path.isdir(class_dir):
            continue
        for fname in os.listdir(class_dir):
            print(fname)
            if not fname.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")):
                continue
            path = os.path.join(class_dir, fname)
            img = Image.open(path).convert("L")          # grayscale
            img = img.resize(size, Image.NEAREST)       # resize
            arr = np.array(img)                         # shape (H,W)
            # binarize
            bin_arr = (arr >= threshold).astype(np.uint8)
            X.append(bin_arr)
            y.append(class_to_idx[c])
    X = np.stack(X)   # shape (N, H, W)
    y = np.array(y, dtype=np.int32)
    return X, y, class_to_idx

In [None]:
X_train[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 

In [None]:
img0 = img.resize((128, 128), Image.NEAREST)
img0 = np.array(img0)
img0 = np.where(img0 > 128, 1, 0)


array([[0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 1],
       ...,
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 0]], shape=(128, 128))

In [None]:
image_paths = tzetlin_trainer.base_images_dir
filenames = [filename for filename in os.listdir(tzetlin_trainer.base_images_dir)]
dataset_dir = tzetlin_trainer.base_images_dir

# size=(512, 512)
threshold=128
img_array = []
for path in filenames:
    path = os.path.join(dataset_dir, path)
    img = Image.open(path).convert("L")          # grayscale
    # img = img.resize(size, Image.NEAREST)       # resize
    img_arr = np.array(img)
    bin_arr = (img_arr >= threshold).astype(np.uint8)
    img_array.append(bin_arr)

In [None]:
df

Unnamed: 0.1,Unnamed: 0,feedback_id,user,image_id,timestamp,frame,lesion_id,lesion_frame,lesion_x,lesion_y,...,lesion_trifurcation,lesion_trifurcationType,lesion_bifurcation,lesion_bifurcationType,lesion_bifurcationAngle,lesion_aortoOstialStenosis,lesion_occlusionLength,lesion_heavyCalcification,lesion_thrombus,lesion_severeTortuosity
0,0,490e3a64-32db-45ec-aa13-6a6718763fcc,lekandnow@gmail.com,12aw4ack71831bocuf5j3pz235kn1v361de,2023-12-23 17:14:50.958991,35,ca8b2bbe-6b6d-4c84-9e53-56f098d582c1,35.0,137.0,261.0,...,False,,False,,,False,LT20,False,False,False
1,1,b2b724db-f6ac-4e98-b208-87c420d0753a,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1n6d1qnz22,2023-12-30 07:18:48.595050,30,21008078-1759-4d9b-b56b-88151c7a077b,30.0,128.0,135.0,...,False,,False,,,False,LT20,False,False,True
2,2,042f66bd-d3d1-4fb0-b339-727345afe47a,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1n8gd3gs22,2023-12-30 07:18:48.487550,23,4712cf93-c404-4fb3-89ba-66225870bb3d,23.0,254.0,211.0,...,False,,False,,,False,LT20,False,False,True
3,3,04b3ae76-d9dc-4d07-babe-17e1dbd65f47,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1na8zz7s22,2023-12-30 07:18:48.773056,29,f20650d1-ee08-42d9-be4e-dd5af8d10320,29.0,130.0,120.0,...,False,,False,,,False,LT20,False,False,True
4,4,09af7dfc-034c-4f89-919d-88363c0c8aa6,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1nekk6h022,2023-12-30 07:18:48.993038,29,6e2e2ad0-2403-44ea-9b44-2370eec3aa4d,29.0,233.0,400.0,...,False,,True,A,LT70,False,LT20,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1056,1144,1ecaaa15-1f11-46c4-992c-e13af7d91718,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u231,2023-12-23 12:23:43.381127,35,2ee857ea-87e2-4a3b-9873-80b71869715e,35.0,6.0,272.0,...,False,,False,,,False,LT20,False,False,False
1057,1145,1ecaaa15-1f11-46c4-992c-e13af7d91718,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u231,2023-12-23 12:23:43.381127,35,16020273-426f-4325-afbc-8d4fe61e1e39,35.0,263.0,380.0,...,False,,False,,,False,LT20,False,False,False
1058,1146,c969a559-636c-4379-9f15-47ca5c1bcca3,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u4p1,2023-12-23 12:23:43.440082,19,62a49393-76e1-4119-aa2f-67edf4e65654,19.0,170.0,250.0,...,False,,False,,,False,LT20,False,False,False
1059,1147,fb9a0ca3-5edf-4191-b7d4-6cb5ef40e723,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u6v1,2023-12-23 12:23:43.409926,16,81666fab-83e7-41cc-be4f-e17b3107f2fe,16.0,95.0,266.0,...,False,,False,,,False,LT20,False,False,False


In [None]:
id_col = 4
frame_col = 6
lesion_filenames = []
for el in df.itertuples():
    img_id = el[id_col]
    frame_id = el[frame_col]
    lesion_frame = int(el[6])
    lesion_width = el[7]
    assert frame_id == lesion_frame
    assert lesion_width is not None
    img_filename = f"{img_id}_{lesion_frame}.png"
    lesion_filenames.append(img_filename)

In [None]:
lesion_filenames = list(set(lesion_filenames))
lesion_filenames

['131aedfhs6pnf1fvtvp49h4bhdmvwf5x22_19.png',
 '131aedfhs6pnf1fvtvp49mk5l3qcg5hh22_19.png',
 '131aedfhs6pnf1fvtvp49jzq6gxwoxtj22_34.png',
 '131aedfhs6pnf1fvtvp49mjbu3dgtr4822_31.png',
 '131aedfhs6pnf1fvtvp49mj5g3tlqlef22_33.png',
 '131aedfhs6pnf1fvtvp498txzz7lihjn22_31.png',
 '13c2ur549vohc0jat2dvu3xs9w1_23.png',
 '131aedfhs6pnf1fvtvp49mg8hvug5owj22_24.png',
 '131aedfhs6pnf1fvtvp49mj6h8ni59pd22_29.png',
 '131aedfhs6pnf1fvtvp49jwozush898f22_27.png',
 '131aedfhs6pnf1fvtvp49jzll72bstib22_21.png',
 '131aedfhs6pnf1fvtvp49mjf02qnmxg322_45.png',
 '131aedfhs6pnf1fvtvp49jzll75vony422_31.png',
 '131aedfhs6pnf1fvtvp49jxvwaa9o9po22_34.png',
 '131aedfhs6pnf1fvtvp49jwoajvsvw6722_29.png',
 '131aedfhs6pnf1fvtvp49joxclh2wk8y22_23.png',
 '131aedfhs6pnf1fvtvp49mmcakgxr8i422_64.png',
 '131aedfhs6pnf1fvtvp49mh7wxe90n4z22_36.png',
 '131aedfhs6pnf1fvtvp49mlemi1a2kdg22_34.png',
 '131aedfhs6pnf1fvtvp49jvrnoq5zlq022_60.png',
 '131aedfhs6pnf1fvtvp49jstof8iams322_36.png',
 '131aedfhs6pnf1fvtvp49mj8zcmgxp0y22_71.p

In [None]:
len(filenames)

931

In [None]:
# df = pd.read_csv(tzetlin_trainer.lesion_data_path)
Y_train = []
for file_path, img in zip(filenames, img_array):
    if file_path in lesion_filenames:
        Y_train.append(1)
    else:
        Y_train.append(0)

Y_train = np.array(Y_train)
Y_train.shape

(931,)

In [None]:
img_size = (512, 512)

imgs_filtered = []
for img in img_array:
    if img.shape != img_size:
        print(f"Resizing img: {img.shape}")
        img = Image.fromarray(img*255)
        img = img.convert("L")          # grayscale
        img = img.resize(img_size, Image.NEAREST)       # resize
        img_arr = np.array(img)
        bin_arr = (img_arr >= threshold).astype(np.uint8)
        imgs_filtered.append(bin_arr)
    else:
        imgs_filtered.append(img)
    
imgs_filtered = np.array(imgs_filtered)
imgs_filtered.shape

Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)
Resizing img: (1016, 1016)


(931, 512, 512)

In [None]:
# imgs_filtered = np.array([img.reshape(img_size[0] * img_size[1]) for img in imgs_filtered])
imgs_filtered

array([[[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 0, 0, 1],
        [0, 0, 0, ..., 1, 0, 1],
        [0, 0, 0, ..., 0, 0, 1]],

       [[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1]],

       [[1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        ...,
        [0, 0, 0, ..., 

In [None]:
X_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], shape=(60000, 784))

In [None]:
Y_train.shape

(931,)

In [275]:
lesion_trainer = LesionTzetlinTrainer()
dataset_dir = lesion_trainer.base_images_dir          # change to your dataset path
# img_size = (28,28)               # you can change this
# X, y, class_to_idx = load_image_folder(dataset_dir, size=img_size)
# print("Loaded", X.shape, "labels:", np.unique(y))

# train/test split
X = imgs_filtered
y = Y_train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Convert to types expected by pyTsetlinMachine (0/1 uint8)
X_train = X_train.astype(np.uint8)
X_test  = X_test.astype(np.uint8)

In [277]:
X_train

array([[[1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        ...,
        [1, 1, 1, ..., 1, 0, 0],
        [1, 1, 1, ..., 1, 1, 0],
        [1, 1, 1, ..., 0, 0, 1]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1]],

       ...,

       [[1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        ...,
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1]],

       [[1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 1, 1],
        ...,
        [1, 0, 1, ..., 

In [265]:
y_train

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 

In [None]:
# instantiate Convolutional TM
# number_of_clauses, T, s, patch_dim
# tm = MultiClassConvolutionalTsetlinMachine2D(number_of_clauses=2000,
#                                                 T=50*100,   # adjust
#                                                 s=5.0,
#                                                 patch_dim=(8,8),
#                                                 weighted_clauses=True)

tm = MultiClassTsetlinMachine(number_of_clauses=2000,
                                                T=50*100,   # adjust
                                                s=5.0,
                                                # patch_dim=(8,8),
                                                weighted_clauses=True)

# train
epochs = 2
t0 = time()
tm.fit(X_train, y_train, epochs=epochs)
t1 = time()
print(f"Training done in {t1-t0:.1f}s")

MemoryError: Unable to allocate 47.4 GiB for an array with shape (12712486200,) and data type uint32

In [None]:
# predict & evaluate
y_pred = tm.predict(X_test)
acc = np.mean(y_pred == y_test)
print(f"Test accuracy: {acc*100:.2f}%")

# save state (optional)
state = tm.get_state()   # you can later set_state(state)
print("Saved TM state (length):", len(state))


In [28]:
tzetlin_trainer.lesion_df

Unnamed: 0.1,Unnamed: 0,feedback_id,user,image_id,timestamp,frame,lesion_id,lesion_frame,lesion_x,lesion_y,...,lesion_trifurcation,lesion_trifurcationType,lesion_bifurcation,lesion_bifurcationType,lesion_bifurcationAngle,lesion_aortoOstialStenosis,lesion_occlusionLength,lesion_heavyCalcification,lesion_thrombus,lesion_severeTortuosity
0,0,490e3a64-32db-45ec-aa13-6a6718763fcc,lekandnow@gmail.com,12aw4ack71831bocuf5j3pz235kn1v361de,2023-12-23 17:14:50.958991,35,ca8b2bbe-6b6d-4c84-9e53-56f098d582c1,35.0,137.0,261.0,...,False,,False,,,False,LT20,False,False,False
1,1,b2b724db-f6ac-4e98-b208-87c420d0753a,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1n6d1qnz22,2023-12-30 07:18:48.595050,30,21008078-1759-4d9b-b56b-88151c7a077b,30.0,128.0,135.0,...,False,,False,,,False,LT20,False,False,True
2,2,042f66bd-d3d1-4fb0-b339-727345afe47a,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1n8gd3gs22,2023-12-30 07:18:48.487550,23,4712cf93-c404-4fb3-89ba-66225870bb3d,23.0,254.0,211.0,...,False,,False,,,False,LT20,False,False,True
3,3,04b3ae76-d9dc-4d07-babe-17e1dbd65f47,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1na8zz7s22,2023-12-30 07:18:48.773056,29,f20650d1-ee08-42d9-be4e-dd5af8d10320,29.0,130.0,120.0,...,False,,False,,,False,LT20,False,False,True
4,4,09af7dfc-034c-4f89-919d-88363c0c8aa6,lekandnow@gmail.com,131aedfhs6pnf1fvtvp493iu1nekk6h022,2023-12-30 07:18:48.993038,29,6e2e2ad0-2403-44ea-9b44-2370eec3aa4d,29.0,233.0,400.0,...,False,,True,A,LT70,False,LT20,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1056,1144,1ecaaa15-1f11-46c4-992c-e13af7d91718,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u231,2023-12-23 12:23:43.381127,35,2ee857ea-87e2-4a3b-9873-80b71869715e,35.0,6.0,272.0,...,False,,False,,,False,LT20,False,False,False
1057,1145,1ecaaa15-1f11-46c4-992c-e13af7d91718,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u231,2023-12-23 12:23:43.381127,35,16020273-426f-4325-afbc-8d4fe61e1e39,35.0,263.0,380.0,...,False,,False,,,False,LT20,False,False,False
1058,1146,c969a559-636c-4379-9f15-47ca5c1bcca3,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u4p1,2023-12-23 12:23:43.440082,19,62a49393-76e1-4119-aa2f-67edf4e65654,19.0,170.0,250.0,...,False,,False,,,False,LT20,False,False,False
1059,1147,fb9a0ca3-5edf-4191-b7d4-6cb5ef40e723,sgurba@gmail.com,13c2ur549vohc0jat2wqk9u6v1,2023-12-23 12:23:43.409926,16,81666fab-83e7-41cc-be4f-e17b3107f2fe,16.0,95.0,266.0,...,False,,False,,,False,LT20,False,False,False


NameError: name 'lesion_df' is not defined

In [30]:
from pyTsetlinMachine.tm import MultiClassTsetlinMachine