# Setup and Imports

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#!pip install opencv-contrib-python
# !pip install tqdm

In [None]:
import cv2
import tarfile
import os
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm #progress bars
import gc

#sampling
from collections import defaultdict # can group class without checking if key already exists
import random

#data
from sklearn.preprocessing import LabelEncoder
from pycocotools.coco import COCO
from collections import Counter

#clustering algs
from sklearn.cluster import MiniBatchKMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA

#classifiers
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
#metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
#viz
import matplotlib.pyplot as plt

# Loading and sampling iCubWorld

In [None]:
def load_icub_from_drive(dataset_path='/content/drive/MyDrive/U of Manchester/Robotics Assignment/data/icubworld_data'):
    """Collect the left-camera MIX images of iCubWorld together with their class names.

    The expected layout is
    ``<dataset_path>/part*/<class>/<instance>/MIX/<day>/left/<image>``.
    Entries that are not directories, instances without a ``MIX`` folder and
    days without a ``left`` folder are skipped.

    Directory listings are sorted so the returned order is the same on every
    run; ``os.listdir`` order is arbitrary, which would otherwise make the
    seeded sampling done downstream irreproducible.

    Args:
        dataset_path: Root folder that contains the ``part*`` folders.
            Defaults to the Drive location used by this notebook.

    Returns:
        (image_paths, labels): two parallel lists; ``labels[i]`` is the
        class-folder name of ``image_paths[i]``.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    def sorted_subdirs(parent):
        """Return (name, path) for every sub-directory of `parent`, sorted by name."""
        found = []
        for name in sorted(os.listdir(parent)):
            path = os.path.join(parent, name)
            if os.path.isdir(path):
                found.append((name, path))
        return found

    labels = []
    image_paths = []

    for part_name, part_path in sorted_subdirs(dataset_path):
        # only directories count: a stray file such as 'part1.tar' must not be walked
        if not part_name.startswith('part'):
            continue

        #class folders
        for class_name, class_path in sorted_subdirs(part_path):
            #object instances
            for _, instance_path in sorted_subdirs(class_path):
                # MIX transformation
                mix_path = os.path.join(instance_path, 'MIX')
                if not os.path.isdir(mix_path):
                    continue

                #go through days
                for _, day_path in sorted_subdirs(mix_path):
                    #only left images
                    left_path = os.path.join(day_path, 'left')
                    if not os.path.isdir(left_path):
                        continue

                    for file_name in sorted(os.listdir(left_path)):
                        if not file_name.lower().endswith(image_extensions):
                            continue
                        #record label (high-level class name) and path for each image
                        image_paths.append(os.path.join(left_path, file_name))
                        labels.append(class_name)

    return image_paths, labels

In [None]:
def sample_dataset(image_paths, labels, max_per_class=500, seed=42):
    """Draw at most `max_per_class` random images from every class, then shuffle.

    A private ``random.Random(seed)`` is used, so the global ``random`` state
    is left untouched. It produces the same stream as ``random.seed(seed)``,
    so the sample for a given seed is unchanged.

    Args:
        image_paths: Image paths, parallel to `labels`.
        labels: Class label of each path.
        max_per_class: Upper bound on the number of images kept per class.
        seed: Seed of the private random generator.

    Returns:
        (sampled_image_paths, sampled_labels): two parallel lists in shuffled
        order. Both are empty when the input is empty.
    """
    rng = random.Random(seed)

    # group paths by class
    class_to_images = defaultdict(list)
    for path, label in zip(image_paths, labels):
        class_to_images[label].append(path)

    # get sample from each class
    sampled = []
    for label, paths in class_to_images.items():
        rng.shuffle(paths)
        sampled.extend((path, label) for path in paths[:max_per_class])

    # shuffle data -- avoid order bias
    rng.shuffle(sampled)

    # built with comprehensions rather than zip(*...), which cannot unpack an empty sample
    sampled_image_paths = [path for path, _ in sampled]
    sampled_labels = [label for _, label in sampled]
    return sampled_image_paths, sampled_labels


# Loading COCO

In [None]:
def build_image_label_map(coco, top_k=20, target_count=8000, label_to_index=None):
    """Assign every annotated image a single label: its most frequent category.

    Only images whose dominant category is one of the kept categories are
    returned, and at most `target_count` of them.

    Args:
        coco: Object exposing ``getImgIds``, ``getAnnIds(imgIds=...)``,
            ``loadAnns`` and ``loadImgs`` (a ``pycocotools.coco.COCO``).
        top_k: Number of most frequent dominant categories to keep. Ignored
            when `label_to_index` is given.
        target_count: Maximum number of images to return.
        label_to_index: Optional existing ``{category_id: index}`` mapping.
            Pass the mapping returned for the training split when processing
            another split, so both use the same categories and the same index
            per category. When omitted, the mapping is built from the top-k
            categories of `coco` itself, which can differ between splits.

    Returns:
        (image_label_map, label_to_index): ``{file_name: label_index}`` and
        the ``{category_id: label_index}`` mapping that was used.
    """
    dominant_category = {}
    category_counts = Counter()

    # most common object per image - no filtering yet
    for img_id in coco.getImgIds():
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        if not anns:
            continue
        most_common_cat = Counter(ann['category_id'] for ann in anns).most_common(1)[0][0]
        fname = coco.loadImgs(img_id)[0]['file_name']
        dominant_category[fname] = most_common_cat
        category_counts[most_common_cat] += 1

    if label_to_index is None:
        # keep only the top k categories; indices follow sorted category ids
        top_cats = {cat for cat, _ in category_counts.most_common(top_k)}
        label_to_index = {cat_id: idx for idx, cat_id in enumerate(sorted(top_cats))}

    # collect images from the kept categories until target_count is reached
    image_label_map = {}
    for fname, cat_id in dominant_category.items():
        if len(image_label_map) >= target_count:
            break
        if cat_id in label_to_index:
            image_label_map[fname] = label_to_index[cat_id]

    return image_label_map, label_to_index

In [None]:
def paths_and_labels(image_label_map, img_dir):
    """Turn a {file_name: label} mapping into parallel path and label lists.

    Each file name is joined onto `img_dir`; entries whose file does not
    exist on disk are dropped. The mapping's iteration order is kept.

    Returns:
        (paths, labels): two lists of equal length.
    """
    candidates = (
        (os.path.join(img_dir, file_name), label)
        for file_name, label in image_label_map.items()
    )
    present = [(full_path, label) for full_path, label in candidates if os.path.exists(full_path)]

    paths = [full_path for full_path, _ in present]
    labels = [label for _, label in present]
    return paths, labels

# Feature extractor methods - 2 choices (SIFT, ORB)



In [None]:
#CHOICE 1: SIFT
def get_sift_descriptors(image_paths, resize_to=(256, 256)):
    """Extract SIFT descriptors from every image in `image_paths`.

    Images are read in grayscale and, when `resize_to` is set, resized to that
    size first. An image that cannot be read (``cv2.imread`` returns ``None``)
    or that yields no keypoints gets an empty array, so the per-image list
    always stays aligned with `image_paths`.

    Args:
        image_paths: Paths of the images to process.
        resize_to: Target size passed to ``cv2.resize``, or a falsy value to
            keep the original size.

    Returns:
        (all_descriptors, img_descriptors): a flat list with one entry per
        descriptor over all images, and a list with one descriptor array
        (possibly empty) per image.
    """
    sift = cv2.SIFT_create()
    img_descriptors = []
    all_descriptors = []
    unreadable = 0

    for path in tqdm(image_paths, desc="Extracting SIFT descriptors"):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

        descriptors = None
        if img is None:
            # missing/corrupt file: resizing None would raise, so treat it as "no descriptors"
            unreadable += 1
        else:
            if resize_to:
                img = cv2.resize(img, resize_to)
            #this finds DoG interest point (sparse)
            _, descriptors = sift.detectAndCompute(img, None)

        if descriptors is not None:
            img_descriptors.append(descriptors)
            all_descriptors.extend(descriptors)
        else:
            img_descriptors.append(np.array([]))

    if unreadable:
        print(f"Warning: {unreadable} image(s) could not be read and were given empty descriptors.")

    return all_descriptors, img_descriptors

In [None]:
#CHOICE 2: ORB
def get_orb_descriptors(image_paths, resize_to=(256, 256), max_features=1500):
    """Extract ORB descriptors from every image in `image_paths`.

    Images are read in grayscale and, when `resize_to` is set, resized to that
    size first. An image that cannot be read (``cv2.imread`` returns ``None``)
    or that yields no keypoints gets an empty array, so the per-image list
    always stays aligned with `image_paths`.

    Args:
        image_paths: Paths of the images to process.
        resize_to: Target size passed to ``cv2.resize``, or a falsy value to
            keep the original size.
        max_features: Passed to ``cv2.ORB_create`` as ``nfeatures``.

    Returns:
        (all_descriptors, img_descriptors): a flat list with one entry per
        descriptor over all images, and a list with one descriptor array
        (possibly empty) per image.
    """
    orb = cv2.ORB_create(nfeatures=max_features)
    img_descriptors = []
    all_descriptors = []
    unreadable = 0

    for path in tqdm(image_paths, desc="Extracting ORB descriptors"):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

        descriptors = None
        if img is None:
            # missing/corrupt file: resizing None would raise, so treat it as "no descriptors"
            unreadable += 1
        else:
            if resize_to:
                img = cv2.resize(img, resize_to)
            _, descriptors = orb.detectAndCompute(img, None)

        if descriptors is not None:
            img_descriptors.append(descriptors)
            all_descriptors.extend(descriptors)
        else:
            img_descriptors.append(np.array([]))

    if unreadable:
        print(f"Warning: {unreadable} image(s) could not be read and were given empty descriptors.")

    return all_descriptors, img_descriptors

# Clustering algorithm functions (building visual vocab) -- 2 choices (KMeans, Gaussian Mixture Models)

In [None]:
#CHOICE 1: KMEANS
def build_histograms_kmeans(descriptors, kmeans, k):
    """Count how often each of the `k` visual words occurs in one image.

    `kmeans.predict` maps every descriptor to a word id; the result is a
    length-`k` float vector of counts. With no descriptors (``None`` or
    empty) the vector is all zeros.
    """
    word_counts = np.zeros(k)
    if descriptors is None or len(descriptors) == 0:
        return word_counts

    # np.add.at accumulates repeated ids, exactly like adding 1 per word in a loop
    np.add.at(word_counts, kmeans.predict(descriptors), 1)
    return word_counts

In [None]:
#CHOICE 2: GMM
def build_histograms_gmm(descriptors, gmm, k):
    """Build a length-`k` histogram of the mixture components assigned to one image.

    `gmm.predict` gives the component id of every descriptor; each id adds one
    to its bin. ``None`` or empty descriptors give an all-zero histogram.
    """
    hist = np.zeros(k)
    has_descriptors = descriptors is not None and len(descriptors) > 0
    if has_descriptors:
        component_ids = gmm.predict(descriptors)
        # unbuffered add so that repeated component ids are all counted
        np.add.at(hist, component_ids, 1)
    return hist

# Image classifiers functions -- 2 choices (SVM, naive bayes)

In [None]:
def evaluate_classifier(X_train, y_train, X_val, y_val, classifier='svm'):
    """Fit one classifier on the training data and score it on the validation data.

    Args:
        X_train, y_train: Training features and labels.
        X_val, y_val: Validation features and labels.
        classifier: 'svm' (linear-kernel SVC) or 'nb' (Gaussian naive Bayes).

    Returns:
        (model, metrics): the fitted model and a dict with 'accuracy',
        'precision', 'recall' and 'f1'; the last three are macro-averaged
        with zero_division=0.

    Raises:
        ValueError: if `classifier` is neither 'svm' nor 'nb'.
    """
    # lazily-built models, so an unknown name fails before anything is constructed
    factories = {
        'svm': lambda: SVC(kernel='linear', random_state=42),
        'nb': lambda: GaussianNB(),
    }
    if classifier not in factories:
        raise ValueError("Unsupported classifier.")

    model = factories[classifier]()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    macro = {'average': 'macro', 'zero_division': 0}
    scores = {
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, **macro),
        'recall': recall_score(y_val, y_pred, **macro),
        'f1': f1_score(y_val, y_pred, **macro),
    }

    print(f"{classifier.upper()} Results:")
    print(f"  Accuracy : {scores['accuracy']:.4f}")
    print(f"  Precision: {scores['precision']:.4f}")
    print(f"  Recall   : {scores['recall']:.4f}")
    print(f"  F1-score : {scores['f1']:.4f}")

    return model, scores


# Experiments (24 total)

## RUN SIFT and ORB once, so I can reload in the future

In [None]:
# #RUN SIFT ONCE
# all_descriptors, image_descriptors = get_sift_descriptors(sampled_image_paths)
# # save
# with open("sift_descriptors_icub.pkl", "wb") as f:
#     pickle.dump((image_descriptors, all_descriptors), f)

In [None]:
# #RUN ORB
# # RUN ORB ONCE
# all_descriptors, image_descriptors = get_orb_descriptors(sampled_image_paths)

# # Save to file
# with open("orb_descriptors_icub.pkl", "wb") as f:
#     pickle.dump((image_descriptors, all_descriptors), f)

### Check that the .pkl files look *correct*

In [None]:
# # -- load descriptors for each method (SIFT and ORB)-- #
# with open("sift_descriptors_icub.pkl", "rb") as f:
#     sift_image_descriptors, sift_all_descriptors = pickle.load(f)
# with open("orb_descriptors_icub.pkl", "rb") as f:
#     orb_image_descriptors, orb_all_descriptors = pickle.load(f)

In [None]:
# print("SIFT:")
# print(f"  Total images: {len(sift_image_descriptors)}")
# print(f"  Total descriptors: {len(sift_all_descriptors)}")
# print(f"  Descriptors from first image: {sift_image_descriptors[0].shape if len(sift_image_descriptors[0]) > 0 else 'None'}")

# print("\nORB:")
# print(f"  Total images: {len(orb_image_descriptors)}")
# print(f"  Total descriptors: {len(orb_all_descriptors)}")
# print(f"  Descriptors from first image: {orb_image_descriptors[0].shape if len(orb_image_descriptors[0]) > 0 else 'None'}")


In [None]:
# #VISUALIZE SIFT

# # Load an image
# img_path = sampled_image_paths[0]
# img = cv2.imread(img_path)
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# # Recompute SIFT keypoints for comparison
# sift = cv2.SIFT_create()
# keypoints, _ = sift.detectAndCompute(gray, None)

# # Draw keypoints
# img_with_kp = cv2.drawKeypoints(img, keypoints, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# plt.figure(figsize=(8, 6))
# plt.imshow(cv2.cvtColor(img_with_kp, cv2.COLOR_BGR2RGB))
# plt.title("SIFT Keypoints")
# plt.axis('off')
# plt.show()

In [None]:
# ##VISUALIZE ORB
# # Pick a sample image path
# img_path = sampled_image_paths[0]

# # Load the image
# img = cv2.imread(img_path)
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# # Detect ORB keypoints
# orb = cv2.ORB_create()
# keypoints, _ = orb.detectAndCompute(gray, None)

# # Draw keypoints
# img_with_kp = cv2.drawKeypoints(img, keypoints, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

# # Display
# plt.figure(figsize=(8, 6))
# plt.imshow(cv2.cvtColor(img_with_kp, cv2.COLOR_BGR2RGB))
# plt.title("ORB Keypoints")
# plt.axis('off')
# plt.show()


In [None]:
##old code -- delete soon
# -- run for each feature extraction method -- #
# for feature_method in feature_methods:

#     if feature_method == 'sift':
#         image_descriptors = sift_image_descriptors
#         all_descriptors = sift_all_descriptors
#     elif feature_method == 'orb':
#         image_descriptors = orb_image_descriptors
#         all_descriptors = orb_all_descriptors
#     else:
#         raise ValueError("Unsupported feature extraction method.")

## Experiments (iCubWorld or COCO, selected by `data`) -- 2 feature methods, 3 vocab sizes, 2 clustering methods, 2 classifiers

In [None]:
vocab_sizes = [100, 250, 500]
classifiers = ['svm', 'nb']
results = []
cluster_methods = ['kmeans','gmm']
feature_methods = ['sift','orb']
data = 'coco' ##SWITCH DATASET HERE

if data == 'icubworld':
    #LOAD icubworld data
    image_paths, labels = load_icub_from_drive()
    sampled_image_paths, sampled_labels = sample_dataset(image_paths, labels, max_per_class=500)

    #label encoder helps map between labels and unique IDs -- easy to convert back and forth
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(sampled_labels)

    #split
    image_paths_trainval, image_paths_test, y_trainval, y_test = train_test_split(
    sampled_image_paths, sampled_labels, test_size=0.1, stratify=sampled_labels, random_state=42
    )
    image_paths_train, image_paths_val, y_train, y_val = train_test_split(
        image_paths_trainval, y_trainval, test_size=0.2, stratify=y_trainval, random_state=42
    )

elif data == 'coco':
    annotation_path_val = '/content/drive/MyDrive/U of Manchester/Robotics Assignment/data/coco/annotations/instances_val2017.json'
    annotation_path_train = '/content/drive/MyDrive/U of Manchester/Robotics Assignment/data/coco/annotations/instances_train2017.json'
    coco_val = COCO(annotation_path_val)
    coco_train = COCO(annotation_path_train)

    #coco only has train and val
    train_image_label_map, train_label_to_index = build_image_label_map(coco_train, target_count=8000)
    val_image_label_map,   val_label_to_index   = build_image_label_map(coco_val, target_count=2000)

    all_labels = set(train_image_label_map.values()) | set(val_image_label_map.values())
    label_encoder = LabelEncoder()
    label_encoder.fit(list(sorted(all_labels)))

    local_train_path = "/content/train2017"
    local_val_path   = "/content/val2017"

    # unzip
    if not os.path.exists(local_train_path):
        print("Unzipping training images...")
        !unzip -q "/content/drive/MyDrive/U of Manchester/Robotics Assignment/data/train2017.zip" -d /content/
    if not os.path.exists(local_val_path):
        print("Unzipping validation images...")
        !unzip -q "/content/drive/MyDrive/U of Manchester/Robotics Assignment/data/val2017.zip" -d /content/

    image_paths_train, y_train = paths_and_labels(train_image_label_map, local_train_path)
    image_paths_val, y_val = paths_and_labels(val_image_label_map, local_val_path)

    # encode labels
    train_labels = label_encoder.transform(train_labels)
    val_labels  = label_encoder.transform(val_labels)

print("Dataset: ", data)
print("Train images:", len(image_paths_train))
print("Val images:  ", len(image_paths_val))

#note no test labels are released for coco
if data == 'icubworld':
  print("Test images: ", len(image_paths_test))

for feature_method in feature_methods:
    # extract descriptors
    if feature_method == 'sift':
        all_descriptors_train, image_descriptors_train = get_sift_descriptors(image_paths_train)
        _, image_descriptors_val = get_sift_descriptors(image_paths_val)

    elif feature_method == 'orb':
        all_descriptors_train, image_descriptors_train = get_orb_descriptors(image_paths_train)
        _, image_descriptors_val = get_orb_descriptors(image_paths_val)

    all_descriptors_train = np.array(all_descriptors_train, dtype=np.float32)

    for clustering_alg in cluster_methods:
      #one run for each vocab size
      for k in vocab_sizes:
          if clustering_alg == 'kmeans':
              kmeans = MiniBatchKMeans(n_clusters=k, random_state=42, batch_size=1000, verbose=1)
              kmeans.fit(all_descriptors_train.astype(np.float32))
              X = [] #each row is histogram of visual words for one image

              X_train = [build_histograms_kmeans(d.astype(np.float32), kmeans, k) if len(d) > 0 else np.zeros(k)
                        for d in image_descriptors_train]
              X_val = [build_histograms_kmeans(d.astype(np.float32), kmeans, k) if len(d) > 0 else np.zeros(k)
                      for d in image_descriptors_val]
          elif clustering_alg == 'gmm':
              #take samle of descriptors bc colab keeps crashing
              sample_size = 100000
              if len(all_descriptors_train) > sample_size:
                  indices = np.random.choice(len(all_descriptors_train), sample_size, replace=False)
                  sampled_descriptors = all_descriptors_train[indices]
              else:
                  sampled_descriptors = all_descriptors_train

              #add pca to reduce dimensionality
              pca = PCA(n_components=16 if feature_method == 'orb' else 64, random_state=42) #why is this the case?
              pca.fit(sampled_descriptors)
              pca_all_descriptors = pca.transform(sampled_descriptors)

              gmm = GaussianMixture(n_components=k, random_state=42, verbose=1, covariance_type='diag')
              gmm.fit(pca_all_descriptors)

              X_train = []
              for d in image_descriptors_train:
                  if d is None or len(d) == 0:
                      X_train.append(np.zeros(k))
                  else:
                      d_pca = pca.transform(d.astype(np.float32))
                      X_train.append(build_histograms_gmm(d_pca, gmm, k))
              X_val = []
              for d in image_descriptors_val:
                  if d is None or len(d) == 0:
                      X_val.append(np.zeros(k))
                  else:
                      d_pca = pca.transform(d.astype(np.float32))
                      X_val.append(build_histograms_gmm(d_pca, gmm, k))

          for classifier in classifiers:
              model, metrics = evaluate_classifier(X_train, y_train, X_val, y_val, classifier=classifier)
              results.append({
                    'feature': feature_method,
                    'clustering': clustering_alg,
                    'vocab_size': k,
                    'classifier': classifier,
                    **metrics
                })
              print(f"[{feature_method.upper()} | {clustering_alg.upper()} | k={k} | {classifier.upper()}] "
                      f"Accuracy: {metrics['accuracy']:.4f}, F1: {metrics['f1']:.4f}")
              #reduce memory
              del model, metrics
              gc.collect()
          #CLEANUP MEMORY -- colab is crashing from high ram
          del X_train, X_val
          if clustering_alg == 'kmeans':
              del kmeans
          elif clustering_alg == 'gmm':
              del gmm, pca
          gc.collect()

df = pd.DataFrame(results)
df.to_csv(f"{feature_method}_experiments_results.csv", index=False)
print("\nsaved results!'")

loading annotations into memory...
Done (t=1.31s)
creating index...
index created!
loading annotations into memory...
Done (t=34.08s)
creating index...
index created!
Dataset:  coco
Train images: 8000
Val images:   2000


Extracting SIFT descriptors: 100%|██████████| 8000/8000 [03:11<00:00, 41.77it/s]
Extracting SIFT descriptors: 100%|██████████| 2000/2000 [00:46<00:00, 43.27it/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Minibatch step 1492/336270: mean batch inertia: 73678.744, ewa inertia: 81762.33309061355
Minibatch step 1493/336270: mean batch inertia: 74220.296, ewa inertia: 81757.84740074401
Minibatch step 1494/336270: mean batch inertia: 74671.096, ewa inertia: 81753.63249586029
Minibatch step 1495/336270: mean batch inertia: 72757.136, ewa inertia: 81748.28175405036
Minibatch step 1496/336270: mean batch inertia: 73402.488, ewa inertia: 81743.31802315828
Minibatch step 1497/336270: mean batch inertia: 73819.888, ewa inertia: 81738.6054966406
Minibatch step 1498/336270: mean batch inertia: 74890.96, ewa inertia: 81734.53280200048
Minibatch step 1499/336270: mean batch inertia: 72711.016, ewa inertia: 81729.16598963665
Minibatch step 1500/336270: mean batch inertia: 73061.624, ewa inertia: 81724.01089628063
Minibatch step 1501/336270: mean batch inertia: 74181.704, ewa inertia: 81719.5250459419
Minibatch step 1502/336270: mean batch

Extracting ORB descriptors: 100%|██████████| 8000/8000 [01:13<00:00, 108.85it/s]
Extracting ORB descriptors: 100%|██████████| 2000/2000 [00:17<00:00, 112.05it/s]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Minibatch step 9911/802009: mean batch inertia: 92528.736, ewa inertia: 92929.03155674887
Minibatch step 9912/802009: mean batch inertia: 90487.968, ewa inertia: 92928.42281970277
Minibatch step 9913/802009: mean batch inertia: 91871.6, ewa inertia: 92928.15927588813
Minibatch step 9914/802009: mean batch inertia: 91429.056, ewa inertia: 92927.7854389606
Minibatch step 9915/802009: mean batch inertia: 89978.656, ewa inertia: 92927.05000364664
Minibatch step 9916/802009: mean batch inertia: 91342.184, ewa inertia: 92926.65477975053
Minibatch step 9917/802009: mean batch inertia: 91695.84, ewa inertia: 92926.3478469174
Minibatch step 9918/802009: mean batch inertia: 91766.704, ewa inertia: 92926.05866224298
Minibatch step 9919/802009: mean batch inertia: 89785.312, ewa inertia: 92925.27544263346
Minibatch step 9920/802009: mean batch inertia: 90259.664, ewa inertia: 92924.61070925069
Minibatch step 9921/802009: mean batch i

In [None]:
# Choose the validation run with the highest F1 (the 'f1' column of `df`, which holds
# the macro-averaged F1 returned by evaluate_classifier); later cells read `best_config`.
best_idx = df['f1'].idxmax()
best_config = df.loc[best_idx]
print("Best on val:", best_config.to_dict())

Best on val: {'feature': 'sift', 'clustering': 'kmeans', 'vocab_size': 500, 'classifier': 'svm', 'accuracy': 0.3595, 'precision': 0.11303748918817921, 'recall': 0.10753103731911964, 'f1': 0.10731922387378179}


In [None]:
#test the best model: refit the best validation configuration on train+val, score it on the test split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# image_paths_test / y_test are only created by the iCubWorld branch of the
# experiment cell (COCO has no labelled test split), so fail with a clear message.
if data != 'icubworld':
    raise RuntimeError(
        f"No labelled test split is available for data={data!r}; "
        "the final test evaluation only runs for 'icubworld'."
    )

#get best hyperparams
feat   = best_config['feature']
clust  = best_config['clustering']
k      = int(best_config['vocab_size'])
clf_nm = best_config['classifier']

# Use the extractor of the *best* configuration everywhere. Relying on the leftover
# loop variable `feature_method` mixed ORB test descriptors (32-d) with a SIFT
# codebook (128-d).
if feat == 'sift':
    extract_descriptors = get_sift_descriptors
elif feat == 'orb':
    extract_descriptors = get_orb_descriptors
else:
    raise ValueError(f"Unsupported feature extraction method: {feat!r}")

#get descriptors on test set
_, image_descriptors_test = extract_descriptors(image_paths_test)

# merge train and val and get descriptors
# (list(...) so that `+` concatenates even if the labels are numpy arrays)
paths_tv  = list(image_paths_train) + list(image_paths_val)
labels_tv = list(y_train) + list(y_val)

all_desc_tv, descs_tv = extract_descriptors(paths_tv)
all_desc_tv = np.array(all_desc_tv, dtype=np.float32)

# cluster to get codebook, and define the matching histogram builder
if clust == 'kmeans':
    clust_model = MiniBatchKMeans(n_clusters=k, random_state=42, batch_size=1000)
    clust_model.fit(all_desc_tv)

    def make_hist(d):
        """Histogram of k-means visual words for one image's descriptors."""
        if d is None or len(d) == 0:
            return np.zeros(k)
        return build_histograms_kmeans(d.astype(np.float32), clust_model, k)

elif clust == 'gmm':  # GMM + PCA
    sample_size = 100000
    if len(all_desc_tv) > sample_size:
        # seeded generator so the subsample is reproducible
        rng = np.random.default_rng(42)
        indices = rng.choice(len(all_desc_tv), sample_size, replace=False)
        sampled_descriptors = all_desc_tv[indices]
    else:
        sampled_descriptors = all_desc_tv
    pca = PCA(n_components=64 if feat=='sift' else 16, random_state=42)
    pca_all = pca.fit_transform(sampled_descriptors)

    clust_model = GaussianMixture(n_components=k, covariance_type='diag', random_state=42)
    clust_model.fit(pca_all)

    def make_hist(d):
        """Histogram of GMM components for one image's PCA-projected descriptors."""
        if d is None or len(d) == 0:
            return np.zeros(k)
        d_pca = pca.transform(d.astype(np.float32))
        return build_histograms_gmm(d_pca, clust_model, k)

else:
    raise ValueError(f"Unsupported clustering method: {clust!r}")

# feature matrices for train+val and test
X_tv   = np.vstack([make_hist(d) for d in descs_tv])
X_test = np.vstack([make_hist(d) for d in image_descriptors_test])

#train classifier and evaluate on test
if clf_nm == 'svm':
    # same linear kernel as the model that was selected on the validation split
    final_clf = SVC(kernel='linear', random_state=42)
elif clf_nm == 'nb':
    final_clf = GaussianNB()
else:
    raise ValueError(f"Unsupported classifier: {clf_nm!r}")

final_clf.fit(X_tv, labels_tv)
y_pred = final_clf.predict(X_test)

test_acc = accuracy_score(y_test, y_pred)
test_f1  = f1_score(y_test, y_pred, average='weighted', zero_division=0)
# macro F1 is the metric the configuration was selected on, so report it too
test_f1_macro = f1_score(y_test, y_pred, average='macro', zero_division=0)

cm = confusion_matrix(y_test, y_pred, labels=final_clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=final_clf.classes_)
disp.plot(xticks_rotation='vertical')
plt.title("Confusion Matrix")
plt.show()

print(f"\nFINAL TEST → Acc: {test_acc:.4f}, F1: {test_f1:.4f}")
print(f"Macro F1: {test_f1_macro:.4f}")


Extracting ORB descriptors: 100%|██████████| 2000/2000 [00:20<00:00, 98.75it/s] 
Extracting SIFT descriptors: 100%|██████████| 10000/10000 [04:02<00:00, 41.29it/s]


ValueError: X has 32 features, but MiniBatchKMeans is expecting 128 features as input.

In [None]:
# Precision and recall of the final model on the test split (needs y_test and y_pred
# from the final-test cell). zero_division=0 is passed to every call so that classes
# that are never predicted score 0 without emitting warnings, matching the micro metrics.
from sklearn.metrics import precision_score, recall_score

precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
print(f"Precision: {precision:.4f}")
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
print(f"Recall: {recall:.4f}")

precision_micro = precision_score(y_test, y_pred, average='micro', zero_division=0)
recall_micro = recall_score(y_test, y_pred, average='micro', zero_division=0)
print(f"Micro-averaged Precision: {precision_micro:.4f}")
print(f"Micro-averaged Recall: {recall_micro:.4f}")



NameError: name 'y_test' is not defined