In [None]:
# Colab-only step: attach Google Drive so files under /content/drive are readable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


ValueError: mount failed

In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Class name -> integer label. Labels are assigned consecutively in the order
# the names are listed here. The "brb" class is currently disabled (it used to
# sit right after "healthy" with label 1).
_CLASS_NAMES = ("healthy", "ub", "brb_lite", "gi", "sitsc")
CLASSES = {class_name: label for label, class_name in enumerate(_CLASS_NAMES)}


In [None]:
# Dataset root on the mounted Google Drive. collect_pairs() looks for the
# "<class>_segments" and "<class>_features" folders directly beneath it.
BASE = "/content/drive/MyDrive/dataset4"


In [None]:
def collect_pairs(base, cls):
    """Pair each segment image of one class with its feature CSV.

    Images are read from ``<base>/<cls>_segments`` (``*.png``) and features
    from ``<base>/<cls>_features`` (``*.csv``). A pair is formed when both
    folders contain a file with the same name once the extension is removed.

    Args:
        base: Dataset root directory.
        cls: Class name; must be a key of ``CLASSES`` when pairs are found.

    Returns:
        A list of ``(image_path, feature_path, label)`` tuples sorted by file
        stem, or an empty list (after printing a warning) when either folder
        is missing.
    """
    img_folder = os.path.join(base, f"{cls}_segments")
    feat_folder = os.path.join(base, f"{cls}_features")

    # isdir (not exists): a plain file with that name would make listdir fail.
    if not os.path.isdir(img_folder) or not os.path.isdir(feat_folder):
        print(f"[WARN] Missing folder for {cls}")
        return []

    def _index_by_stem(folder, extension):
        # splitext removes only the final extension, so names that contain
        # dots ("seg.1.png", "seg.2.png") keep distinct keys instead of all
        # collapsing onto the text before the first dot.
        return {
            os.path.splitext(name)[0]: name
            for name in os.listdir(folder)
            if name.endswith(extension)
        }

    img_files = _index_by_stem(img_folder, ".png")
    feat_files = _index_by_stem(feat_folder, ".csv")

    # Sort the shared stems: os.listdir order is arbitrary, and a stable order
    # keeps the seeded train/test split reproducible between runs.
    return [
        (
            os.path.join(img_folder, img_files[stem]),
            os.path.join(feat_folder, feat_files[stem]),
            CLASSES[cls],
        )
        for stem in sorted(img_files.keys() & feat_files.keys())
    ]


In [None]:
# Gather the (image path, feature path, label) triples of every class into one table.
all_samples = []

for cls in CLASSES:
    pairs = collect_pairs(BASE, cls)
    # The separator used to be printed as mojibake (a mis-decoded arrow).
    print(cls, "→", len(pairs), "samples")
    all_samples.extend(pairs)

df = pd.DataFrame(all_samples, columns=["img_path", "feat_path", "label"])

# Stop here with a clear message instead of failing later inside the
# imputer / train_test_split when nothing was found (e.g. Drive not mounted).
if df.empty:
    raise RuntimeError(
        f"No image/feature pairs found under {BASE!r}; "
        "check that Google Drive is mounted and the dataset folders exist."
    )

print("\nLabel counts:")
print(df["label"].value_counts())


In [None]:
IMG_SIZE = 64

def load_image(path):
    """Load an image, resize it to IMG_SIZE x IMG_SIZE and rescale it by 1/255.

    Args:
        path: Path of the image file to read.

    Returns:
        A float32 array holding the resized image divided by 255.

    Raises:
        ValueError: If OpenCV cannot read the file (missing, unreadable or not
            an image).
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error surfaces in cv2.resize and never names the file.
    if img is None:
        raise ValueError(f"Could not read image file: {path!r}")
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    return img.astype("float32") / 255.0

def load_features(path):
    """Read a comma-separated numeric file and return its contents as a NumPy array."""
    return np.loadtxt(path, delimiter=",")


In [None]:
import glob
import numpy as np
import pandas as pd
import cv2
from joblib import Parallel, delayed  # parallel loading



In [None]:
IMG_SIZE = 64

def fast_load_sample(img_path, feat_path, label):
    """Load one sample: its preprocessed image, its feature array and its label.

    Args:
        img_path: Path of the image file.
        feat_path: Path of the comma-separated numeric feature file.
        label: Class label, passed through unchanged.

    Returns:
        ``(img, feat, label)`` where ``img`` is the image resized to
        IMG_SIZE x IMG_SIZE as float32 divided by 255 and ``feat`` is the
        array parsed from ``feat_path``.

    Raises:
        ValueError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # cv2.imread returns None on failure; report the offending file here
    # rather than letting cv2.resize fail without naming it.
    if img is None:
        raise ValueError(f"Could not read image file: {img_path!r}")
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img.astype("float32") / 255.0

    # Numeric features stored as comma-separated text.
    feat = np.loadtxt(feat_path, delimiter=",")

    return img, feat, label



In [None]:
from joblib import Parallel, delayed

# Load every row of df on a pool of 8 threads; each job returns (img, feat, label).
results = Parallel(n_jobs=8, backend="threading")(
    delayed(fast_load_sample)(sample.img_path, sample.feat_path, sample.label)
    for sample in df.itertuples()
)

# Split the loaded triples into three parallel lists, in the order returned.
all_imgs = [loaded[0] for loaded in results]
all_feats = [loaded[1] for loaded in results]
all_labels = [loaded[2] for loaded in results]


In [None]:
import numpy as np
from sklearn.impute import SimpleImputer

# Stack the per-sample feature arrays into one float matrix (one row per sample).
# Forcing a float dtype guarantees that NaN can be stored in it.
features_raw = np.asarray(all_feats, dtype=np.float64)

# Treat +/-inf as missing (NaN), building a new array instead of writing in place.
features_raw = np.where(np.isfinite(features_raw), features_raw, np.nan)

# Fit the preprocessing on TRAINING rows only. Fitting the imputer/scaler on
# every row lets test-set statistics leak into the model inputs and inflates
# the reported test accuracy. The row partition is recovered with the same
# test_size / stratify / random_state as the train_test_split call that
# follows, so both calls select identical rows. Keep the two in sync.
train_rows, _ = train_test_split(
    np.arange(len(features_raw)),
    test_size=0.2,
    stratify=all_labels,
    random_state=42,
)

# Replace NaNs with the per-column mean of the training rows.
imputer = SimpleImputer(strategy='mean')
imputer.fit(features_raw[train_rows])
features_imputed = imputer.transform(features_raw)

# Standardise every row with the mean/std of the training rows.
scaler = StandardScaler()
scaler.fit(features_imputed[train_rows])
features = scaler.transform(features_imputed)

In [None]:
# Stratified 80/20 split applied jointly to images, numeric features and labels.
split_parts = train_test_split(
    all_imgs,
    features,
    all_labels,
    test_size=0.2,
    stratify=all_labels,
    random_state=42,
)
X_img_train, X_img_test, X_num_train, X_num_test, y_train, y_test = split_parts

In [None]:
# Image branch: two conv + max-pool stages, a final conv layer, then global
# average pooling down to one vector per image.
img_in = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

x = img_in
for n_filters in (32, 64):
    x = Conv2D(n_filters, (3, 3), activation='relu')(x)
    x = MaxPooling2D()(x)

x = Conv2D(128, (3, 3), activation='relu')(x)
x = GlobalAveragePooling2D()(x)


In [None]:
# Numeric branch: two fully connected ReLU layers on the feature vector.
n_numeric_features = features.shape[1]
num_in = Input(shape=(n_numeric_features,))

y = num_in
for n_units in (64, 32):
    y = Dense(n_units, activation='relu')(y)


In [None]:
# Fuse the image branch (x) and the numeric branch (y), then classify.
combined = concatenate([x, y])
z = Dense(64, activation='relu')(combined)

# One softmax output per entry of CLASSES.
n_classes = len(CLASSES)
z = Dense(n_classes, activation='softmax')(z)

model = Model(inputs=[img_in, num_in], outputs=z)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # labels are integer class ids
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train on both inputs; Keras holds out 20% of the training data for validation.
train_inputs = [np.array(X_img_train), X_num_train]
train_targets = np.array(y_train)

history = model.fit(
    train_inputs,
    train_targets,
    validation_split=0.2,
    epochs=5,
    batch_size=32,
)

In [None]:
# Predict class probabilities for the test set and keep the most likely class.
test_inputs = [np.array(X_img_test), X_num_test]
test_pred = model.predict(test_inputs).argmax(axis=1)

acc = accuracy_score(y_test, test_pred)
print("Test Accuracy:", acc)

In [None]:
# Fix the class order explicitly. Without labels=, a class missing from both
# y_test and test_pred is dropped from the matrix, which shrinks it and shifts
# every later tick label onto the wrong row/column.
class_names = list(CLASSES)
class_ids = [CLASSES[name] for name in class_names]
cm = confusion_matrix(y_test, test_pred, labels=class_ids)

plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt="d",
            xticklabels=class_names,
            yticklabels=class_names,
            cmap="Blues")
# confusion_matrix puts true labels on rows and predicted labels on columns.
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("CNN Confusion Matrix")
plt.show()


In [None]:
# Training vs. validation curves: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))

curve_specs = (
    (loss_ax, "loss", "val_loss", "Loss"),
    (acc_ax, "accuracy", "val_accuracy", "Accuracy"),
)
for ax, train_key, val_key, panel_title in curve_specs:
    ax.plot(history.history[train_key], label="train")
    ax.plot(history.history[val_key], label="val")
    ax.set_title(panel_title)
    ax.legend()

plt.show()