In [27]:
import numpy as np
import cv2
import os
import random
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib


# Function to load images from folder, picking 1200 random images from each class

In [14]:
def load_images_from_folder(folder, num_images_per_class=1200):
    images = []
    labels = []
    for label in os.listdir(folder):
        class_folder = os.path.join(folder, label)
        if not os.path.isdir(class_folder):
            print(f"Skipping non-directory item: {class_folder}")
            continue
        all_images = os.listdir(class_folder)
        if len(all_images) < num_images_per_class:
            print(f"Not enough images in {class_folder}. Found {len(all_images)}, required {num_images_per_class}")
            continue
        selected_images = random.sample(all_images, num_images_per_class)
        for filename in selected_images:
            img_path = os.path.join(class_folder, filename)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Failed to load image: {img_path}")
                continue
            img = cv2.resize(img, (256, 256))  # Resizing images to a consistent size 
            images.append(img)
            labels.append(label)
    return np.array(images), np.array(labels)

# Function to extract color histograms

In [15]:
def extract_color_histograms(images):
    histograms = []
    for img in images:
        hist = cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
        hist = cv2.normalize(hist, hist).flatten()
        histograms.append(hist)
    return np.array(histograms)

# Function to extract Texture Features (LBP)

In [16]:
def extract_lbp_features(images):
    features = []
    for img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray, 8, 1, method='uniform')
        hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 10), range=(0, 9))
        hist = hist.astype("float")
        hist /= (hist.sum() + 1e-7)
        features.append(hist)
    return np.array(features)

# Loading images and labels

In [19]:
folder = '/Users/raghavsenwal/Downloads/data_256' 
images, labels = load_images_from_folder(folder)
print(f"Loaded {len(images)} images with {len(labels)} labels.")
print(f"Unique labels found: {set(labels)}")

Skipping non-directory item: /Users/raghavsenwal/Downloads/data_256/.DS_Store
Loaded 6000 images with 6000 labels.
Unique labels found: {'SubwayStation ', 'Restaurant', 'Bar', 'HospitalRoom', 'Gymnasium'}


# Extracting color histograms

In [20]:
X_color_hist = extract_color_histograms(images)

# Extract LBP features

In [None]:
X_lbp = extract_lbp_features(images)

# Flatten images for the classifier

In [None]:
images_flattened = images.reshape((images.shape[0], -1))

# Concatenate features with flattened pixel values

In [None]:
X_combined = np.concatenate((images_flattened, X_color_hist, X_lbp), axis=1)

# Split the data

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_combined, labels, test_size=0.2, random_state=42)

# Training the Decision Tree classifier

In [21]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predicting and evaluating the Decision Tree classifier

In [30]:
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Classification report and metrics

In [31]:
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print(f'Recall: {recall * 100:.2f}%')
print(f'F1 Score: {f1 * 100:.2f}%')
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 38.67%
Precision: 39.00%
Recall: 38.67%
F1 Score: 38.67%

Classification Report:
                 precision    recall  f1-score   support

           Bar       0.40      0.41      0.40       232
     Gymnasium       0.49      0.41      0.44       268
  HospitalRoom       0.42      0.50      0.45       212
    Restaurant       0.27      0.28      0.27       245
SubwayStation        0.37      0.35      0.36       243

      accuracy                           0.39      1200
     macro avg       0.39      0.39      0.39      1200
  weighted avg       0.39      0.39      0.39      1200



# Training the Random Forest classifier

In [32]:
clf_rf = RandomForestClassifier(random_state=43)
clf_rf.fit(X_train, y_train)

# Predict and evaluate the Random Forest Ensemble

In [35]:
y_pred_rf = clf_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted')
recall_rf = recall_score(y_test, y_pred_rf, average='weighted')
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')

 # Save the trained model to a file
model_filename = 'random_forest_model.joblib'
joblib.dump(clf, model_filename)
print(f'Model saved to {model_filename}')

# Classification reports and metrics for Random Forest Ensemble

In [34]:
print(f'Accuracy: {accuracy_rf * 100:.2f}%')
print(f'Precision: {precision_rf * 100:.2f}%')
print(f'Recall: {recall_rf * 100:.2f}%')
print(f'F1 Score: {f1_rf * 100:.2f}%')
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))

Accuracy: 52.17%
Precision: 52.23%
Recall: 52.17%
F1 Score: 51.56%

Classification Report:
                 precision    recall  f1-score   support

           Bar       0.51      0.56      0.54       232
     Gymnasium       0.59      0.62      0.60       268
  HospitalRoom       0.48      0.67      0.56       212
    Restaurant       0.49      0.40      0.44       245
SubwayStation        0.52      0.37      0.43       243

      accuracy                           0.52      1200
     macro avg       0.52      0.52      0.52      1200
  weighted avg       0.52      0.52      0.52      1200



# Loading the model from a file

In [None]:
loaded_clf = joblib.load(model_filename)
loaded_y_pred = loaded_clf.predict(X_test)
loaded_accuracy = accuracy_score(y_test, loaded_y_pred)
print(f'Loaded Model Accuracy: {loaded_accuracy * 100:.2f}%')