# Charles & Cole

In [None]:
import cv2
import os
import zipfile
import numpy as np

from google.colab import drive
from google.colab.patches import cv2_imshow
drive.mount('/content/drive')

import warnings
warnings.filterwarnings('ignore')

Mounted at /content/drive


##Unzip the compressed data folder 
setup directories

In [None]:
#unzip data [Charles]
#with zipfile.ZipFile(r'/content/drive/MyDrive/UTD/.UTD 2023 Spring/4391/ProjData.zip', 'r') as zip_ref:
#    zip_ref.extractall('/content')

In [None]:
#unzip data [Cole]
with zipfile.ZipFile(r'/content/drive/MyDrive/Laptop Sync/UTD/CS 4391/Term Project/ProjData.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Change all directory names to lowercase
!mv /content/ProjData/Test /content/ProjData/test
!mv /content/ProjData/Train /content/ProjData/train
for folder in ['/content/ProjData/test', '/content/ProjData/train']:
    !mv {folder}/Coast {folder}/coast
    !mv {folder}/Forest {folder}/forest
    !mv {folder}/Bedroom {folder}/bedroom

# Define the path to the output directory for resized images
!mkdir 'Pre-Processed_files'
resized_dir = '/content/Pre-Processed_files'
!mkdir 'Histogram_data'
hist_dir = '/content/Histogram_data'

# Define the path to the train and test images directory
train_dirs = ['/content/ProjData/train/coast', '/content/ProjData/train/forest', '/content/ProjData/train/bedroom']
test_dirs = ['/content/ProjData/test/coast', '/content/ProjData/test/forest', '/content/ProjData/test/bedroom']

mv: cannot stat '/content/ProjData/test/Coast': No such file or directory
mv: cannot stat '/content/ProjData/test/Forest': No such file or directory
mv: cannot stat '/content/ProjData/test/Bedroom': No such file or directory
mv: cannot stat '/content/ProjData/train/Bedroom': No such file or directory


## Pre-processing
resize all training images to 200\*200 and 50\*50 size images

In [None]:
!mkdir '/content/Pre-Processed_files/train'
!mkdir '/content/Pre-Processed_files/test'

In [None]:
!mkdir '/content/Pre-Processed_files/train/coast/'
!mkdir '/content/Pre-Processed_files/train/forest/'
!mkdir '/content/Pre-Processed_files/train/bedroom'

!mkdir '/content/Pre-Processed_files/test/coast/'
!mkdir '/content/Pre-Processed_files/test/forest/'
!mkdir '/content/Pre-Processed_files/test/bedroom'

In [None]:
def adjust_brightness(img, avg_brightness):
    if avg_brightness < 0.4:
        return cv2.convertScaleAbs(img, alpha=1.5, beta=50)
    elif avg_brightness > 0.6:
        return cv2.convertScaleAbs(img, alpha=0.75, beta=-50)
    else:
        return img

def preprocess_images(image_folder):
    images_250 = list()
    images_200 = list()
    images_50 = list()
    labels = list()
    preproc_out = "/content/Pre-Processed_files/train" if image_folder.find('train') != -1 else "/content/Pre-Processed_files/test"
    index = 0
    for category in ["coast", "forest", "bedroom"]:
        category_folder = os.path.join(image_folder, category)
        for filename in os.listdir(category_folder):
            img_path = os.path.join(category_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            avg_brightness = np.mean(img) / 255
            
            img = adjust_brightness(img, avg_brightness)
            img_200 = cv2.resize(img, (200, 200))
            img_50 = cv2.resize(img, (50, 50))
            
            images_250.append(img)
            images_200.append(img_200)
            images_50.append(img_50)
            labels.append(category)
            cv2.imwrite((preproc_out + f"/{category}/img250_{index}.jpg"), img)
            cv2.imwrite((preproc_out + f"/{category}/img200_{index}.jpg"), img_200)
            cv2.imwrite((preproc_out + f"/{category}/img50_{index}.jpg"), img_50)
            index+=1
    
    return np.array(images_250), np.array(images_200), np.array(images_50), np.array(labels)

In [None]:
!mkdir /content/SIFT_Data/
!mkdir /content/SIFT_Data/Train
!mkdir /content/SIFT_Data/Test

In [None]:
#2.	Extract SIFT features on ALL training images and save the data. 
def extract_SIFT_features(images, labels, isTest=False, descriptors=500):
  features = list()
  sift = cv2.SIFT_create()
  index = 0

  for img in images:
    kp, des = sift.detectAndCompute(img, None)
    try:
      #print(len(des))
      if len(des) > descriptors:
        des = des[:descriptors,:]
      elif len(des) < descriptors:
        des = np.pad(des, ((0, descriptors-len(des)),(0,0)), mode='constant')
      features.append(des.flatten())
      if isTest:
        with open(f"/content/SIFT_Data/Test/test{labels[index]}_{img.shape}_{index}.txt", 'w') as f:
          f.write(np.array2string(des))
          f.close()
      else:
        with open(f"/content/SIFT_Data/Train/train{labels[index]}_{img.shape}_{index}.txt", 'w') as f:
          f.write(np.array2string(des))
          f.close()
    except:
      print("why is this empty")
    
    index += 1
  print(len(features), features[0].shape, features[-1].shape)
  return np.array(features)

In [None]:
#3.	Extract Histogram features on ALL training images and save the data. 
def extract_histogram_features(images, labels, bins=30, isTest=False):
    features = []
    index = 0
    for img in images:
        hist = cv2.calcHist([img], [0], None, [bins], [0, 255])
        features.append(hist.flatten())
        if isTest:
          with open(f"/content/Histogram_data/test_{labels[index]}{index}_{img.shape}.txt", 'w') as f:
            f.write(np.array2string(hist))
            f.close()
        else:
          with open(f"/content/Histogram_data/train_{labels[index]}{index}_{img.shape}.txt", 'w') as f:
            f.write(np.array2string(hist))
            f.close()
        index +=1

    return np.array(features)

## Perform 4 training on the data

In [None]:
#c.	Represent the image using histogram feature data and use Nearest Neighbor classifier
def train_classifiers(images_50, hist_features_50, hist_features_200, hist_features_250, labels):
    labels = np.array([int(label == "coast") + 2 * int(label == "forest") for label in labels], dtype=np.float32)
    knn_pixels = cv2.ml.KNearest_create()
    knn_pixels.train(images_50.reshape(len(images_50), -1).astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

    knn_hist_50 = cv2.ml.KNearest_create()
    knn_hist_50.train(hist_features_50.astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

    knn_hist_200 = cv2.ml.KNearest_create()
    knn_hist_200.train(hist_features_200.astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

    knn_hist_250 = cv2.ml.KNearest_create()
    knn_hist_250.train(hist_features_250.astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

    return {'knn_pixels': knn_pixels, 'knn_hist_50': knn_hist_50, 'knn_hist_200' : knn_hist_200, 'knn_hist_250' : knn_hist_250}

In [None]:
#d.	Represent the image using SIFT feature data and use linear SVM classifier

In [None]:
def train_SIFT_classifiers(SIFT_50, SIFT_200, SIFT_250, labels, maxIter = 250, tol = 1e-6, c=0.01):

  knn_SIFT_200 = cv2.ml.KNearest_create()
  knn_SIFT_200.train(SIFT_200.astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

  knn_SIFT_250 = cv2.ml.KNearest_create()
  knn_SIFT_250.train(SIFT_250.astype(np.float32), cv2.ml.ROW_SAMPLE, labels)

  svm_SIFT_200 = cv2.ml.SVM_create()
  svm_SIFT_200.setType(cv2.ml.SVM_C_SVC); svm_SIFT_200.setKernel(cv2.ml.SVM_LINEAR); svm_SIFT_200.setC(c)
  svm_SIFT_200.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, maxIter, tol))
  svm_SIFT_200.train(SIFT_200.astype(np.float32), cv2.ml.ROW_SAMPLE, np.argmax(labels, axis=1))

  svm_SIFT_250 = cv2.ml.SVM_create()
  svm_SIFT_250.setType(cv2.ml.SVM_C_SVC); svm_SIFT_250.setKernel(cv2.ml.SVM_LINEAR); svm_SIFT_250.setC(c)
  svm_SIFT_250.setTermCriteria((cv2.TERM_CRITERIA_MAX_ITER, maxIter, tol))
  svm_SIFT_250.train(SIFT_250.astype(np.float32), cv2.ml.ROW_SAMPLE, np.argmax(labels, axis=1))

  return {'knn_SIFT_200' : knn_SIFT_200, 'knn_SIFT_250' : knn_SIFT_250, 'svm_SIFT_200': svm_SIFT_200, 'svm_SIFT_250' : svm_SIFT_250}

## Test the trained classifiers
Report Accuracy, False Positives, and False Negatives

In [None]:
def to_categorical(labels, classnum=3):
  ret = np.zeros((len(labels), classnum))
  for i, label in enumerate(labels):
    #print(i, label)
    ret[i, int(label)] = 1.0
  return ret.astype(np.float32)

In [None]:
def accuracy_score(true_labels, pred_labels):
    return np.mean(true_labels == pred_labels)

def false_positive_rate(true_labels, pred_labels):
    fp = np.sum((true_labels != pred_labels) & (pred_labels == 1))
    tn = np.sum((true_labels == pred_labels) & (true_labels == 0))
    return fp / (fp + tn)

def false_negative_rate(true_labels, pred_labels):
    fn = np.sum((true_labels != pred_labels) & (pred_labels == 0))
    tp = np.sum((true_labels == pred_labels) & (true_labels == 1))
    return fn / (fn + tp)

def evaluate_classifiers(classifiers, test_data, true_labels, knear= 15, SIFT=False):
    results = {}
    preddict = {}
    truedict = {}
    oldlab = true_labels
    if not SIFT:
      true_labels = np.array([int(label == "coast") + 2 * int(label == "forest") for label in true_labels], dtype=np.float32)
    else:
      true_labels = to_categorical((np.array([int(label == "coast") + 2 * int(label == "forest") for label in true_labels], dtype=np.float32)), 3)
    isSVM = False
    for name, clf in classifiers.items():
        try:
          _, pred_labels, _, _ = clf.findNearest(test_data[name].astype(np.float32), k=knear)
          if not SIFT:
            pred_labels = pred_labels.ravel()
        except AttributeError:
            isSVM = True
            pred_labels = clf.predict(test_data[name].astype(np.float32))[1]
        preddict[name] = pred_labels
        truedict[name] = true_labels

        if isSVM:
          true_labels = np.array([int(label == "coast") + 2 * int(label == "forest") for label in oldlab], dtype=np.float32)
          

        accuracy = accuracy_score(true_labels, pred_labels)
        false_positive = false_positive_rate(true_labels, pred_labels)
        false_negative = false_negative_rate(true_labels, pred_labels)

        results[name] = {
            'accuracy': accuracy,
            'false_positive_rate': false_positive,
            'false_negative_rate': false_negative,
        }

    return results, preddict, truedict

In [None]:
def printResults(results):
  for classifier_name, metrics in results.items():
    print(f"Results for {classifier_name}:")
    print(f"  Accuracy: {metrics['accuracy'] * 100:.2f}%")
    print(f"  False Positive Rate: {metrics['false_positive_rate'] * 100:.2f}%")
    print(f"  False Negative Rate: {metrics['false_negative_rate'] * 100:.2f}%")
    print()

In [None]:
img200bins = 50
img250bins = 100

img50descriptors = 30
img200descriptors = 600
img250descriptors = 6000

train_images_250, train_images_200, train_images_50, train_labels = preprocess_images('/content/ProjData/train')

hist_features_50 = extract_histogram_features(train_images_50, labels=train_labels)
hist_features_200 = extract_histogram_features(train_images_200, bins=img200bins, labels=train_labels)
hist_features_250 = extract_histogram_features(train_images_250, bins= img250bins, labels=train_labels)

SIFT_features_50 = extract_SIFT_features(train_images_50, train_labels, descriptors=img50descriptors)
SIFT_features_200 = extract_SIFT_features(train_images_200, train_labels, descriptors=img200descriptors)
SIFT_features_250 = extract_SIFT_features(train_images_250, train_labels, descriptors=img250descriptors)

test_images_250, test_images_200, test_images_50, test_labels = preprocess_images('/content/ProjData/test')

test_hist_features_50 = extract_histogram_features(test_images_50, labels=test_labels, isTest = True)
test_hist_features_200 = extract_histogram_features(test_images_200, labels= test_labels, bins=img200bins, isTest = True)
test_hist_features_250 = extract_histogram_features(test_images_250, labels= test_labels, bins=img250bins, isTest = True)

test_SIFT_features_50 = extract_SIFT_features(test_images_50, test_labels, isTest = True, descriptors=img50descriptors)
test_SIFT_features_200 = extract_SIFT_features(test_images_200, test_labels, isTest = True, descriptors= img200descriptors)
test_SIFT_features_250 = extract_SIFT_features(test_images_250, test_labels, isTest = True, descriptors= img250descriptors)


why is this empty
299 (3840,) (3840,)
300 (76800,) (76800,)
300 (768000,) (768000,)
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
why is this empty
593 (3840,) (3840,)
604 (76800,) (76800,)
604 (768000,) (768000,)


In [None]:
train_onehotlabels = to_categorical(np.array([int(label == "coast") + 2 * int(label == "forest") for label in train_labels], dtype=np.float32))
train_onehotlabels.shape

(300, 3)

In [None]:
test_onehotlabels = to_categorical(np.array([int(label == "coast") + 2 * int(label == "forest") for label in test_labels], dtype=np.float32))
test_onehotlabels.shape

(604, 3)

In [None]:
hist_classifiers = train_classifiers(train_images_50, hist_features_50, hist_features_200, hist_features_250, train_labels)
sift_classifiers = train_SIFT_classifiers(SIFT_features_50, SIFT_features_200, SIFT_features_250, train_onehotlabels)

test_data_hist = {
    'knn_pixels': test_images_50.reshape(len(test_images_50), -1),
    'knn_hist_50': test_hist_features_50.astype(np.float32),
    'knn_hist_200': test_hist_features_200.astype(np.float32),
    'knn_hist_250': test_hist_features_250.astype(np.float32)
}

test_data_sift = {
    'knn_SIFT_200' : test_SIFT_features_200,
    'knn_SIFT_250' : test_SIFT_features_250,
    'svm_SIFT_200' : test_SIFT_features_200,
    'svm_SIFT_250' : test_SIFT_features_250
}

hist_results,_,_ = evaluate_classifiers(hist_classifiers, test_data_hist, test_labels, knear=3)
sift_results, spred, strue = evaluate_classifiers(sift_classifiers, test_data_sift, test_labels, knear=3, SIFT=True)

printResults(hist_results)
printResults(sift_results)

Results for knn_pixels:
  Accuracy: 45.70%
  False Positive Rate: 94.72%
  False Negative Rate: 12.31%

Results for knn_hist_50:
  Accuracy: 50.50%
  False Positive Rate: 28.12%
  False Negative Rate: 71.19%

Results for knn_hist_200:
  Accuracy: 50.99%
  False Positive Rate: 24.72%
  False Negative Rate: 68.21%

Results for knn_hist_250:
  Accuracy: 51.32%
  False Positive Rate: 28.00%
  False Negative Rate: 65.33%

Results for knn_SIFT_200:
  Accuracy: 58.06%
  False Positive Rate: 25.83%
  False Negative Rate: 74.17%

Results for knn_SIFT_250:
  Accuracy: 57.06%
  False Positive Rate: 28.81%
  False Negative Rate: 71.19%

Results for svm_SIFT_200:
  Accuracy: 34.70%
  False Positive Rate: 80.34%
  False Negative Rate: 57.66%

Results for svm_SIFT_250:
  Accuracy: 35.14%
  False Positive Rate: 81.84%
  False Negative Rate: 55.25%

