In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
import os
from skimage import color
from skimage.feature import hog
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier # using 1NN
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tqdm import tqdm 
from torchvision.utils import make_grid
from torchvision.utils import save_image
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 1. Download the [cow teat datasets](https://github.com/YoushanZhang/SCTL) (10 points) resize image to (224, 224)

### (1). Create a train data loader that returns image arrays and labels

In [2]:
def train_data_loader(train_dirs):
    # Initialize empty lists for to store image arrays and labels
    image_arrays = []
    image_labels = []
   
    # loop to navitage all directories and read information from all the image files
    for label, directory in enumerate(sorted(os.listdir(train_dirs))):
        path = os.path.join(train_dirs, directory)

        # check if it's a directory
        if not os.path.isdir(path):
            continue

        # Iterate over image files in the directory
        for img_file in os.listdir(path):
            img_path = os.path.join(path, img_file)

            # resize and store images
            img = cv2.resize(plt.imread(img_path).copy(), (224, 224))
            image_labels.append(label)
            image_arrays.append(img)
            
    return image_arrays, image_labels

### (2). Create a test data loader that returns image arrays and file names

In [3]:
def test_data_loader(data_dir):
    # Get a list of image files in the test data directory
    image_files = [f for f in os.listdir(data_dir) if f.endswith(".jpg")]
    
    # Initialize lists for data and file names
    image_arrays = []
    image_names = []
    
    # Iterate over the image files
    for file in image_files:
        # Get the full path of the image
        path = os.path.join(data_dir, file)
        
        # Read and preprocess the image
        img = cv2.resize(plt.imread(path).copy(), (224, 224))
        #img = img.astype(np.float32) / 255.0
        
        # Append the preprocessed image and file name to the lists
        image_arrays.append(img)
        image_names.append(file)
    
    # Convert lists to NumPy arrays and return
    return image_arrays, image_names


### (3). Print image arrays, labels and file names dimensions 

### Training Dataset

In [4]:
train_loader = r'C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Assignment_Week5\Homework Week5\Homework Week5\Training'
training_array, training_labels = train_data_loader(train_loader)
training_array, training_labels = np.array(training_array), np.array(training_labels)

print("Training Dataset Array:",training_array.shape )
print("Training Label Shape:",training_labels.shape )

Training Dataset Array: (1149, 224, 224, 3)
Training Label Shape: (1149,)


**Observation:**
   - The training dataset contains total 1149 images of size 224, 224 and has 3 channels.

### Test Dataset

In [5]:
# Example usage
test_loader=r'C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Assignment_Week5\Homework Week5\Homework Week5\Test_Data'
test_array, test_file_name = test_data_loader(test_loader)
test_array, test_file_name = np.array(test_array), np.array(test_file_name)

print("Test Dataset Array:",test_array.shape )
print("Test File Names Extracted:",test_file_name.shape )


Test Dataset Array: (380, 224, 224, 3)
Test File Names Extracted: (380,)


**Observation:**
   - The test dataset contains total 380 images of size 224, 224 and has 3 channels.

# 2. Extract features of training and test images using HOG (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

#### Extracting HOG Features for Training Dataset

In [6]:
# Initialize lists to store features and labels
hog_features = []
hog_labels = []

# Extract features for training images
for i, img in enumerate(training_array):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  
    # Compute HOG descriptors for each grayscale image
    features = hog(img_gray, pixels_per_cell=(16, 16), transform_sqrt=False)
    hog_features.append(features)
    hog_labels.append(training_labels[i])
    
# Convert feature lists to numpy arrays
hog_features = np.array(hog_features)
hog_labels = np.array(hog_labels)

# Print the size of extracted features
print(f"Training Dataset HOG features: {hog_features.shape[0]} * {hog_features.shape[1]}")   

Training Dataset HOG features: 1149 * 11664


#### Extracting HOG Features for Test Dataset

In [7]:
# Initialize lists to store features and labels
hog_features_test = []
hog_labels_test = []

# Extract features for test images
for i, img in enumerate(test_array):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Compute HOG descriptors for each grayscale image
    features = hog(img_gray, pixels_per_cell=(16, 16), transform_sqrt=True)
    
    hog_features_test.append(features)
    hog_labels_test.append(test_file_name[i])

# Convert feature lists to numpy arrays
hog_features_test = np.array(hog_features_test)
hog_labels_test = np.array(hog_labels_test)

# Print the size of extracted features
print(f"Test Dataset HOG features: {hog_features_test.shape[0]} * {hog_features_test.shape[1]}")

Test Dataset HOG features: 380 * 11664


# 3. Extract features of training and test images using SIFT (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

In [8]:
def SIFT(image):
    """
    This function will be used to extract the SIFT features from an image.

    Parameters:
        image: The input image for which SURF descriptors/features needs to be extracted.

    Returns:
        An array of SURF descriptors extracted from the input image.
    """
    sift = cv2.xfeatures2d.SIFT_create(50)
    keypoints, descriptors = sift.detectAndCompute(image,None)
    return descriptors

#### Extracting SIFT Features for Training Dataset

In [9]:
# Initialize lists to store SIFT features
sift_features = []

# Load the training images
for img in training_array:
    gray1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    descriptors = SIFT(gray1)
    if descriptors is not None:
        sift_features.append(descriptors.flatten()[:128])
    else:
        sift_features.append(np.array([0]*(128)).flatten())

# Convert the lists to NumPy arrays
sift_features = np.array(sift_features)

# Print the size of the extracted features
print(f"Training features: {sift_features.shape[0]} * {sift_features.shape[1]}")

Training features: 1149 * 128


#### Extracting HOG Features for Test Dataset

In [10]:
# Initialize lists to store SIFT features
sift_features_test = []

# Load the test images
for img in test_array:
    gray1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    descriptors = SIFT(gray1)
    if descriptors is not None:
        sift_features_test.append(descriptors.flatten()[:128])
    else:
        sift_features_test.append(np.array([0]*(128)).flatten())

# Convert the lists to NumPy arrays
sift_features_test = np.array(sift_features_test)

# Print the size of the extracted features
print(f"Test features: {sift_features_test.shape[0]} * {sift_features_test.shape[1]}")

Test features: 380 * 128


# 4. Extract features of training and test images using SURF (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

In [11]:
# Function to extract SURF features
def SURF(image):
    """
    This function will be used to extract the SURF features from an image.

    Parameters:
        image: The input image for which SURF descriptors/features needs to be extracted.

    Returns:
        An array of SURF descriptors extracted from the input image.
    """
    surf = cv2.xfeatures2d.SURF_create()
    keypoints, descriptors = surf.detectAndCompute(image, None)
    return descriptors

#### Extracting SURF Features for Training Dataset

In [12]:
# Initialize lists to store SURF features
surf_features = []

# Load the training images
for img in training_array:
    descriptors = SURF(img)
    if descriptors is not None:
        surf_features.append(descriptors.flatten()[:128])
    else:
        surf_features.append(np.zeros(128, dtype=np.float32))

# Convert the lists to NumPy arrays
surf_features = np.array(surf_features, dtype=np.float32)

# Print the size of the extracted features
print(f"Training features: {surf_features.shape[0]} * {surf_features.shape[1]}")

Training features: 1149 * 128


#### Extracting SURF Features for Test Dataset

In [13]:
# Initialize lists to store SURF features
surf_features_test = []

# Load the test images
for img in test_array:
    descriptors = SURF(img)
    if descriptors is not None:
        surf_features_test.append(descriptors.flatten()[:128])
    else:
        surf_features_test.append(np.zeros(128, dtype=np.float32))

# Convert the lists to NumPy arrays
surf_features_test = np.array(surf_features_test, dtype=np.float32)

# Print the size of the extracted features
print(f"Test features: {surf_features_test.shape[0]} * {surf_features_test.shape[1]}")

Test features: 380 * 128


# 5. Call SVM and kNN from scikit-learn and train the extracted HOG, SIFT and SURF features, respectively, save three CSV files of test dataset using three features (10 points)

### (1). SVM and KNN using HOG features

#### Splitting the data

In [14]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(hog_features, hog_labels, test_size=0.2, random_state=42)

#### Training and Predicting SVM Classifier and Saving result to a CSV File. 

In [15]:
# Train an SVM classifier
svm_clf = SVC(kernel='rbf')
svm_clf.fit(X_train, y_train)

# Finally, make predictions on the test set
test_predictions = svm_clf.predict(hog_features_test)

svm_hog_predictions = pd.DataFrame({'a':test_file_name, 'b':test_predictions})

svm_hog_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\svm_hog_predictions_svm.csv", index=False, header=False)
print("SVM HOG predictions saved to CSV files")

SVM HOG predictions saved to CSV files


#### Training and Predicting KNN Classifier and Saving result to a CSV File. 

In [16]:
# Train an KNN classifier
n=20
knn_clf = KNeighborsClassifier(n_neighbors=n)
knn_clf.fit(hog_features, hog_labels)

# Finally, make predictions on the test set
test_predictions = knn_clf.predict(hog_features_test)

knn_hog_predictions = pd.DataFrame({'a':test_file_name, 'b':test_predictions})

knn_hog_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\knn_hog_predictions_svm.csv", index=False, header=False)
print("KNN HOG predictions saved to CSV files")

KNN HOG predictions saved to CSV files


### (2). SVM and KNN using SIFT features

#### Splitting the data

In [17]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(sift_features, training_labels, test_size=0.2, random_state=42)

#### Training and Predicting SVM Classifier and Saving result to a CSV File. 

In [18]:
# Create and Train an SVM classifier
svm_clf = SVC(kernel='rbf', C=0.5, gamma='scale', max_iter=-1, degree=5)
svm_clf.fit(X_train, y_train)

# Make predictions on the test setset images
test_predictions = svm_clf.predict(sift_features_test)

#Converting to dataframe to be write to csvfile
svm_sift_predictions = pd.DataFrame({'Filename':test_file_name, 'Prediction':test_predictions})

svm_sift_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\svm_sift_predictions_svm.csv", index=False, header=False)
print("SVM SIFT predictions saved to CSV files")

SVM SIFT predictions saved to CSV files


#### Training and Predicting KNN Classifier and Saving result to a CSV File. 

In [19]:
# Create and Train a KNN classifier
knn_clf = KNeighborsClassifier(n_neighbors=20, weights ='distance')
knn_clf.fit(X_train, y_train)

# Make predictions on the test setset images
test_predictions = knn_clf.predict(sift_features_test)

knn_sift_predictions = pd.DataFrame({'a':test_file_name, 'b':test_predictions})

knn_sift_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\knn_sift_predictions_svm.csv", index=False, header=False)
print("KNN SIFT predictions saved to CSV files")

KNN SIFT predictions saved to CSV files


### (3). SVM and KNN using SURF features

#### Splitting the data

In [20]:
# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(surf_features, training_labels, test_size=0.2, random_state=42)

#### Training and Predicting SVM Classifier and Saving result to a CSV File. 

In [21]:
# Create and Train an SVM classifier
svm_clf = SVC(kernel='rbf', C=0.5, gamma='scale', max_iter=-1, degree=5)
svm_clf.fit(X_train, y_train)

# Make predictions on the test setset images
test_predictions = svm_clf.predict(surf_features_test)

#Converting to dataframe to be write to csvfile
svm_surf_predictions = pd.DataFrame({'Filename':test_file_name, 'Prediction':test_predictions})

svm_surf_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\svm_surf_predictions_svm.csv", index=False, header=False)
print("SVM SURF predictions saved to CSV files")

SVM SURF predictions saved to CSV files


#### Training and Predicting KNN Classifier and Saving result to a CSV File. 

In [22]:
# Create and Train a KNN classifier
knn_clf = KNeighborsClassifier(n_neighbors=20, weights ='distance')
knn_clf.fit(X_train, y_train)

# Make predictions on the test setset images
test_predictions = knn_clf.predict(surf_features_test)

knn_surf_predictions = pd.DataFrame({'a':test_file_name, 'b':test_predictions})

knn_surf_predictions.to_csv(r"C:\AKA\Backup Dell Laptop\D Drive\YU\Semester 2\Neural Network\DLNN\Output\knn_surf_predictions_svm.csv", index=False, header=False)
print("KNN SIFT predictions saved to CSV files")

KNN SIFT predictions saved to CSV files


# 6. Report the accuracy using Cow_teat_classfication_accuracy software, please attach the results image here (20 points)

#### SVM Accuracy with HOG Features

![svm_hog_accuracy.png](attachment:svm_hog_accuracy.png)

#### KNN Accuracy with HOG Features

![knn_hog_accuracy.png](attachment:knn_hog_accuracy.png)

#### SVM Accuracy with SIFT Features

![svm_sift_accuracy.png](attachment:svm_sift_accuracy.png)

#### KNN Accuracy with SIFT Features

![knn_sift_accuracy.png](attachment:knn_sift_accuracy.png)

#### SVM Accuracy with Surf Features

![svm_surf_accuracy.png](attachment:svm_surf_accuracy.png)

#### KNN Accuracy with Surf Features

![knn_surf_accuracy.png](attachment:knn_surf_accuracy.png)

### Citations: 
   - Zhang, Youshan, Ian R. Porter, Matthias J. Wieland, and Parminder S. Basran. 2022. "Separable Confident Transductive Learning for Dairy Cows Teat-End Condition Classification" Animals 12, no. 7: 886. https://doi.org/10.3390/ani12070886
   - https://www.analyticsvidhya.com/blog/2019/10/detailed-guide-powerful-sift-technique-image-matching-python/
   - https://docs.opencv.org/3.4/df/dd2/tutorial_py_surf_intro.html
   - https://docs.opencv.org/4.x/da/df5/tutorial_py_sift_intro.html
   - https://docs.opencv.org/4.x/d4/d11/group__objdetect__hog.html
