In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from glob import glob
from PIL import Image
import os
from skimage import color
from skimage.feature import hog
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier # using 1NN
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
# 1. Create a new environment

# conda create --name dlenv python==3.6.3 numpy pandas


# 2. Activate the new environment

# conda activate dlenv  


# 3. Install the related packages in the dlenv environment (not the base environment)

# pip install opencv-python==3.3.0.10 opencv-contrib-python==3.3.0.10
# pip install opencv-contrib-python==3.3.0.10

# conda install jupyter notebook

# pip install jupyter 


# 4. If some of your packages are not working, try to reinstall it.

# python3 -m pip install -U scikit-learn

# 1. Download the [cow teat datasets](https://github.com/YoushanZhang/SCTL) (10 points) resize image to (224, 224)

### (1). Create a train data loader that returns image arrays and labels
### (2). Create a test data loader that returns image arrays and file names
### (3). Print image arrays, labels and file names dimensions 

# Step1  :Create a train data loader that returns image arrays and labels

In [3]:
def load_train_dataset(train_dirs):
    labels = []
    data = []
    for l in os.listdir(train_dirs):
        for f in os.listdir(os.path.join(train_dirs, l)):
            labels.append(int(l[-1])-1)
            img = plt.imread(os.path.join(train_dirs, l, f)).copy()
            img = cv2.resize(img, (224, 224))
            data.append(img)
    return np.array(data), np.array(labels)

In [4]:
train_loader = '/Users/gf65/Desktop/Yeshiva/AIM5007/Homework Week5/Train'
train_data, labels = load_train_dataset(train_loader)

In [5]:
print(f"Train Images data shape: {train_data.shape}, Train labels shape {labels.shape}")

Train Images data shape: (1149, 224, 224, 3), Train labels shape (1149,)


# Step (2). Creating a test data loader that returns image arrays and file names


In [17]:

def load_test_dataset(data_dir):
    test_data = glob(data_dir+"/*.jpg")
    data, file_names = [], []
    for image in test_data:
        file_names.append(image.split("\\")[-1])
        img = plt.imread(image).copy()
        img = cv2.resize(img, (224, 224))
        data.append(img)
    return np.array(data), np.array(file_names)

test_data, test_file_names = load_test_dataset('/Users/gf65/Desktop/Yeshiva/AIM5007/Homework Week5/Test/')
test_data, test_file_names = np.array(test_data), np.array(test_file_names)

In [18]:
print(f"Train Images data shape: {train_data.shape}, Train labels shape {test_file_names.shape}")

Train Images data shape: (1149, 224, 224, 3), Train labels shape (380,)


# Step (3). Print image arrays, labels and file names dimensions 

In [19]:
def print_dataset_dimensions(train_data, train_labels, test_data, test_file_names):
    print("Train Data length: ", len(train_data))
    print("Train Labels length: ", len(train_labels))
    print("Test Data length: ", len(test_data))
    print("Test File Names length: ", len(test_file_names))
    
print_dataset_dimensions(train_data, labels, test_data, test_file_names)

Train Data length:  1149
Train Labels length:  1149
Test Data length:  380
Test File Names length:  380


# 2. Extract features of training and test images using HOG (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

In [8]:
# Set HOG parameters
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (3, 3)

# Initialize lists to store features and labels
train_hog_features = []
train_hog_labels = []
test_hog_features = []
test_hog_labels = []

# Extract features for training images
for i, img in enumerate(train_data):
    # Compute HOG descriptors for each image
    features = hog(img, orientations=orientations, pixels_per_cell=pixels_per_cell,
                   cells_per_block=cells_per_block, transform_sqrt=True, feature_vector=True)
    train_hog_features.append(features)
    train_hog_labels.append(labels[i])
    
# Extract features for test images
for i, img in enumerate(test_data):
    # Compute HOG descriptors for each image
    features = hog(img, orientations=orientations, pixels_per_cell=pixels_per_cell,
                   cells_per_block=cells_per_block, transform_sqrt=True, feature_vector=True)
    test_hog_features.append(features)
    test_hog_labels.append(test_file_names[i])

# Convert feature lists to numpy arrays
train_hog_features = np.array(train_hog_features)
test_hog_features = np.array(test_hog_features)

# Print the size of extracted features
print(f"Training features: {train_hog_features.shape[0]} * {train_hog_features.shape[1]}")
print(f"Test features: {test_hog_features.shape[0]} * {test_hog_features.shape[1]}")


Training features: 1149 * 54756
Test features: 380 * 54756


# 3. Extract features of training and test images using SIFT (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

In [21]:
train_sift_features =[]
train_sift_labels =[]
test_sift_features =[]
# test_sift_labels=[]

def extract_sift_features(image):
    sift = cv2.xfeatures2d.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image,None)
    return descriptors

# Load the training images 

for i, img in enumerate(train_data):
    # Extract SIFT features
    descriptors = extract_sift_features(img)
    if descriptors is not None:
        train_sift_features.append(descriptors.flatten()[:128])
#         train_sift_labels.append(labels[i])
    else:
        train_sift_features.append(np.array([0]*(128)).flatten())
#         train_sift_labels.append(labels[i])


# Load the test images 

for i, img in enumerate(test_data):
    # Extract SIFT features
    descriptors=extract_sift_features(img)
    if descriptors is not None:
        test_sift_features.append(descriptors.flatten()[:128])
#         test_sift_labels.append(labels[i])
    else:
        test_sift_features.append(np.array([0]*(128)).flatten())
#         test_sift_labels.append(labels[i])
        
        

# Convert the list of test features to a numpy array

sift_train_features = np.array(train_sift_features)
sift_test_features = np.array(test_sift_features)


# Print the size of the extracted features
print("Training features :", sift_train_features.shape)
print("Test features :", sift_test_features.shape)



Training features : (1149, 128)
Test features : (380, 128)


# 4. Extract features of training and test images using SURF (20 points)
Please print the size of extracted features, e.g., training features: 1149 * d, test features: 380 *d

In [26]:
# Function to extract SURF features from an image
def extract_surf_features(image):
    surf = cv2.xfeatures2d.SURF_create()
    keypoints, descriptors = surf.detectAndCompute(image, None)
    return descriptors
        
# Extract SURF features from the training data
train_surf_features = []


# Load the training images 
for i, img in enumerate(train_data):
    # Extract SURF features
    descriptors = extract_surf_features(img)
    if descriptors is not None:
        train_surf_features.append(descriptors.flatten()[:64])
    else:
        train_surf_features.append(np.array([0]*(64)).flatten())


# Load the test images 
test_surf_features = []
for i, img in enumerate(test_data):
    # Extract SURF features
    descriptors=extract_surf_features(img)
    if descriptors is not None:
        test_surf_features.append(descriptors.flatten()[:64])
    else:
        test_surf_features.append(np.array([0]*(64)).flatten())
        

train_surf_features = np.array(train_surf_features)
test_surf_features = np.array(test_surf_features)

# Print the size of extracted features
print(f"Training features: {train_surf_features.shape}")
print(f"Test features: {test_surf_features.shape}") 


Training features: (1149, 64)
Test features: (380, 64)


# 5. Call SVM and kNN from scikit-learn and train the extracted HOG, SIFT and SURF features, respectively, save three CSV files of test dataset using three features (10 points)

# KNN and SVM using 1NN --->HOG features

# Main code

In [12]:

train_features_hog = np.array(train_hog_features)
test_features_hog = np.array(test_hog_features)
train_features_sift = np.array(train_sift_features)
test_features_sift = np.array(test_sift_features)
train_features_surf = np.array(train_surf_features)
test_features_surf = np.array(test_surf_features)



In [13]:
# Train SVM classifier on HOG features
svmHog = svm.SVC(kernel='linear')
svmHog.fit(train_features_hog, train_hog_labels)

# Train KNN classifier on HOG features
knnHog = KNeighborsClassifier(n_neighbors=1)
knnHog.fit(train_features_hog, train_hog_labels)

# Predict using SVM classifier
svmHog_preds = []
for features in tqdm(test_features_hog):
    svmHog_preds.append(svmHog.predict([features]))

# Predict using KNN classifier
knnHog_preds = []
for features in tqdm(test_features_hog):
    knnHog_preds.append(knnHog.predict([features]))

# Convert numpy array to dataframe
svm_preds_df = pd.DataFrame(svmHog_preds, test_hog_labels)
knn_preds_df = pd.DataFrame(knnHog_preds, test_hog_labels)

# Write predictions to CSV files
svm_preds_df.to_csv("csv/hog_test_predictions_svm.csv")
knn_preds_df.to_csv("hog_test_predictions_knn.csv")
print("HOG predictions saved to CSV files")


100%|████████████████████████████████████████████████████████████████████████████████| 380/380 [00:10<00:00, 36.06it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 380/380 [00:49<00:00,  7.69it/s]

HOG predictions saved to CSV files





# KNN and SVM using 1NN --->SIFT features

In [23]:
# Train SVM classifier on SIFT features
svmSift = svm.SVC(kernel='linear')
svmSift.fit(sift_train_features,labels)


# Train KNN classifier on SIFT features
knnSift = KNeighborsClassifier(n_neighbors=1)
knnSift.fit(sift_train_features, labels)

# Predict using SVM classifier

svmSiftPred = svmSift.predict(sift_test_features)

# Predict using KNN classifier
knnSiftPred = knnSift.predict(sift_test_features)

# Convert numpy array to dataframe
svm_preds_df = pd.DataFrame(test_file_names, svmSiftPred)
knn_preds_df = pd.DataFrame(test_file_names, knnSiftPred)

# Write predictions to CSV files
svm_preds_df.to_csv("csv/sift_test_predictions_svm.csv")
knn_preds_df.to_csv("csv/sift_test_predictions_knn.csv")
print("SIFT predictions saved to CSV files")


SIFT predictions saved to CSV files


# KNN and SVM using 1NN --->SURF features

In [27]:

# Train SVM classifier on SURF features
svmSurf = svm.SVC(kernel='linear')
svmSurf.fit(train_surf_features, labels)

# Train KNN classifier on SURF features
knnSurf = KNeighborsClassifier(n_neighbors=1)
knnSurf.fit(train_surf_features, labels)

# Predict using SVM classifier

svmSurfPred = svmSurf.predict(test_surf_features)

# Predict using KNN classifier
knnSurfPred = knnSurf.predict(test_surf_features)

# Convert numpy array to dataframe
svm_preds_df = pd.DataFrame(test_file_names, svmSurfPred)
knn_preds_df = pd.DataFrame(test_file_names, knnSurfPred)

# Write predictions to CSV files
svm_preds_df.to_csv("csv/surf_test_predictions_svm.csv")
knn_preds_df.to_csv("csv/surf_test_predictions_knn.csv")
print("Surf predictions saved to CSV files")


Surf predictions saved to CSV files


# 6. Report the accuracy using Cow_teat_classfication_accuracy software, please attach the results image here (20 points)

### (1). SVM and 5NN using HOG features

![hog_svm.jpg](attachment:hog_svm.jpg)![hog_knn.jpg](attachment:hog_knn.jpg)

### (2). SVM and 5NN using SIFT features

![sift_svm.jpg](attachment:sift_svm.jpg)![sift_knn.jpg](attachment:sift_knn.jpg)

### (3). SVM and 5NN using SURF features

![surf_svm.jpg](attachment:surf_svm.jpg)![surf_knn.jpg](attachment:surf_knn.jpg)