In [None]:
# Move the Kaggle API token (kaggle.json, expected in the current working
# directory) into ~/.kaggle and restrict it to owner read/write.
# NOTE(review): the `mv` fails on a second run because kaggle.json has already
# been moved; re-upload the token or skip this cell when re-running.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the Cats-vs-Dogs archive with the Kaggle CLI. Per the recorded
# output, the file is saved as microsoft-catsvsdogs-dataset.zip in /content.
!kaggle datasets download -d shaunthesheep/microsoft-catsvsdogs-dataset

Dataset URL: https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset
License(s): other
Downloading microsoft-catsvsdogs-dataset.zip to /content
100% 786M/788M [00:29<00:00, 26.4MB/s]
100% 788M/788M [00:29<00:00, 27.9MB/s]


In [None]:
import zipfile

# The Kaggle CLI stores the archive as 'microsoft-catsvsdogs-dataset.zip'
# (see the download output), so that is the file to open; a file named
# 'dogs-vs-cats.zip' is never created by this notebook.
# The target folder stays 'dogs-vs-cats' because later cells read from it.
with zipfile.ZipFile('microsoft-catsvsdogs-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('dogs-vs-cats')

In [None]:
import os
# Root folder of the extracted images; adjust if the archive layout differs.
dataset_path = 'dogs-vs-cats/PetImages'
# One sub-folder per class inside the root folder.
cats_path, dogs_path = (
    os.path.join(dataset_path, class_dir) for class_dir in ('Cat', 'Dog')
)

In [None]:
from skimage import io, color
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Function to load and preprocess images
def load_and_preprocess_images(image_paths, image_size=(64, 64)):
    """Load images, resize them, convert them to grayscale and flatten them.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the files to read with ``skimage.io.imread``.
    image_size : tuple of int, default (64, 64)
        Output shape handed to ``skimage.transform.resize``.

    Returns
    -------
    numpy.ndarray
        One flattened grayscale image per row. Files that cannot be read, or
        whose array is neither 2-D (grayscale) nor 3-D with 3 channels (RGB),
        are reported with ``print`` and skipped, so the number of rows can be
        smaller than ``len(image_paths)``. When nothing could be loaded an
        empty array of shape ``(0, prod(image_size))`` is returned so it can
        still be concatenated with non-empty results.

    Notes
    -----
    ``skimage.transform.resize`` converts its input to float by default
    (``preserve_range=False``), so the returned values are expected to lie in
    [0, 1] already and need no further division by 255.
    """
    images = []
    for path in image_paths:
        try:
            image = io.imread(path)  # Load image from file

            is_grayscale = image.ndim == 2
            is_rgb = image.ndim == 3 and image.shape[2] == 3
            if not (is_grayscale or is_rgb):
                # f-string so the offending shape really shows up in the log
                # (the message used to print the literal "{image.shape}").
                raise ValueError(f"Unexpected image shape: {image.shape}")

            # Both supported layouts are resized the same way.
            image_resized = resize(image, image_size, anti_aliasing=True)
            if is_grayscale:
                image_gray = image_resized  # Already single-channel
            else:
                image_gray = color.rgb2gray(image_resized)

            images.append(image_gray.flatten())  # Flatten to a 1D array
        except Exception as e:
            # Deliberate best-effort loading: report the file and keep going.
            print(f"Error processing {path}: {e}")

    if not images:
        # Keep a 2-D shape even when every file was skipped.
        return np.empty((0, int(np.prod(image_size))))
    return np.array(images)  # Return as a numpy array

In [None]:
def get_image_paths(directory, extensions=None):
    """Return the paths of the entries in ``directory`` in sorted order.

    Parameters
    ----------
    directory : str
        Folder whose entries are listed with ``os.listdir``.
    extensions : str or iterable of str, optional
        When given, only names ending with one of these suffixes are kept
        (case-insensitive), e.g. ``('.jpg', '.jpeg', '.png')``. This lets
        callers skip non-image files such as ``Thumbs.db``. The default
        ``None`` keeps every entry.

    Returns
    -------
    list of str
        ``directory`` joined with each kept entry name, sorted by name.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split downstream) reproducible.
    filenames = sorted(os.listdir(directory))
    if extensions is not None:
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)
        filenames = [name for name in filenames if name.lower().endswith(suffixes)]
    return [os.path.join(directory, name) for name in filenames]

In [None]:
# Gather every file path from the cat folder and the dog folder (in that order).
cat_image_paths, dog_image_paths = map(get_image_paths, (cats_path, dogs_path))

In [None]:
# Turn each class's files into a matrix of flattened grayscale images.
# Unreadable files are skipped inside the loader, so row counts may be
# smaller than the number of paths.
cat_images = load_and_preprocess_images(cat_image_paths)
dog_images = load_and_preprocess_images(dog_image_paths)

# One label per loaded image: cats -> 0, dogs -> 1
cat_labels = np.zeros(len(cat_images))
dog_labels = np.ones(len(dog_images))

Error processing dogs-vs-cats/PetImages/Cat/10404.jpg: attempt to seek outside sequence
Error processing dogs-vs-cats/PetImages/Cat/666.jpg: 'NoneType' object has no attribute 'ReadAsArray'
Error processing dogs-vs-cats/PetImages/Cat/Thumbs.db: Could not find a backend to open `dogs-vs-cats/PetImages/Cat/Thumbs.db`` with iomode `ri`.
Error processing dogs-vs-cats/PetImages/Dog/11285.jpg: Unexpected image shape: {image.shape}
Error processing dogs-vs-cats/PetImages/Dog/7514.jpg: Unexpected image shape: {image.shape}
Error processing dogs-vs-cats/PetImages/Dog/9078.jpg: Unexpected image shape: {image.shape}
Error processing dogs-vs-cats/PetImages/Dog/6245.jpg: Unexpected image shape: {image.shape}
Error processing dogs-vs-cats/PetImages/Dog/11702.jpg: 'NoneType' object has no attribute 'ReadAsArray'
Error processing dogs-vs-cats/PetImages/Dog/1789.jpg: Unexpected image shape: {image.shape}
Error processing dogs-vs-cats/PetImages/Dog/Thumbs.db: Could not find a backend to open `dogs-vs-ca



In [None]:
# Stack cat rows first and dog rows second; labels follow the same order so
# features and targets stay aligned.
X = np.concatenate([cat_images, dog_images])
y = np.hstack([cat_labels, dog_labels])

In [None]:
# Path prefix for the preprocessed arrays on Google Drive.
# Uses the 'MyDrive' spelling (no space) so it matches the prefix the reload
# cell reads from.
# NOTE(review): assumes Google Drive is already mounted at /content/drive; no
# mount call is visible in this notebook.
preprocessed_data_path = '/content/drive/MyDrive/cats_dogs_preprocessed'

# Save the preprocessed data arrays
np.save(preprocessed_data_path + '_X.npy', X)
np.save(preprocessed_data_path + '_y.npy', y)


In [None]:
# Reload the cached arrays so the expensive image preprocessing can be skipped.
preprocessed_data_path = '/content/drive/MyDrive/cats_dogs_preprocessed'

X = np.load(preprocessed_data_path + '_X.npy')
y = np.load(preprocessed_data_path + '_y.npy')

# No division by 255 here: skimage.transform.resize (default
# preserve_range=False) already returns floats in [0, 1], so the saved
# features are normalised. Dividing again squashed them to roughly
# [0, 0.004] and made this cell non-idempotent (each re-run shrank X further).

In [None]:
# Sanity check: (number of samples, number of features per sample)
print(np.shape(X))

(24992, 4096)


In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# shuffle repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Sanity check: size of the training portion after the split
print(np.shape(X_train))

(19993, 4096)


In [None]:
# Linear-kernel SVM fitted on the first 5000 training rows only
model = SVC(kernel='linear')
model.fit(X=X_train[:5000], y=y_train[:5000])

In [None]:
# Score the model on the first 100 held-out rows only
y_pred = model.predict(X_test[:100])
accuracy = accuracy_score(y_true=y_test[:100], y_pred=y_pred)

In [None]:
# Report the accuracy as a percentage with two decimals. It is measured on
# only the 100 test rows predicted earlier, so it is a coarse estimate.
print(f"Model accuracy: {accuracy * 100:.2f}%")

Model accuracy: 55.00%
