This notebook trains a deep learning image classifier on images found through Bing image searches. You can change what it searches for by editing the `searches` list. After each Bing search completes, a few of the downloaded images are shown as examples before the model is trained. Once the model is trained, you can either search for more images to test it on or import your own images.

## Setting things up...

In [None]:
# Checking kaggle is connected to the internet
import socket,warnings
try:
    # NOTE: this changes the process-wide default socket timeout to 1 second
    # and leaves it set for the rest of the session.
    socket.setdefaulttimeout(1)
    # Open the probe connection inside a context manager so the socket is
    # always closed, whether or not the connection succeeds.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.connect(('1.1.1.1', 53))
except OSError as ex:
    # socket.error is an alias of OSError; chain the cause so the underlying
    # network error stays visible in the traceback.
    raise Exception("STOP: No internet. Click '>|' in top right and set 'Internet' switch to on") from ex

In [None]:
# Pip installing libraries - can ignore errors
# It's a good idea to ensure you're running the latest version of any libraries you need.
# `!pip install -Uqq <libraries>` upgrades to the latest version of <libraries>
# NB: You can safely ignore any warnings or errors pip spits out about running as root or incompatibilities
import os
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

if iskaggle:
    !pip install -Uqq fastai icrawler
    
    # More pip for current numpy issues...
    !pip install numpy==1.26.5 --upgrade --force-reinstall
    !pip install scipy==1.10.1 --upgrade --force-reinstall

In [None]:
# Import necessary modules
from icrawler.builtin import BingImageCrawler
from fastai.vision.all import *
from pathlib import Path
import shutil
import matplotlib.pyplot as plt

## Importing images for the training

In [None]:
def search_images(image_name, num_images=20, show_n=5, max_size=400, root=Path('.'), offset=0):
    """Download images from Bing, remove failed ones, show examples, and resize.

    The images are stored in ``root/image_name``. If that folder already
    exists it is deleted first, so every call starts from an empty folder.

    Args:
        image_name: Search keyword; also used as the destination folder name.
        num_images: Maximum number of images requested from the crawler.
        show_n: Number of example images to display (capped at ``num_images``
            and at the number of images actually kept). 0 shows nothing.
        max_size: Passed to ``resize_images`` as ``max_size``.
        root: Parent folder under which the ``image_name`` folder is created.
        offset: Passed to the crawler as ``offset``.

    Returns:
        None. The function works through side effects: files on disk,
        printed progress messages and a matplotlib figure.
    """
    # Never try to show more images than were requested
    if num_images < show_n:
        show_n = num_images
        print(f'Showing {show_n} images')

    # Destination folder for this search term
    path = root/image_name

    # Start from an empty folder so results from earlier runs do not mix in
    if path.exists():
        print(f"Folder '{path}' exists. Deleting all contents...")
        shutil.rmtree(path)
    path.mkdir(parents=True, exist_ok=True)

    # Crawl Bing and store the results in the destination folder
    crawler = BingImageCrawler(storage={'root_dir': path})
    crawler.crawl(keyword=image_name, max_num=num_images, offset=offset)

    images = get_image_files(path)
    print(f"Downloaded {len(images)} '{image_name}' images")

    # Delete any files that verify_images reports as failed
    failed = verify_images(images)
    if len(failed) > 0:
        failed.map(Path.unlink)
        print(f"Removed {len(failed)} failed images")

    # Refresh the image list after cleanup
    images = get_image_files(path)

    # Display the first few images
    n = min(show_n, len(images))
    if n > 0:
        # squeeze=False makes subplots always return a 2-D array of axes.
        # Without it, a single subplot (n == 1) comes back as a bare Axes
        # object, which cannot be iterated over.
        _, axs = plt.subplots(1, n, figsize=(n*3, 3), squeeze=False)
        for ax, image_file in zip(axs[0], images):
            ax.imshow(PILImage.create(image_file))
            ax.axis('off')
        plt.show()

    # Resize all images in the folder, writing them back to the same folder
    resize_images(path, max_size=max_size, dest=path)

In [None]:
# Root folder for the training data; each search term gets its own sub-folder
training_root = Path('dataset')

# Search terms to download; edit this list to change what the model learns
searches = ['ugly cat', 'cute adult cat']

# Number of images requested per search term
no_images = 5

# Download, preview and resize the images for every search term
for search in searches:
    search_images(search, num_images=no_images, show_n=5, root=training_root)

## Training the model

In [None]:
# Collect the image files under the training root, keeping only those whose
# parent folder is one of the search terms.
# (The training folder is `training_root`; no variable named `root` exists at
# notebook scope, so using it here would raise a NameError.)
items = [f for f in get_image_files(training_root) if f.parent.name in searches]
print(f"Training on the following folders: {searches}")

# Create the data block that feeds the model
dls = DataBlock(
    blocks=(ImageBlock, CategoryBlock), # Input: image, output: category
    get_items=lambda _: items, # Ignore the source argument and use the pre-filtered list
    splitter=RandomSplitter(valid_pct=0.2, seed=42), # Random split with valid_pct=0.2 and a fixed seed
    get_y=parent_label, # Label each image from its parent folder
    item_tfms=[Resize(192, method='squish')] # Transform each item to be the same size
).dataloaders(source=None, bs=32)

dls.show_batch(max_n=6) # Show an example of what is passed into the model

In [None]:
# Build a learner for the data loaders using the resnet18 architecture,
# reporting error rate as the metric
learn = vision_learner(dls, resnet18, metrics=error_rate)

# Fine-tune for 3 epochs
learn.fine_tune(epochs=3)

# Testing the Model

## Using search images

In [None]:
import random

# Pick a random image from the chosen data set
def choose_image(image_name, root):
    """Pick a random image file from one category folder.

    Args:
        image_name: Category name, i.e. the sub-folder of ``root`` to pick from.
        root: Folder that contains the category sub-folders.

    Returns:
        The path of a randomly chosen image, or None (after printing an
        error message) when the category folder does not exist or contains
        no images.
    """
    category_path = root/image_name
    if not category_path.exists():
        print('ERROR: Category not trained in model')
        return None
    images = get_image_files(category_path)
    # random.choice raises IndexError on an empty sequence, so report an
    # empty folder the same way as a missing one instead of crashing.
    if len(images) == 0:
        print(f"ERROR: No images found in '{category_path}'")
        return None
    return random.choice(images)

def test_image(img, model):
    """Predict the category of `img` with `model` and print the result.

    `img` is passed through `PILImage.create` before prediction. The
    predicted category and the probability at the predicted index are printed;
    nothing is returned.
    """
    prediction = model.predict(PILImage.create(img))
    # predict yields three values: the label, its index, and the probabilities
    label, index, probabilities = prediction
    print(f'This is a: {label}')
    print(f"Probability it's a {label}: {probabilities[index]:.4f}")

In [None]:
# Download a separate batch of images to try the trained model on
testing_root = Path('trainingset')

for search in searches:
    # show_n=0 skips the preview. The offset is passed through to the crawler,
    # presumably so these results differ from the training downloads; adjust
    # it if the number of training images was changed.
    search_images(search, num_images=5, show_n=0, root=testing_root, offset=100)

In [None]:
# Run the model on every downloaded test image, category by category.
# The folders under testing_root are named after the entries in `searches`,
# so iterate over those rather than a hard-coded folder name that was never
# downloaded.
for search in searches:
    for img in get_image_files(testing_root/search):
        display(PILImage.create(img))
        test_image(img, learn)

## Using own images 

In [None]:
### For one image
# Location of the image to classify; adjust the path depending on what you uploaded
img_path = Path("/kaggle/input/joscat/IMG-20251002-WA0030.jpg")
img = PILImage.create(img_path)
# thumbnail() is called for its in-place effect on img (its return value is not used)
img.thumbnail((400, 300))
display(PILImage.create(img))
# Print the model's predicted category and the probability of that category
test_image(img, learn)

In [None]:
### For a folder of images
# Folder holding the images to classify; adjust as needed
test_folder = Path("/kaggle/input/catsss")

# Classify every image file found directly inside the folder
for img_path in test_folder.iterdir():
    # Skip anything that does not have a recognised image extension
    if img_path.suffix.lower() not in ['.jpg', '.jpeg', '.png']:
        continue
    img = PILImage.create(img_path)  # fastai PILImage
    # Resize in place before displaying and predicting
    img.thumbnail((400, 300))
    display(PILImage.create(img))
    test_image(img, learn)