In [None]:
import socket,warnings
# Fail fast with a readable message when the kernel has no internet access.
# The probe opens a TCP connection to 1.1.1.1 on port 53.
# NOTE: setdefaulttimeout(1) is process-wide; the 1-second timeout stays in effect for
# every socket created later in this kernel that does not set its own timeout.
try:
    socket.setdefaulttimeout(1)
    # The context manager closes the probe socket whether or not connect() succeeds.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.connect(('1.1.1.1', 53))
except socket.error as ex:
    # Chain the original error so the underlying cause stays visible in the traceback.
    raise Exception("STOP: No internet. Click '>|' in top right and set 'Internet' switch to on") from ex

In [None]:
# It's a good idea to ensure you're running the latest version of any libraries you need.
# `!pip install -Uqq <libraries>` upgrades to the latest version of <libraries>
# NB: You can safely ignore any warnings or errors pip spits out about running as root or incompatibilities
import os
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

if iskaggle:
    !pip install -Uqq fastai

In [None]:
# Skip this cell if you already have duckduckgo_search installed
!pip install -Uqq duckduckgo_search
!pip install -Uqq fastai
!pip install -Uqq fastbook

In [None]:
from duckduckgo_search import DDGS 
from fastcore.all import *

def search_images(keywords, max_images=200):
    """Run a DuckDuckGo image search and return the 'image' field of every hit.

    keywords:   search query passed to `DDGS().images`.
    max_images: forwarded to the search as `max_results`.
    Returns an `L` holding the 'image' entry of each result.
    """
    hits = DDGS().images(keywords, max_results=max_images)
    return L(hits).itemgot('image')

In [None]:
# Ask for a single result and display its URL as the cell output.
urls = search_images(keywords='sphynx cat photos', max_images=1)
urls[0]

In [None]:
from fastdownload import download_url
# Save the first search hit (`urls` comes from the previous cell) to a local file.
dest = 'sphynx_cat.jpg'
download_url(urls[0], dest, show_progress=False)

# NOTE(review): this star import sits mid-cell and may rebind names defined above it;
# presumably it is what supplies `Image` below — confirm before moving it.
from fastai.vision.all import *
# Open the downloaded file and show a thumbnail as the cell output.
im = Image.open(dest)
im.to_thumb(256,256)

In [None]:
# Same as for the sphynx cat: download the first hit, then show a thumbnail of it.
download_url(
    search_images('hairy cat photos', max_images=1)[0],
    'hairy_cat.jpg',
    show_progress=False,
)
Image.open('hairy_cat.jpg').to_thumb(256, 256)

In [None]:
from fastai.vision.all import *
from time import sleep
from fastbook import search_images_ddg

def filtered_search(term, max_images=30):
    """Search for `term` with `search_images_ddg` and drop results that point at .fpx files.

    term:       search query, passed through unchanged.
    max_images: passed through as the second positional argument of `search_images_ddg`.
    Returns a plain list of the remaining results, in their original order.
    """
    def _is_fpx(url):
        # Compare only the part before any query string or fragment, so that
        # 'image.fpx?w=1' is also recognised as an .fpx file.
        # NOTE(review): fastai's download_images appears to strip the query string before
        # choosing the saved file's suffix — confirm against the installed version.
        url_path = url.split('?', 1)[0].split('#', 1)[0]
        return url_path.lower().endswith('.fpx')

    results = search_images_ddg(term, max_images)
    return [r for r in results if not _is_fpx(r)]

searches = ['hairy cat', 'sphynx cat']
path = Path('sphynx_cat_or_not')

# Build one sub-folder of `path` per search category: download, clean up, then resize.
for o in searches:
    dest = (path/o)
    dest.mkdir(exist_ok=True, parents=True)

    # Three query variants per category.
    for term in [f'{o} photo', f'{o} sun photo', f'{o} shade photo']:
        print(f"Searching and downloading: {term}")
        try:
            download_images(dest, urls=filtered_search(term))
            sleep(10)  # pause between searches (presumably to avoid rate limiting)
        except Exception as e:
            # Best effort: report the failure and carry on with the next term.
            print(f"Failed to download '{term}': {e}")

    # Delete post-download .fpx files
    for f in get_image_files(dest):
        if f.suffix.lower() == '.fpx':
            f.unlink()

    # Clean unreadable/corrupted images
    failed = verify_images(get_image_files(dest))
    failed.map(Path.unlink)

    # Resize clean images in place. `dest=` must be given explicitly: without it
    # resize_images writes the resized copies to its default output directory (the
    # current working directory) and leaves the files in this folder at full size.
    resize_images(dest, max_size=400, dest=dest)

In [None]:
# Second pass over everything under `path`: remove any image that fails verification
# and show how many were removed.
failed = verify_images(get_image_files(path))
for broken in failed:
    broken.unlink()
len(failed)

In [None]:
# Describe how to turn the image folders into training/validation batches.
dls = DataBlock(
    blocks=(ImageBlock, CategoryBlock),               # image inputs, categorical targets
    get_items=get_image_files,                        # items are the image files found under `path`
    get_y=parent_label,                               # label = name of the containing folder
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # fixed-seed 80/20 split
    item_tfms=[Resize(192, method='squish')],
).dataloaders(path)

# Preview a few labelled samples.
dls.show_batch(max_n=6)

In [None]:
# Build a resnet18-based learner that reports the error rate, then fine-tune it for 3 epochs.
learn = vision_learner(dls, arch=resnet18, metrics=error_rate)
learn.fine_tune(epochs=3)

In [None]:
# NOTE(review): this image file name is hard-coded and no cell in view creates it,
# so this cell only works if the file already exists in the working directory.
is_sphynx_cat,_,probs = learn.predict(PILImage.create('f9b6108c-4d88-442f-8bc8-4f7b13eaf164.jpg'))
# Look up the probability by label rather than by a fixed position, so the printed value
# stays correct if the category order ever changes.
sphynx_idx = learn.dls.vocab.o2i['sphynx cat']
print(f"This is a: {is_sphynx_cat}.")
print(f"Probability it's a sphynx cat: {probs[sphynx_idx]:.4f}")

In [50]:
from pathlib import Path

# Remove every file under bird_or_not/ whose (lower-cased) suffix is listed below,
# counting the removals as we go.
base_path = Path("bird_or_not")
image_exts = [".jpg", ".jpeg", ".png", ".webp", ".fpx", ".gif", ".img", ".jpg!d", ".jpg!bw700"]

deleted = 0

for entry in base_path.rglob("*"):
    # Skip directories and anything else that is not a regular file.
    if not entry.is_file():
        continue
    # Skip files whose suffix is not in the list.
    if entry.suffix.lower() not in image_exts:
        continue
    entry.unlink()
    deleted += 1

print(f"Recursively deleted {deleted} image files from bird_or_not/")

Recursively deleted 2 image files from bird_or_not/
