In [2]:
# run only once
!pip install duckduckgo_search

Collecting duckduckgo_search
  Downloading duckduckgo_search-6.2.13-py3-none-any.whl.metadata (25 kB)
Collecting primp>=0.6.3 (from duckduckgo_search)
  Downloading primp-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading duckduckgo_search-6.2.13-py3-none-any.whl (27 kB)
Downloading primp-0.6.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: primp, duckduckgo_search
Successfully installed duckduckgo_search-6.2.13 primp-0.6.3


# Definování funkce pro hledání obrázků

In [3]:
from duckduckgo_search import DDGS

def search_images(keyword, max_results=10):
    with DDGS() as ddgs:
        images = ddgs.images(
            keyword,
            max_results=max_results
        )
        return [img['image'] for img in images]

# Definování funkce pro stahování obrázků

In [4]:
import os
import requests
from urllib.parse import urlparse
import warnings

def download_image(url, folder, custom_name=None, verbose=True):
    # Create the folder if it doesn't exist
    os.makedirs(folder, exist_ok=True)

    # Get the filename from the URL or use the custom name
    if custom_name:
        filename = custom_name
    else:
        filename = os.path.basename(urlparse(url).path)
        if not filename:
            filename = 'image.jpg'  # Default filename if none is found in the URL

    # Ensure the filename has an extension
    if not os.path.splitext(filename)[1]:
        filename += '.jpg'

    filepath = os.path.join(folder, filename)

    # If the file already exists, append a number to make it unique
    base, extension = os.path.splitext(filepath)
    counter = 1
    while os.path.exists(filepath):
        filepath = f"{base}_{counter}{extension}"
        counter += 1

    try:
        # Send a GET request to the URL with a timeout of 10 seconds
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises an HTTPError for bad responses

        # Check if the content type is an image
        content_type = response.headers.get('content-type', '')
        if not content_type.startswith('image'):
            if verbose:
                warnings.warn(f"The URL does not point to an image. Content-Type: {content_type}")
            return False

        # Write the image content to the file
        with open(filepath, 'wb') as f:
            f.write(response.content)

        if verbose:
            print(f"Image successfully downloaded: {filepath}")
        return True

    except requests.exceptions.Timeout:
        if verbose:
            warnings.warn(f"Download timed out for URL: {url}")
    except requests.exceptions.HTTPError as e:
        if verbose:
            warnings.warn(f"HTTP error occurred: {e}")
    except requests.exceptions.RequestException as e:
        if verbose:
            warnings.warn(f"An error occurred while downloading the image: {e}")
    except IOError as e:
        if verbose:
            warnings.warn(f"An error occurred while writing the file: {e}")

    return False

# Stahování obrázků

## marshall paw patrol


In [5]:
keyword = "marshall paw patrol"
image_urls = search_images(keyword, 250)
len(image_urls)

247

In [6]:
from tqdm.notebook import tqdm

for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/marshall/", f'image{i}.jpg', verbose=False)

  0%|          | 0/247 [00:00<?, ?it/s]

## sky paw patrol


In [7]:
keyword = "sky paw patrol"
image_urls = search_images(keyword, 250)
len(image_urls)

224

In [8]:
from tqdm.notebook import tqdm

for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/sky/", f'image{i}.jpg', verbose=False)

  0%|          | 0/224 [00:00<?, ?it/s]

## chase paw patrol

In [9]:
keyword = "chase paw patrol"
image_urls = search_images(keyword, 250)
len(image_urls)

137

In [10]:
from tqdm.notebook import tqdm

for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/chase/", f'image{i}.jpg', verbose=False)

  0%|          | 0/137 [00:00<?, ?it/s]

## rubble paw patrol

In [11]:
keyword = "rubble paw patrol"
image_urls = search_images(keyword, 250)
len(image_urls)

236

In [12]:
from tqdm.notebook import tqdm

for i, url in enumerate(tqdm(image_urls)):
    download_image(url, "./dataset/rubble/", f'image{i}.jpg', verbose=False)

  0%|          | 0/236 [00:00<?, ?it/s]

# Nacteni prislusnych balicku a transformace obrazku

In [13]:
import torch
import torch.nn as tnn
import torch.nn.functional as tnf
import torch.optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

# Data transformations
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])

# Load the dataset
dataset = datasets.ImageFolder(root='./dataset', transform=transform)

# Rozdeleni dat na trenovaci a testovaci

In [None]:
train_size = int(0.75 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# Tvorba neuronové sítě

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class SimpleCNN(tnn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = tnn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = tnn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = tnn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.flat = tnn.Flatten()
        self.fc1 = tnn.Linear(in_features=64*7*7, out_features=128)
        self.drop = tnn.Dropout(0.25)
        self.fc2 = tnn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        x = self.pool(tnf.relu(self.conv1(x)))  # Pooling reduces image size
        x = self.pool(tnf.relu(self.conv2(x)))
        x = self.flat(x)
        x = tnf.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        return x

model = SimpleCNN().to(device)

# Stanovení kritéria a trénování neuronové sítě

In [None]:
criterion = tnn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02)

In [None]:
# Training loop
for epoch in range(5):
    model.train()
    running_loss = 0.0
    for im, lab in train_loader:
        im = im.to(device)
        lab = lab.to(device)

        optimizer.zero_grad()

        outputs = model(im)
        loss = criterion(outputs, lab)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch + 1} / 5], Loss: {running_loss / len(train_loader):.4f}')




Epoch [1 / 5], Loss: 2.2713
Epoch [2 / 5], Loss: 2.1736
Epoch [3 / 5], Loss: 2.0564
Epoch [4 / 5], Loss: 1.8794
Epoch [5 / 5], Loss: 1.6773


# Vyhodnoceni neuronove site na testovacich datech

In [None]:
correct = 0
total = 0
with torch.no_grad():  # Disable gradient calculation for evaluation
    for im, lab in test_loader:
        # Move images and labels to the device
        im = im.to(device)
        lab = lab.to(device)

        outputs = model(im)
        _, predicted = torch.max(outputs, 1)
        total += lab.size(0)
        correct += (predicted == lab).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')

Accuracy: 26.03%


# Stažení datasetu


In [14]:
import os

# Zobrazíme obsah složky dataset
dataset_path = '/content/dataset'
print(os.listdir(dataset_path))

['sky', 'rubble', 'marshall', 'chase']


In [16]:
import shutil

# Znovu vytvoření ZIP souboru
shutil.make_archive('/content/dataset', 'zip', '/content/dataset')

# Stáhnutí souboru
from google.colab import files
files.download('/content/dataset.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>