In [None]:
from icrawler.builtin import GoogleImageCrawler, BingImageCrawler
import os

# Create output directory if it doesn't exist
output_dir = 'data/raw/'
os.makedirs(output_dir, exist_ok=True)

# Example using Google Image crawler
def crawl_google_images(keyword, max_num=100):
     """
     Download images from Google using the provided keyword
     
     Args:
          keyword: Search term
          max_num: Maximum number of images to download (default: 100)
     """
     google_crawler = GoogleImageCrawler(
          storage={'root_dir': os.path.join(output_dir, 'google', keyword)})
     
     google_crawler.crawl(keyword=f'{keyword} track layout 2024', max_num=max_num)
     print(f"Downloaded up to {max_num} images for '{keyword}' from Google")

# Example using Bing Image crawler
def crawl_bing_images(keyword, max_num=100):
     """
     Download images from Bing using the provided keyword
     
     Args:
          keyword: Search term
          max_num: Maximum number of images to download (default: 100)
     """
     bing_crawler = BingImageCrawler(
          storage={'root_dir': os.path.join(output_dir, 'bing', keyword)})
     
     bing_crawler.crawl(keyword=f'{keyword} track layout 2024', max_num=max_num)
     print(f"Downloaded up to {max_num} images for '{keyword}' from Bing")

# Usage example
search_terms = [
    "Bahrain International Circuit",
    "Jeddah Corniche Circuit",
    "Albert Park Circuit",
    "Suzuka International Racing Course",
    "Shanghai International Circuit",
    "Miami International Autodrome",
    "Imola (Autodromo Enzo e Dino Ferrari)",
    "Circuit de Monaco",
    "Circuit Gilles Villeneuve",
    "Circuit de Barcelona-Catalunya",
    "Red Bull Ring",
    "Silverstone Circuit",
    "Hungaroring",
    "Circuit de Spa-Francorchamps",
    "Circuit Zandvoort",
    "Monza (Autodromo Nazionale Monza)",
    "Baku City Circuit",
    "Marina Bay Street Circuit",
    "Circuit of the Americas",
    "Autódromo Hermanos Rodríguez",
    "Interlagos (Autódromo José Carlos Pace)",
    "Las Vegas Street Circuit",
    "Lusail International Circuit",
    "Yas Marina Circuit"
]
for term in search_terms:
    crawl_google_images(term, max_num=30)
    crawl_bing_images(term, max_num=30)

In [14]:
import torch
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import shutil
from sklearn.model_selection import train_test_split
from PIL import Image

In [7]:
def setup_model(num_classes=2):
    # Load pre-trained ResNet18
    model = models.resnet18(weights='IMAGENET1K_V1')
    
    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False
    
    # Replace the final fully connected layer
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, num_classes)
    )
    
    # Move model to GPU if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    print(f"Using device: {device}")
    
    return model, device

In [31]:
def filter_images(model, model_path='f1_track_classifier.pth', 
                            source_dir='data/raw',
                            filtered_dir='data/filtered_images'):
     
     if isinstance(model, str):
        # Load model from saved state
        # Ensure setup_model is defined earlier in the notebook
        try:
            model, _ = setup_model() 
            model.load_state_dict(torch.load(model_path))
        except NameError:
             print("Error: setup_model function not defined. Cannot load model from path.")
             return 0, 0
        except FileNotFoundError:
             print(f"Error: Model file not found at {model_path}")
             return 0, 0
     
     model.eval()
     device = next(model.parameters()).device

     val_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
     
     os.makedirs(filtered_dir, exist_ok=True)

     total_images = 0
     filtered_images = 0

     for source in os.listdir(source_dir):
          source_path = os.path.join(source_dir, source)
          if not os.path.isdir(source_path):
               continue
          for track in os.listdir(source_path):
               track_path = os.path.join(source_path, track)
               if not os.path.isdir(track_path):
                    continue
               os.makedirs(os.path.join(filtered_dir, track), exist_ok=True)

               for img_name in os.listdir(track_path):
                    img_path = os.path.join(track_path, img_name)
                    total_images += 1

                    try:
                         img = Image.open(img_path).convert('RGB')
                         img_tensor = val_transforms(img).unsqueeze(0).to(device)

                         with torch.no_grad():
                              output = model(img_tensor)
                              _, predicted = torch.max(output, 1)
                         
                         if predicted.item() == 0:
                              filtered_images += 1
                              shutil.copy(img_path, os.path.join(filtered_dir, track, f'{source}_{img_name}'))
                    except Exception as e:
                         print(f"Error processing image {img_path}: {e}")
     # Report statistics
     print(f"\nProcessing complete!")
     print(f"Total images processed: {total_images}")
     print(f"Images with layout: {filtered_images}")
     # Add check for total_images before division
     if total_images > 0:
          print(f"Clean Images rate: {filtered_images/total_images*100:.1f}%")
     else:
          print("Clean Images rate: N/A (No images processed)")
    
     return None
                         

In [32]:
filter_images('f1_track_classifier.pth')

Using device: cpu


  model.load_state_dict(torch.load(model_path))


Error processing image data/raw/google/Bahrain International Circuit/.DS_Store: cannot identify image file '/Users/bszczesniak/projekty/f1-layout-recognition/data/raw/google/Bahrain International Circuit/.DS_Store'

Processing complete!
Total images processed: 1369
Images with layout: 1109
Clean Images rate: 81.0%


In [23]:
def clean_folders():
     removed = 0
     for source in os.listdir('data/raw'):
         source_path = os.path.join('data/raw', source)
         if not os.path.isdir(source_path):
               continue
         for track in os.listdir(source_path):
              track_path = os.path.join(source_path, track)
              if not os.path.isdir(track_path):
               continue
              for img_name in os.listdir(track_path):
                   if img_name.startswith(source):
                         img_path = os.path.join(track_path, img_name)
                         os.remove(img_path)
                         removed += 1
                         print(f"Removed {img_path}")
     return removed

print(clean_folders())
                   


Removed data/raw/google/Suzuka International Racing Course/google_000014.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000029.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000001.png
Removed data/raw/google/Suzuka International Racing Course/google_000015.png
Removed data/raw/google/Suzuka International Racing Course/google_000003.png
Removed data/raw/google/Suzuka International Racing Course/google_000017.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000002.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000016.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000006.png
Removed data/raw/google/Suzuka International Racing Course/google_000007.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000013.jpg
Removed data/raw/google/Suzuka International Racing Course/google_000005.png
Removed data/raw/google/Suzuka International Racing Course/google_000011.jpg