In [22]:
# pip install icrawler

import os
import random
import shutil
import stat
from icrawler.builtin import GoogleImageCrawler
from concurrent.futures import ThreadPoolExecutor, as_completed

def handle_remove_readonly(func, path, exc_info):
    os.chmod(path, stat.S_IWRITE)
    func(path)

# função worker para cada classe
def process_class(label, n_images, raw_dir, output_dir, splits):
    # 1) Download
    dest = os.path.join(raw_dir, label)
    os.makedirs(dest, exist_ok=True)
    crawler = GoogleImageCrawler(storage={"root_dir": dest})
    crawler.crawl(keyword=label, max_num=n_images)

    # 2) Shuffle e split
    imgs = [
        f for f in os.listdir(dest)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    ]
    random.shuffle(imgs)
    n = len(imgs)
    i_train = int(splits["train"] * n)
    i_val   = int((splits["train"] + splits["val"]) * n)

    split_map = {
        "train": imgs[:i_train],
        "val":   imgs[i_train:i_val],
        "test":  imgs[i_val:]
    }

    # 3) Move para train/ val/ test/
    for split, files in split_map.items():
        dst_dir = os.path.join(output_dir, split, label)
        os.makedirs(dst_dir, exist_ok=True)
        for fname in files:
            shutil.move(
                os.path.join(dest, fname),
                os.path.join(dst_dir, fname)
            )
    return f"{label} concluído ({n} imagens processadas)"

if __name__ == "__main__":
    # seu dicionário de classes
    classes = {
        "ferrari":     1000,
        "lamborghini": 1000,
        "porsche":     1000,
        "bmw":          1000,
        "audi":         1000,
        "mercedes":     1000,
        "tesla":        1000,
        "toyota":       1000,
        "ford":         1000,
        "volkswagen":   1000,
    }

    raw_dir    = "dataset/raw_images"
    output_dir = "dataset"
    splits = {"train":0.70, "val":0.15, "test":0.15}

    # 4) Executor com uma thread por classe
    with ThreadPoolExecutor(max_workers=len(classes)) as executor:
        # mapeia cada classe a um Future
        futures = {
            executor.submit(process_class, label, n, raw_dir, output_dir, splits): label
            for label, n in classes.items()
        }
        # aguarda e imprime status
        for future in as_completed(futures):
            print(future.result())

    # 5) Limpeza final do raw_dir (apenas uma vez)
    try:
        shutil.rmtree(raw_dir, onerror=handle_remove_readonly)
        print("✅ Diretório temporário removido com sucesso.")
    except Exception as e:
        print(f"⚠️ Não foi possível remover completamente '{raw_dir}': {e}")

    print("✅ Concluído! Diretório 'dataset/' agora contém apenas train/, val/ e test/.")

2025-05-17 16:36:19,250 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,253 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,254 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,255 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-05-17 16:36:19,258 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-05-17 16:36:19,258 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,261 - INFO - icrawler.crawler - starting 1 feeder threads...
2025-05-17 16:36:19,262 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,264 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,266 - INFO - icrawler.crawler - starting 1 parser threads...
2025-05-17 16:36:19,267 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,269 - INFO - icrawler.crawler - start crawling...
2025-05-17 16:36:19,269 - INFO - icrawler.crawler - starting 1 parser threads...
2025-05-17 16:36:19,270 - INFO - ic

toyota concluído (57 imagens processadas)


2025-05-17 16:36:50,601 - INFO - downloader - image #23	https://motorshow.com.br/wp-content/uploads/sites/2/2023/01/ford-serief-46-anos-lideranca-e1672857701362.jpg
2025-05-17 16:36:50,672 - ERROR - downloader - Response status code 403, file https://www.ford.com.br/content/dam/Ford/website-assets/latam/br/nameplate/2025/ranger-raptor/colorizer/preto-asturia/fbr-ranger-raptor-color-preto-asturia.jpg
2025-05-17 16:36:50,712 - INFO - parser - parsing result page https://www.google.com/search?q=audi&ijn=1&start=100&tbs=&tbm=isch
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\Users\guilh\miniconda3\envs\Projeto_ML\Lib\threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "c:\Users\guilh\miniconda3\envs\Projeto_ML\Lib\site-packages\ipykernel\ipkernel.py", line 766, in run_closure
    _threading_Thread_run(self)
  File "c:\Users\guilh\miniconda3\envs\Projeto_ML\Lib\threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  Fi

porsche concluído (34 imagens processadas)


2025-05-17 16:36:51,663 - INFO - downloader - image #54	https://www.insidehook.com/wp-content/uploads/2024/08/lamborghini-revuelto-insidehook.jpg
2025-05-17 16:36:51,754 - ERROR - downloader - Exception caught when downloading file https://www.bmw.com.br/content/dam/bmw/common/all-models/m-series/x6m/2023/navigation/bmw-m-series-x6-m-modelfinder.png, error: HTTPSConnectionPool(host='www.bmw.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 0
2025-05-17 16:36:51,825 - INFO - downloader - image #2	https://cdn.motor1.com/images/mgl/JOXpRA/s3/2023-mercedes-glc-coupe.jpg
2025-05-17 16:36:52,030 - ERROR - downloader - Exception caught when downloading file https://media.ed.edmunds-media.com/tesla/model-3/2025/oem/2025_tesla_model-3_sedan_long-range_fq_oem_1_1600.jpg, error: HTTPSConnectionPool(host='media.ed.edmunds-media.com', port=443): Read timed out. (read timeout=5), remaining retry times: 0
2025-05-17 16:36:52,128 - INFO - downloader - image #53	https://uploa

volkswagen concluído (58 imagens processadas)


2025-05-17 16:36:55,898 - INFO - downloader - image #58	https://www.gran-turismo.com/images/c/i1wnlqdhkPPoTz.jpg
2025-05-17 16:36:56,481 - INFO - downloader - image #59	https://hips.hearstapps.com/hmg-prod/images/2022-lamborghini-aventador-109-1625607587.jpg
2025-05-17 16:36:56,515 - INFO - parser - parsing result page https://www.google.com/search?q=lamborghini&ijn=1&start=100&tbs=&tbm=isch
2025-05-17 16:36:56,520 - INFO - downloader - image #61	https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEixwlJcRCtWol_fxFuzzAN0p1XdxBPfA8uSBNe6Fx4lIrxnjmbh3D6KLKDyqMYB0juHqGDyBKVUHFfY6damcimSyd5hLz_XUHxWEYVhlHagKcizhH1krjq62g3HnXT4yV8OCSU-Jo7QEXVq1VxZl_734UqO17RtrNr_3XEdMvhaMQmhB2XVcd87SXnCAuhi/s2560/Ferrari-12Cilindri-Spider%20%289%29.jpg
2025-05-17 16:36:56,533 - INFO - downloader - image #36	https://www.motortrend.com/files/6793fa7df3811f0008acd379/1-2025-tesla-model-s-front-view.jpg
Exception in thread parser-001:
Traceback (most recent call last):
  File "c:\Users\guilh\miniconda3\en

audi concluído (58 imagens processadas)


2025-05-17 16:37:00,875 - INFO - downloader - image #41	https://ichef.bbci.co.uk/ace/ws/640/cpsprodpb/1485/live/80a729c0-029b-11ef-97f7-e98b193ef1b8.jpg
2025-05-17 16:37:01,224 - INFO - downloader - image #42	https://www.radioflyer.com/cdn/shop/files/front-left-silver.png
2025-05-17 16:37:01,998 - ERROR - downloader - Exception caught when downloading file https://www2.mercedes-benz.com.br/content/dam/hq/passengercars/cars/gle/gle-suv-v167-fl-pi/overview/highlights/01-2023/images/mercedes-benz-gle-suv-v167-highlights-product-film-videostill-3302x1858-01-2023.jpg, error: HTTPSConnectionPool(host='www2.mercedes-benz.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 1
2025-05-17 16:37:02,588 - ERROR - downloader - Exception caught when downloading file https://www.bmw.com.br/content/dam/bmw/common/all-models/x-series/x6/2023/navigation/bmw-x-series-x6-m60i-modelfinder.png, error: HTTPSConnectionPool(host='www.bmw.com.br', port=443): Read timed out. (read timeout

lamborghini concluído (64 imagens processadas)


2025-05-17 16:37:05,070 - INFO - downloader - image #44	https://i.abcnewsfe.com/a/947b6739-758a-424b-bbe6-893854151fe5/elon-musk-2-rt-gmh-250421_1745263339774_hpMain.jpg
2025-05-17 16:37:05,272 - INFO - downloader - image #45	https://i0.statig.com.br/bancodeimagens/56/nc/5x/56nc5x3j71fgzhc7fmqnxv68n.jpg
2025-05-17 16:37:05,349 - INFO - downloader - image #46	https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2113300644.jpg
2025-05-17 16:37:05,722 - INFO - downloader - image #47	https://kvdbil-object-images.imgix.net/7228193/3559a5e1.jpg
2025-05-17 16:37:06,389 - INFO - downloader - image #69	https://www.exor.com/sites/default/files/styles/wide_standard/public/2023/internal-images-single/2200005-ferrari-daytona-sp3-reddot-winner-2022.jpg
2025-05-17 16:37:07,143 - ERROR - downloader - Exception caught when downloading file https://www2.mercedes-benz.com.br/content/dam/hq/passengercars/cars/gle/gle-suv-v167-fl-pi/overview/highlights/01-2023/images/mercedes-benz-gle-suv-v167-high

ferrari concluído (69 imagens processadas)


2025-05-17 16:37:12,835 - ERROR - downloader - Exception caught when downloading file https://www.bmw.com.br/content/dam/bmw/common/all-models/m-series/x3-m50/navigation/bmw-x-series-x3m50i-xdrive.png, error: HTTPSConnectionPool(host='www.bmw.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 2
2025-05-17 16:37:13,067 - INFO - downloader - image #35	https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjmv6JwFXmFn9DVV87rte2fblxOdtXdPkaUB_2mQubjKOizBlHWHMIJTJ0Y1NQm-xUZ1dKO5V52P5Xg7O4Z8bYuvYZd-vCn9mnikv7Ft2Pl77-2A7GV5cyC0R3d8MlYYWKm_M4J5lbxKiWk/s1280/Explorer+Timberline+1.jpg
2025-05-17 16:37:13,203 - INFO - downloader - image #36	https://garagem360.com.br/wp-content/uploads/2024/01/Ford-Territory-2023-3.jpg
2025-05-17 16:37:13,341 - INFO - downloader - image #37	https://hips.hearstapps.com/hmg-prod/images/2025-ford-expedition-stealth-performance-exterior-108-66fadf15d18f2.jpg
2025-05-17 16:37:16,989 - ERROR - downloader - Exception caught when downloading 

ford concluído (37 imagens processadas)


2025-05-17 16:37:22,078 - ERROR - downloader - Exception caught when downloading file https://media.ed.edmunds-media.com/tesla/model-s/2025/oem/2025_tesla_model-s_sedan_plaid_fq_oem_1_1600.jpg, error: HTTPSConnectionPool(host='media.ed.edmunds-media.com', port=443): Read timed out. (read timeout=5), remaining retry times: 1
2025-05-17 16:37:22,392 - ERROR - downloader - Exception caught when downloading file https://www2.mercedes-benz.com.br/content/dam/hq/passengercars/cars/g-class/w465-pi/amg/modeloverview/02-2024/images/mercedes-amg-g-class-w456-model-overview-696x392-02-2024.png, error: HTTPSConnectionPool(host='www2.mercedes-benz.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 0
2025-05-17 16:37:22,639 - ERROR - downloader - Response status code 403, file https://fotos-jornaldocarro-estadao.nyc3.cdn.digitaloceanspaces.com/wp-content/uploads/2025/01/22133124/Mercedes-Benz-C200-AMG-Line-dianteira.jpg
2025-05-17 16:37:22,919 - ERROR - downloader - Excepti

tesla concluído (57 imagens processadas)


2025-05-17 16:37:38,179 - ERROR - downloader - Exception caught when downloading file https://www.bmw.com.br/content/dam/bmw/common/all-models/3-series/sedan/2024/navigation/bmw-3-series-ice-lci-modelfinder.png, error: HTTPSConnectionPool(host='www.bmw.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 0
2025-05-17 16:37:38,193 - ERROR - downloader - Exception caught when downloading file https://www2.mercedes-benz.com.br/content/dam/hq/passengercars/cars/c-class/c-class-saloon-w206-pi/modeloverview/06-2022/images/mercedes-amg-c-class-w206-modeloverview-696x392-06-2022.png, error: HTTPSConnectionPool(host='www2.mercedes-benz.com.br', port=443): Read timed out. (read timeout=5), remaining retry times: 0
2025-05-17 16:37:38,307 - ERROR - downloader - Response status code 404, file https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Mercedes-benz_star_amk.jpg
2025-05-17 16:37:38,360 - INFO - downloader - image #2	https://s2-autoesporte.glbimg.com/ZUhrIt61jJ

mercedes concluído (18 imagens processadas)


2025-05-17 16:38:27,724 - INFO - downloader - image #9	https://pugachev.miami/wp-content/uploads/2024/04/bmw-m4-competition-2024-cabrio-one.jpg
2025-05-17 16:38:29,257 - INFO - downloader - image #10	https://stephenjames.co.uk/media/pages/new-cars/bmw/bmw-m135-xdrive-sport-auto-pch-q2-2025/3b199522d6-1736186733/new-1-series-800x450-800x450-crop.png
2025-05-17 16:38:34,273 - INFO - downloader - no more download task for thread downloader-001
2025-05-17 16:38:34,274 - INFO - downloader - thread downloader-001 exit
2025-05-17 16:38:34,391 - INFO - icrawler.crawler - Crawling task done!


bmw concluído (10 imagens processadas)
✅ Diretório temporário removido com sucesso.
✅ Concluído! Diretório 'dataset/' agora contém apenas train/, val/ e test/.


### Preparação do modelo base

In [23]:
from tensorflow.keras.applications import MobileNetV2

# Carrega o MobileNetV2 pré-treinado no ImageNet, sem as camadas de classificação (include_top=False).
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)

# Congela TODAS as camadas do modelo base para não atualizar seus pesos na fase inicial.
base_model.trainable = False

### Construção do “head” personalizado

In [27]:
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model

x = base_model.output
x = GlobalAveragePooling2D()(x)      # comprime cada mapa de características em um único valor
x = Dense(128, activation='relu')(x) # camada densa aprendível
x = Dropout(0.5)(x)                  # evita overfitting
predictions = Dense(10, activation='softmax')(x)  # ajuste o “10” para o número de classes do seu dataset

model = Model(inputs=base_model.input, outputs=predictions)

### Compilação e data augmentation

In [28]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_dir = "./dataset"

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    directory=f"{data_dir}/train",
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    directory=f"{data_dir}/val",
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

Found 458 images belonging to 10 classes.
Found 126 images belonging to 10 classes.


### Treinamento inicial (camadas novas)

In [29]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_steps=val_generator.samples // val_generator.batch_size
)

Epoch 1/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - accuracy: 0.1220 - loss: 2.6743 - val_accuracy: 0.3750 - val_loss: 1.8707
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 240ms/step - accuracy: 0.1250 - loss: 2.3057 - val_accuracy: 0.3646 - val_loss: 1.8883
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step - accuracy: 0.2897 - loss: 2.0110 - val_accuracy: 0.4479 - val_loss: 1.8297
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 235ms/step - accuracy: 0.2500 - loss: 1.9268 - val_accuracy: 0.4688 - val_loss: 1.8089
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.3407 - loss: 1.7918 - val_accuracy: 0.4688 - val_loss: 1.6073
Epoch 6/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 238ms/step - accuracy: 0.3125 - loss: 1.9733 - val_accuracy: 0.4375 - val_loss: 1.6692
Epoch 7/10
[1m14/14[0m [32m━━━━

### Fine‑tuning (ajuste fino)

In [30]:
# 1) “Descongela” todo o modelo
base_model.trainable = True

# 2) Opcionalmente, recongele as camadas iniciais (mais genéricas)
for layer in base_model.layers[:100]:
    layer.trainable = False

# 3) Recompile com learning rate menor
import tensorflow as tf
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 4) Continue o treino por mais N épocas
history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_steps=val_generator.samples // val_generator.batch_size
)

Epoch 1/5
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2s/step - accuracy: 0.3076 - loss: 2.0117 - val_accuracy: 0.5521 - val_loss: 1.4529
Epoch 2/5
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 229ms/step - accuracy: 0.3750 - loss: 1.7521 - val_accuracy: 0.5417 - val_loss: 1.4918
Epoch 3/5
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - accuracy: 0.2942 - loss: 1.9361 - val_accuracy: 0.5938 - val_loss: 1.4036
Epoch 4/5
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 252ms/step - accuracy: 0.2812 - loss: 2.2154 - val_accuracy: 0.5625 - val_loss: 1.4785
Epoch 5/5
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.3789 - loss: 1.8380 - val_accuracy: 0.6042 - val_loss: 1.4926


### Testando em uma única imagem

In [38]:
# 1) Capture os nomes de classe a partir do train_generator (ou val_generator)
class_names = list(train_generator.class_indices.keys())

# 2) Importe o que falta
from tensorflow.keras.preprocessing import image
import numpy as np

# 3) Defina a função de predição
def predict_image(path):
    # carrega e redimensiona
    img = image.load_img(path, target_size=(224, 224))
    x   = image.img_to_array(img) / 255.0
    x   = np.expand_dims(x, axis=0)            # batch size = 1

    # faz a predição
    probs = model.predict(x)[0]                # array de probabilidades
    idx   = np.argmax(probs)                   # índice da classe mais provável

    # formata e imprime
    label = class_names[idx]
    print(f"Predicted: {label} ({probs[idx]*100:.2f}%)")

# 4) Exemplo de uso
predict_image("dataset/test/toyota/000020.png")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Predicted: toyota (63.72%)
