In [None]:
# Instalação da biblioteca icrawler (necessária para download de imagens)
# Execute apenas uma vez
!pip install icrawler

Collecting icrawler
  Downloading icrawler-0.6.10-py3-none-any.whl.metadata (6.2 kB)
Collecting bs4 (from icrawler)
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading icrawler-0.6.10-py3-none-any.whl (36 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4, icrawler
Successfully installed bs4-0.0.2 icrawler-0.6.10


In [None]:
import os
import cv2
import pandas as pd
from icrawler.builtin import BingImageCrawler

# Baixando as imagens

In [None]:
# Root folder (relative to the working directory) that holds one sub-folder
# per class and, later, the generated CSV file.
base_dir = 'dataset'

In [None]:
# Number of images requested for each individual search term.
IMAGENS_POR_TERMO = 50

# Search plan: destination folder name (one per class) -> list of Bing queries.
estrategia_busca = dict(
    # Severe accidents: total loss, destroyed cars, rollovers, heavy wreckage.
    dataset_severe_accident=[
        "totaled car accident street",
        "wrecked car crash road",
        "carro capotado rodovia",         # PT-BR query
        "severe car collision wreckage",
        "smashed car front view",
    ],
    # Moderate accidents: dents, scratches, broken lights, minor crashes.
    dataset_moderate_accident=[
        "car bumper dent parking lot",
        "fender bender accident",         # classic term for a light collision
        "batida leve transito",           # PT-BR query (brings real news photos)
        "car door scratch dent",
        "broken tail light car",
        "minor car crash street",
    ],
    # No accident: ordinary cars parked, driving, or in city traffic.
    dataset_no_accident=[
        "sedan car side view street",
        "hatchback parked on road",
        "carros no transito cidade",      # PT-BR query (real traffic scenes)
        "suv driving on highway",
        "common car front view street",
    ],
)

In [None]:
def executar_download_inteligente():
    """Download images for every class and search term in ``estrategia_busca``.

    For each class, the folder ``base_dir/<class>`` is created if needed and
    the class's search terms are crawled one after another. Each term raises
    the folder's cumulative target by ``IMAGENS_POR_TERMO``; only the images
    still missing to reach that target are requested, so re-running the
    function does not download again what is already on disk.

    Errors raised while handling one term are printed and the loop moves on
    to the next term (best-effort download). Returns ``None``.
    """
    for nome_pasta, lista_termos in estrategia_busca.items():
        diretorio_destino = os.path.join(base_dir, nome_pasta)

        # exist_ok makes this a no-op when the folder is already there.
        os.makedirs(diretorio_destino, exist_ok=True)

        print(f"\n>>> Iniciando downloads para a classe: {nome_pasta.upper()}")

        # Cumulative target for the number of files in this class folder.
        meta_atual = 0

        for termo in lista_termos:
            # Each term raises the folder target by one quota.
            meta_atual += IMAGENS_POR_TERMO

            print(f"   Termo: '{termo}' -> Buscando até completar {meta_atual} imagens na pasta...")

            try:
                # Request only what is missing to reach the cumulative target.
                ja_baixadas = len(os.listdir(diretorio_destino))
                faltantes = meta_atual - ja_baixadas
                if faltantes <= 0:
                    print(f"   Meta de {meta_atual} imagens já atingida; termo ignorado.")
                    continue

                # A fresh crawler per term, writing into the class folder.
                bing_crawler = BingImageCrawler(
                    storage={'root_dir': diretorio_destino},
                    parser_threads=2,
                    downloader_threads=4,
                    log_level='ERROR'  # keep the cell output less noisy
                )

                # file_idx_offset='auto' continues the file numbering after the
                # highest index already in the folder. With the default offset
                # (0) a new crawler restarts at 000001, so the top-ranked
                # results of every later term would collide with files saved
                # by earlier terms instead of being stored.
                bing_crawler.crawl(
                    keyword=termo,
                    max_num=faltantes,
                    file_idx_offset='auto'
                )

            except Exception as e:
                # Deliberate best-effort: report and continue with the next term.
                print(f"   [!] Erro ao buscar '{termo}': {e}")

    print("\n" + "="*40)
    print("DOWNLOAD CONCLUÍDO!")
    print("Verifique as pastas. Agora você deve ter uma grande variedade de imagens.")

In [None]:
# Run the download for every class/term in estrategia_busca (needs network access).
executar_download_inteligente()


>>> Iniciando downloads para a classe: ACIDENTE_GRAVE
   Termo: 'totaled car accident street' -> Buscando até completar 50 imagens na pasta...


ERROR:downloader:Response status code 403, file https://www.askadamskutner.com/wp-content/uploads/2023/01/vehicle-totaled-after-car-accident-opt.jpg
ERROR:downloader:Response status code 400, file https://media.istockphoto.com/id/1412101026/photo/totaled-car.jpg
ERROR:downloader:Response status code 403, file https://www.tuleylaw.com/wp-content/uploads/2017/08/bigstock-Two-Car-Crash-Closeup-1529636.jpg
ERROR:downloader:Response status code 403, file https://www.wshanejennings.com/wp-content/uploads/2024/07/totaled-car-in-llano-texas.jpg
ERROR:downloader:Response status code 403, file https://www.coloradolaw.net/wp-content/uploads/2022/05/car-accident-totals-car.jpg
ERROR:downloader:Response status code 403, file https://www.kbb.com/wp-content/uploads/2023/05/car-totaled-in-accident.jpg
ERROR:downloader:Response status code 403, file https://www.edmunds.com/assets/m/cs/bltc5716c4b521c3664/669996d4d316c769ffb34b48/car_accident_totaled_car_1600.jpg
ERROR:downloader:Response status code 40

   Termo: 'wrecked car crash road' -> Buscando até completar 100 imagens na pasta...


ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/previews/030/806/818/non_2x/wrecked-car-on-the-road-after-collision-with-another-car-car-crash-dangerous-accident-on-the-road-ai-generated-free-photo.jpg
ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/thumbnails/007/520/297/small_2x/wrecked-car-without-wheels-photo.jpg
ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/thumbnails/045/584/867/small_2x/front-part-of-a-crashed-car-wreck-photo.jpg


   Termo: 'carro capotado rodovia' -> Buscando até completar 150 imagens na pasta...
   Termo: 'severe car collision wreckage' -> Buscando até completar 200 imagens na pasta...
   Termo: 'smashed car front view' -> Buscando até completar 250 imagens na pasta...

>>> Iniciando downloads para a classe: ACIDENTE_MODERADO
   Termo: 'car bumper dent parking lot' -> Buscando até completar 50 imagens na pasta...


ERROR:downloader:Response status code 403, file https://rare-gallery.com/thumbs/997560-car-blue-cars-Audi-vehicle.jpg
ERROR:downloader:Response status code 404, file https://i0.wp.com/xclusivecustomz.com/wp-content/uploads/2015/10/DSC_0628.jpg
ERROR:downloader:Response status code 403, file https://s30740.pcdn.co/wp-content/uploads/2019/06/car2.jpg
ERROR:downloader:Exception caught when downloading file https://s1.1zoom.me/big0/904/Audi_RS_7Blue_473358.jpg, error: HTTPSConnectionPool(host='s1.1zoom.me', port=443): Max retries exceeded with url: /big0/904/Audi_RS_7Blue_473358.jpg (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7b3821085e50>, 'Connection to s1.1zoom.me timed out. (connect timeout=5)')), remaining retry times: 2
ERROR:downloader:Exception caught when downloading file https://s1.1zoom.me/big0/904/Audi_RS_7Blue_473358.jpg, error: HTTPSConnectionPool(host='s1.1zoom.me', port=443): Max retries exceeded with url: /big0/904/Audi_RS_7Blue_473358.jp

   Termo: 'fender bender accident' -> Buscando até completar 100 imagens na pasta...


ERROR:downloader:Response status code 403, file https://oraclelawfirm.com/wp-content/uploads/2024/02/Fender-Bender-Accident-2.jpg
ERROR:downloader:Response status code 403, file https://oraclelawfirm.com/wp-content/uploads/2024/02/Fender-Bender-Accident.jpg
ERROR:downloader:Response status code 403, file https://oraclelawfirm.com/wp-content/uploads/2024/02/Fender-Bender-Accident-3.jpg
ERROR:downloader:Response status code 403, file https://www.getgordon.com/wp-content/uploads/2022/04/Two-Silver-Cars-in-Minor-Accident.jpg
ERROR:downloader:Response status code 403, file https://mayfirm.com/wp-content/uploads/2021/12/car-accident-lawyer-for-expect-physically.jpg
ERROR:downloader:Exception caught when downloading file https://cms.westcoasttriallawyers.com/wp-content/uploads/2024/08/Fender-Bender-car-damage-blog-768x432.jpg, error: HTTPSConnectionPool(host='cms.westcoasttriallawyers.com', port=443): Max retries exceeded with url: /wp-content/uploads/2024/08/Fender-Bender-car-damage-blog-768

   Termo: 'batida leve transito' -> Buscando até completar 150 imagens na pasta...
   Termo: 'car door scratch dent' -> Buscando até completar 200 imagens na pasta...
   Termo: 'broken tail light car' -> Buscando até completar 250 imagens na pasta...
   Termo: 'minor car crash street' -> Buscando até completar 300 imagens na pasta...

>>> Iniciando downloads para a classe: NAO_ACIDENTE
   Termo: 'sedan car side view street' -> Buscando até completar 50 imagens na pasta...


ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/previews/040/567/823/large_2x/side-view-of-black-sedan-car-isolated-on-white-background-with-clipping-path-photo.jpg
ERROR:downloader:Response status code 400, file https://media.istockphoto.com/id/465876806/photo/street-in-shanghai-lujiazui-china.jpg
ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/previews/017/216/366/non_2x/modern-sedan-car-side-view-illustration-free-vector.jpg
ERROR:downloader:Response status code 400, file https://media.istockphoto.com/id/494093356/photo/generic-black-car-side-view.jpg
ERROR:downloader:Response status code 403, file https://static.vecteezy.com/system/resources/previews/042/570/756/large_2x/side-view-of-black-sedan-car-isolated-on-white-background-with-clipping-path-photo.jpg


   Termo: 'hatchback parked on road' -> Buscando até completar 100 imagens na pasta...


ERROR:downloader:Response status code 403, file http://www.lthlaw.com/wp-content/uploads/2024/03/parked-car.jpg


   Termo: 'carros no transito cidade' -> Buscando até completar 150 imagens na pasta...


ERROR:downloader:Response status code 403, file https://c.pxhere.com/photos/a3/a0/cars_traffic_road_transportation_transport_drive_urban_street-1232163.jpg
ERROR:downloader:Response status code 403, file https://c.pxhere.com/images/85/ca/5597c4a7cf2b20027b4a958d384f-1418601.jpg
ERROR:downloader:Response status code 403, file https://c.pxhere.com/photos/1c/f0/cars_traffic_vehicles_drive_driving_street_automobiles_city-740551.jpg
ERROR:parser:Exception caught when fetching page https://www.bing.com/images/async?q=carros no transito cidade&first=120, error: HTTPSConnectionPool(host='www.bing.com', port=443): Read timed out. (read timeout=5), remaining retry times: 2


   Termo: 'suv driving on highway' -> Buscando até completar 200 imagens na pasta...


ERROR:downloader:Response status code 403, file https://www.kbb.com/wp-content/uploads/2020/11/2021-genesis-gv80-front-left-3qtr-4x3-1.jpg
ERROR:downloader:Response status code 403, file https://media.ed.edmunds-media.com/non-make/ot/ot_415191_1600.jpg
ERROR:downloader:Exception caught when downloading file https://cars.usnews.com/images/article/202105/128927/1_2021_kia_telluride.jpg, error: HTTPSConnectionPool(host='cars.usnews.com', port=443): Read timed out. (read timeout=5), remaining retry times: 2
ERROR:downloader:Exception caught when downloading file https://cars.usnews.com/images/article/202105/128927/1_2021_kia_telluride.jpg, error: HTTPSConnectionPool(host='cars.usnews.com', port=443): Read timed out. (read timeout=5), remaining retry times: 1
ERROR:downloader:Exception caught when downloading file https://cars.usnews.com/images/article/202105/128927/1_2021_kia_telluride.jpg, error: HTTPSConnectionPool(host='cars.usnews.com', port=443): Read timed out. (read timeout=5), rema

   Termo: 'common car front view street' -> Buscando até completar 250 imagens na pasta...

DOWNLOAD CONCLUÍDO!
Verifique as pastas. Agora você deve ter uma grande variedade de imagens.


# Iniciando a criação do CSV

## Definição das classes e seus números (Rótulos)

In [None]:
# Integer label of each class folder: the position in this sequence is the label.
classes_num = {
    pasta: indice
    for indice, pasta in enumerate((
        "dataset_severe_accident",
        "dataset_moderate_accident",
        "dataset_no_accident",
    ))
}

# Side length (in pixels) of the square every image is resized to.
img_size = 64

# Accumulates one row per image: the flattened pixels followed by the label.
dados_lista = list()

## Processamento

In [None]:
# Start from an empty list so that re-running this cell never appends the
# same images a second time (the rows would otherwise be duplicated).
dados_lista = []

# Convert every readable image of every class folder into one row:
# img_size * img_size grayscale pixel values followed by the class label.
for nome_pasta, rotulo in classes_num.items():

  # Number of images successfully converted for this class.
  count = 0

  caminho_pasta = os.path.join(base_dir, nome_pasta)

  # isdir also rejects a regular file with the class name, which os.listdir
  # could not iterate.
  if not os.path.isdir(caminho_pasta):
    print(f"ERRO: A pasta '{nome_pasta}' não foi encontrada em {base_dir}")
    continue

  # os.listdir returns entries in arbitrary order; sorting keeps the row order
  # of the resulting CSV reproducible between runs.
  for nome_arquivo in sorted(os.listdir(caminho_pasta)):
    try:
      caminho_img = os.path.join(caminho_pasta, nome_arquivo)

      img_matriz = cv2.imread(caminho_img, cv2.IMREAD_GRAYSCALE)

      # cv2.imread returns None for files it cannot decode; skip those.
      if img_matriz is None:
        continue

      img_resized = cv2.resize(img_matriz, (img_size, img_size))

      img_vectorized = img_resized.flatten()

      # tolist() yields plain Python ints instead of NumPy scalars.
      dados_imagem = img_vectorized.tolist()
      dados_imagem.append(rotulo)

      dados_lista.append(dados_imagem)
      count += 1

    except Exception as e:
      # Best-effort: report the problematic file and keep going.
      print(f"Erro ao ler {nome_arquivo}: {e}")

  print(f"Processadas {count} imagens na pasta '{nome_pasta}'.")

Processadas 60 imagens na pasta 'acidente_grave'.
Processadas 60 imagens na pasta 'acidente_moderado'.
Processadas 60 imagens na pasta 'nao_acidente'.


## Salvando o Arquivo

In [None]:
# Header: one column per pixel of the flattened img_size x img_size image,
# followed by the label column.
colunas = [
    *(f'pixel_{i}' for i in range(img_size * img_size)),
    'rotulo (target)',
]

# One row per processed image.
df = pd.DataFrame(data=dados_lista, columns=colunas)

# The CSV is written inside the dataset root, without the index column.
caminho_csv = os.path.join(base_dir, 'dados_acidentes.csv')
df.to_csv(caminho_csv, index=False)

## Exibindo estatísticas

In [None]:
# Display the assembled table: one row per image, pixel columns then the label column.
df

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095,rotulo (target)
0,149,176,167,169,168,184,176,171,163,141,...,171,151,158,166,185,158,163,163,178,0
1,133,185,245,127,28,48,76,66,23,26,...,17,28,6,5,8,7,25,44,9,0
2,142,143,145,147,149,151,152,153,155,154,...,19,19,15,21,18,20,20,20,17,0
3,16,19,19,27,22,16,18,16,16,16,...,139,17,25,16,23,23,90,113,50,0
4,249,246,252,252,252,252,252,252,252,252,...,204,204,205,203,201,195,179,191,181,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,118,96,79,61,71,74,98,81,119,81,...,115,127,152,194,234,236,217,175,205,2
176,10,9,31,7,7,46,5,9,22,6,...,89,95,99,98,104,108,113,119,127,2
177,91,33,47,213,128,159,122,129,37,184,...,90,81,81,77,78,70,64,194,189,2
178,29,25,23,138,21,20,20,20,40,20,...,138,142,142,145,129,143,133,140,135,2


In [None]:
# Number of rows (images) per class label, to check the class balance.
df.groupby('rotulo (target)').size()

Unnamed: 0_level_0,0
rotulo (target),Unnamed: 1_level_1
0,60
1,60
2,60
