In [12]:
import requests
from bs4 import BeautifulSoup
import os
import urllib.parse
import pandas as pd

In [13]:
# Empty metadata table: one (initially empty) column per field recorded for each image.
df = pd.DataFrame({column: [] for column in ('image_id', 'url', 'dx')})

In [14]:
def download_image(img_url, save_path, timeout=30):
    """Stream the resource at ``img_url`` to the file ``save_path``.

    Prints ``Downloaded: <save_path>`` on success and
    ``Failed to download: <img_url>`` on failure. Nothing is returned.

    Parameters
    ----------
    img_url : str
        URL to fetch.
    save_path : str
        Destination file path; the file is opened in binary write mode and
        overwritten if it already exists.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so that a stalled server cannot block the
        notebook cell indefinitely. Defaults to 30 seconds.
    """
    try:
        # The context manager releases the streamed connection back to the
        # pool even if writing the file fails part-way through.
        with requests.get(img_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download: {img_url}")
                return
            with open(save_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except requests.RequestException as exc:
        # Network-level problems (DNS, timeout, reset connection) are reported
        # the same way as a non-200 answer instead of aborting the whole crawl.
        print(f"Failed to download: {img_url} ({exc})")
        return
    print(f"Downloaded: {save_path}")

In [15]:
# Each gallery page is paired with the diagnosis label applied to its images;
# the pairs are unzipped into the two parallel lists used by the crawler.
list_web_url, list_dx = map(list, zip(
    ('https://dermnetnz.org/topics/seborrhoeic-keratosis-dermoscopy-images', 'bkl'),
    ('https://dermnetnz.org/topics/actinic-keratosis-dermoscopy-images', 'akiec'),
    ('https://dermnetnz.org/topics/basal-cell-carcinoma-dermoscopy', 'bcc'),
))


In [16]:
def crawl_images(list_web_url, df , list_dx,folder_path, timeout=30):
    """Download every image found on each page and record it in a DataFrame.

    For each ``(web_url, dx)`` pair the page is fetched, all ``<img>`` tags
    after the first one are collected, and each image is saved as
    ``<folder_path>/<dx>_<i>.jpg`` through ``download_image``. One row
    (``image_id``, ``url``, ``dx``) is recorded per discovered image URL; the
    outcome of the individual download is not checked here.

    Parameters
    ----------
    list_web_url : list of str
        Pages to scan for images.
    df : pandas.DataFrame
        Existing table the new rows are appended to. It is not modified.
    list_dx : list of str
        Label for each page; must have the same length as ``list_web_url``.
    folder_path : str
        Directory the images are written to. Created if it does not exist.
    timeout : float or tuple, optional
        Timeout passed to the page request. Defaults to 30 seconds.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by the new rows, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``list_web_url`` and ``list_dx`` differ in length.
    RuntimeError
        If a page does not answer with HTTP 200.
    """
    # zip() would silently drop the unmatched tail, so reject it up front.
    if len(list_web_url) != len(list_dx):
        raise ValueError(
            f"list_web_url has {len(list_web_url)} entries but list_dx has {len(list_dx)}"
        )
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)

    new_rows = []
    for web_url, dx in zip(list_web_url, list_dx):
        print('Getting html from ', web_url)
        response = requests.get(web_url, timeout=timeout)
        if response.status_code != 200:
            # Raise instead of calling exit(): exit() targets the interpreter /
            # kernel rather than reporting an error from this function.
            raise RuntimeError(
                f"Failed to retrieve the page. Status code: {response.status_code} ({web_url})"
            )
        soup = BeautifulSoup(response.content, 'html.parser')
        # The first <img> on the page is skipped, as before
        # (presumably a site logo/banner -- TODO confirm).
        img_tags = soup.find_all('img')[1:]
        # urljoin resolves root-relative, page-relative and absolute src values;
        # tags without a src attribute are ignored instead of raising KeyError.
        urls = [urllib.parse.urljoin(web_url, img['src']) for img in img_tags if img.get('src')]
        for i, img_url in enumerate(urls):
            image_id = f'{dx}_{i}'
            download_image(img_url, os.path.join(folder_path, f'{image_id}.jpg'))
            new_rows.append({'image_id': image_id, 'url': img_url, 'dx': f'{dx}'})

    if not new_rows:
        # Nothing to append; still hand back a new frame with a reset index.
        return df.reset_index(drop=True)
    # Concatenate once for all pages instead of once per page.
    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

In [17]:
# Run the crawl over the configured pages and keep the returned table.
df = crawl_images(
    list_web_url=list_web_url,
    df=df,
    list_dx=list_dx,
    folder_path='C:\\Users\\FPT\\Desktop\\dermnet_images\\',
)

Getting html from  https://dermnetnz.org/topics/seborrhoeic-keratosis-dermoscopy-images
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_0.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_1.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_2.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_3.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_4.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_5.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_6.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_7.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_8.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_9.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_10.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_11.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_12.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_13.jpg
Downloaded: C:\Users\FPT\Desktop\dermnet_images\bkl_14.jpg
Downloaded: C:\Users\FPT\Desktop\derm

In [22]:
# Write the collected image table next to the downloaded files, without the index column.
df.to_csv(
    path_or_buf='C:\\Users\\FPT\\Desktop\\dermnet_images\\dermnet_images.csv',
    index=False,
)

In [None]:
# Shell out to the `isic` command-line tool (presumably the ISIC Archive CLI -- TODO confirm),
# running one `image download` search per diagnosis, restricted to image_type "dermoscopic".
# The last argument of each command is a per-class folder; `bkl`, `bcc` and `akiec` match the
# dx labels used for the DermNet crawl above. `--limit` differs per class
# (assumed to cap the number of images fetched -- verify against the CLI help).
!isic image download --search "diagnosis:\"seborrheic keratosis\" AND image_type:\"dermoscopic\"" --limit 1000 bkl/
!isic image download --search "diagnosis:\"basal cell carcinoma\" AND image_type:\"dermoscopic\"" --limit 1500 bcc/
!isic image download --search "diagnosis:\"actinic keratosis\" AND image_type:\"dermoscopic\"" --limit 1300 akiec/
!isic image download --search "diagnosis:\"vascular lesion\" AND image_type:\"dermoscopic\"" --limit 300 vasc/
!isic image download --search "diagnosis:\"dermatofibroma\" AND image_type:\"dermoscopic\"" --limit 300 df/
!isic image download --search "diagnosis:\"melanoma\" AND image_type:\"dermoscopic\"" --limit 1200 mel/
!isic image download --search "diagnosis:\"nevus\" AND image_type:\"dermoscopic\"" --limit 3000 nv/