In [1]:
# !pip install requests
# !pip install beautifulsoup4
# !pip install gTTS

In [2]:
# import modules
import os
import requests
from bs4 import BeautifulSoup

from gtts import gTTS


## Skin Cancer 
#### Website: https://www.skincancer.org/skin-cancer-information/skin-cancer-pictures/

In [3]:
url = 'https://www.skincancer.org/skin-cancer-information/skin-cancer-pictures/'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
response = requests.get(url, headers=headers)
#print(response.status_code)

In [4]:
cancer_types = ["Basal Cell Carcinoma (BCC)","Squamous Cell Carcinoma (SCC)","Melanoma", "Merkel Cell Carcinoma (MCC)",
              "Actinic Keratosis"]

cancer_ids = ['bcc', 'scc', 'melanoma','merkel','ak']


In [5]:
def get_img_cap_cancer(cancer_id):
    soup = BeautifulSoup(response.content, 'html.parser')
    bcc_sec = soup.find('section',attrs = {'id':cancer_id})

    sec_img = bcc_sec.find_all('img')
    sec_imgs = []
    for img in sec_img:
        if img.get('src').find("https")== 0:
            sec_imgs.append(img.get('src'))

    bcc_sec_cap = bcc_sec.find_all('figcaption')
    captions = []
    for cap in bcc_sec_cap:
        captions.append(cap.get_text())

    return sec_imgs, captions


In [6]:
language = 'en'    
def get_text_to_speech(mytext):
    myobj = gTTS(text=mytext, lang=language, slow=False)
    return myobj


In [15]:
data = []
id = 0
for cancer_type,cancer_id in zip(cancer_types,cancer_ids):
    bcc_sec_imgs,captions = get_img_cap_cancer(cancer_id)
    for img,cap in zip(bcc_sec_imgs,captions):
        get_text_to_speech(cap).save(f"id_{id}.mp3")
        dataset_cancer_cell = {
            'id':id,
            'cancer_type':cancer_type,
            'cancer_img':img,
            'cancer_img_caption':cap,
            'caption_speech':f"id_{id}.mp3",
        }
        id += 1
        data.append(dataset_cancer_cell)

In [16]:
import json
with open('skin_cancer_dataset.json', 'w') as f:
    json.dump(data, f)

## Creating MMBT model

### image.py

In [17]:
import os
import logging
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F


logger = logging.getLogger(__name__)

# mapping number of image embeddings to AdaptiveAvgPool2d output size
POOLING_BREAKDOWN = {1: (1, 1), 2: (2, 1), 3: (3, 1), 4: (2, 2), 5: (5, 1), 6: (3, 2), 7: (7, 1), 8: (4, 2), 9: (3, 3)}

# module assumes that the directory where the saved chexnet weight is in the same level as this module
MMBT_DIR_PARENT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(MMBT_DIR_PARENT, "data")
MODELS_DIR = os.path.join(DATA_DIR, "models")
SAVED_CHEXNET = os.path.join(MODELS_DIR, "saved_chexnet.pt")


class ImageEncoderDenseNet(nn.Module):
    def __init__(self, num_image_embeds, saved_model=True, path=os.path.join(MODELS_DIR, SAVED_CHEXNET)):
        """

        :type num_image_embeds: int
        :param num_image_embeds: number of image embeddings to generate; 1-9 as they map to specific numbers of pooling
        output shape in the 'POOLING_BREAKDOWN'
        :param saved_model: True to load saved pre-trained model False to use torch pre-trained model
        :param path: path to the saved .pt model file
        """
        super().__init__()
        if saved_model:
            # loading pre-trained weight, e.g. ChexNet
            # the model here expects the weight to be regular Tensors and NOT cuda Tensor
            model = torch.load(path)
            logger.info(f"Saved model loaded from: {path}")
        else:
            model = torchvision.models.densenet121(pretrained=True)

        # DenseNet architecture last layer is the classifier; we only want everything before that
        modules = list(model.children())[:-1]
        self.model = nn.Sequential(*modules)
        # self.model same as original DenseNet self.features part of the forward function
        self.pool = nn.AdaptiveAvgPool2d(POOLING_BREAKDOWN[num_image_embeds])

    def forward(self, input_modal):
        """
        B = batch
        N = number of image embeddings
        1024 DenseNet embedding size, this can be changed when instantiating MMBTconfig for modal_hidden_size

        Bx3x224x224 (this is input shape) -> Bx1024x7x7 (this is shape after DenseNet CNN layers before the last layer)
        -> Bx1024xN (this is after torch.flatten step in this function below) -> BxNx1024 (this is the shape of the
        output tensor)

        :param input_modal: image tensor
        :return:
        """
        # Bx3x224x224 -> Bx1024x7x7 -> Bx1024xN -> BxNx1024
        features = self.model(input_modal)
        out = F.relu(features, inplace=True)
        out = self.pool(out)
        out = torch.flatten(out, start_dim=2)
        out = out.transpose(1, 2).contiguous()

        return out  # BxNx1024

NameError: name '__file__' is not defined