In [29]:
import pandas as pd
import os
import shutil

In [30]:
# Load the DDI metadata. "DDI_ID" and "Unnamed: 0" are both plain row indices,
# so they are discarded right away.
df = pd.read_csv("../ddidiversedermatologyimages/ddi_metadata.csv").drop(
    columns=["DDI_ID", "Unnamed: 0"]
)

# Number of samples for every (disease, skin_tone) combination.
counts = df.groupby("disease").skin_tone.value_counts()

# Combinations backed by a single sample are dropped (original author's note:
# "there is only one sample, so idk"); only those seen more than once are kept.
keep = counts[counts > 1].index
keep_df = keep.to_frame(index=False)

# Inner join on the shared columns (disease, skin_tone): rows whose combination
# is not listed in keep_df disappear.
df = pd.merge(df, keep_df, how="inner")

# Display the unfiltered per-combination counts as the cell output.
counts

disease                                      skin_tone
abrasions-ulcerations-and-physical-injuries  56            3
abscess                                      34            1
acne-cystic                                  56            1
acquired-digital-fibrokeratoma               34            1
                                             56            1
                                                          ..
verruca-vulgaris                             56           17
                                             34            7
verruciform-xanthoma                         56            1
wart                                         56            1
xanthogranuloma                              34            2
Name: count, Length: 138, dtype: int64

In [31]:
# Lookup table from DDI diagnosis names (keys) to the HAM10000 labels used in
# this notebook (values): mel, bcc, akiec, bkl, nv, df, vasc.
# Entries are grouped by clinical category purely for readability; the grouping
# has no effect at runtime.
DDI_TO_HAM10000_MAPPING = {
    # --- Malignant melanocytic lesions ---
    "melanoma": "mel",
    "melanoma-acral-lentiginous": "mel",
    "melanoma-in-situ": "mel",
    "nodular-melanoma-(nm)": "mel",

    # --- Basal cell carcinoma variants ---
    "basal-cell-carcinoma": "bcc",
    "basal-cell-carcinoma-nodular": "bcc",
    "basal-cell-carcinoma-superficial": "bcc",

    # --- Squamous cell carcinoma: folded into the actinic keratosis label ---
    "squamous-cell-carcinoma": "akiec",
    "squamous-cell-carcinoma-in-situ": "akiec",
    "squamous-cell-carcinoma-keratoacanthoma": "akiec",

    # --- Actinic / precancerous lesions ---
    "actinic-keratosis": "akiec",
    "solar-lentigo": "bkl",

    # --- Other malignant / serious lesions ---
    "sebaceous-carcinoma": "akiec",
    "leukemia-cutis": "akiec",
    "metastatic-carcinoma": "akiec",
    "blastic-plasmacytoid-dendritic-cell-neoplasm": "akiec",
    "kaposi-sarcoma": "vasc",

    # --- Cutaneous lymphomas and lymphoid infiltrates ---
    "mycosis-fungoides": "akiec",
    "subcutaneous-t-cell-lymphoma": "akiec",
    "reactive-lymphoid-hyperplasia": "bkl",
    "lymphocytic-infiltrations": "akiec",

    # --- Benign keratotic lesions ---
    "seborrheic-keratosis": "bkl",
    "seborrheic-keratosis-irritated": "bkl",
    "benign-keratosis": "bkl",
    "inverted-follicular-keratosis": "bkl",
    "lichenoid-keratosis": "bkl",
    "clear-cell-acanthoma": "bkl",

    # --- Benign melanocytic lesions ---
    "melanocytic-nevi": "nv",
    "acral-melanotic-macule": "nv",
    "blue-nevus": "nv",
    "dysplastic-nevus": "nv",
    "epidermal-nevus": "nv",
    "nevus-lipomatosus-superficialis": "nv",
    "atypical-spindle-cell-nevus-of-reed": "nv",
    "pigmented-spindle-cell-nevus-of-reed": "nv",

    # --- Dermatofibroma and related ---
    "dermatofibroma": "df",
    "fibrous-papule": "df",
    "acquired-digital-fibrokeratoma": "df",

    # --- Vascular lesions ---
    "angioma": "vasc",
    "arteriovenous-hemangioma": "vasc",
    "angioleiomyoma": "vasc",
    "glomangioma": "vasc",
    "pyogenic-granuloma": "vasc",

    # --- Viral lesions ---
    "verruca-vulgaris": "bkl",
    "wart": "bkl",
    "condyloma-accuminatum": "bkl",
    "molluscum-contagiosum": "bkl",

    # --- Infectious / inflammatory ---
    "folliculitis": "bkl",
    "eczema-spongiotic-dermatitis": "bkl",
    "coccidioidomycosis": "bkl",
    "onychomycosis": "bkl",
    "tinea-pedis": "bkl",
    "morphea": "bkl",
    "dermatomyositis": "bkl",
    "graft-vs-host-disease": "bkl",

    # --- Cysts and benign growths ---
    "epidermal-cyst": "bkl",
    "lipoma": "bkl",
    "keloid": "bkl",
    "scar": "bkl",
    "hematoma": "bkl",
    "acne-cystic": "bkl",
    "acrochordon": "bkl",  # skin tag
    "abscess": "bkl",

    # --- Neurogenic lesions ---
    "neurofibroma": "bkl",
    "neuroma": "bkl",
    "cellular-neurothekeoma": "bkl",
    "trichilemmoma": "bkl",
    "trichofolliculoma": "bkl",

    # --- Appendageal and other ---
    "chondroid-syringoma": "bkl",
    "eccrine-poroma": "bkl",
    "syringocystadenoma-papilliferum": "bkl",
    "xanthogranuloma": "bkl",
    "verruciform-xanthoma": "bkl",
    "foreign-body-granuloma": "bkl",
    "prurigo-nodularis": "bkl",

    # --- Trauma and injury ---
    "abrasions-ulcerations-and-physical-injuries": "bkl",

    # --- Pigmentation ---
    "hyperpigmentation": "bkl",
    "focal-acral-hyperkeratosis": "bkl",
}
# Translate every DDI diagnosis in df.disease into its HAM10000 label.
#
# The column is validated before it is touched so that *all* diagnoses missing
# from DDI_TO_HAM10000_MAPPING are reported together (still as a KeyError)
# instead of failing on the first one with no context.
#
# Values that are already HAM10000 labels are accepted and passed through
# unchanged. This cell overwrites df.disease in place, so on a second execution
# the column holds labels rather than DDI names; without the pass-through a
# re-run would raise KeyError on e.g. 'mel'.
_ham10000_labels = set(DDI_TO_HAM10000_MAPPING.values())
_unmapped = sorted(
    set(df.disease.dropna()).difference(DDI_TO_HAM10000_MAPPING, _ham10000_labels),
    key=str,
)
if _unmapped:
    raise KeyError(f"DDI diseases without a HAM10000 mapping: {_unmapped}")

# na_action='ignore' leaves missing values as NaN instead of passing them to the lookup.
df.disease = df.disease.map(
    lambda name: DDI_TO_HAM10000_MAPPING.get(name, name), na_action='ignore'
)

In [32]:
# Copy every image into a folder named "<skin_tone>+<disease>", so that each
# (skin tone, label) pair gets its own directory.
for tone, label, file_name in zip(df["skin_tone"], df["disease"], df["DDI_file"]):
    class_dir = f"../image_dataset_with_skin_tone/{tone}+{label}"
    os.makedirs(class_dir, exist_ok=True)

    target = os.path.join(class_dir, file_name)
    if os.path.exists(target):
        # Already present (e.g. from an earlier run): nothing to do.
        continue

    # The file is copied rather than symlinked: per the original author's note,
    # a symlink (os.symlink from the same source path) would avoid duplicating
    # images but ran into errors.
    shutil.copy2(f"../ddidiversedermatologyimages/{file_name}", target)

In [33]:
# Copy every image into a folder named after its disease label only
# (skin tone is not part of the directory layout here).
for label, file_name in zip(df["disease"], df["DDI_file"]):
    class_dir = f"../image_dataset_without_skin_tone/{label}"
    os.makedirs(class_dir, exist_ok=True)

    target = os.path.join(class_dir, file_name)
    if os.path.exists(target):
        # Already present (e.g. from an earlier run): nothing to do.
        continue

    # The file is copied rather than symlinked: per the original author's note,
    # a symlink (os.symlink from the same source path) would avoid duplicating
    # images but ran into errors.
    shutil.copy2(f"../ddidiversedermatologyimages/{file_name}", target)


In [34]:
# Copy every image into a two-level layout: "<skin_tone>/<disease>".
for tone, label, file_name in zip(df["skin_tone"], df["disease"], df["DDI_file"]):
    class_dir = f"../test_dataset/{tone}/{label}"
    os.makedirs(class_dir, exist_ok=True)

    target = os.path.join(class_dir, file_name)
    if os.path.exists(target):
        # Already present (e.g. from an earlier run): nothing to do.
        continue

    # The file is copied rather than symlinked: per the original author's note,
    # a symlink (os.symlink from the same source path) would avoid duplicating
    # images but ran into errors.
    shutil.copy2(f"../ddidiversedermatologyimages/{file_name}", target)
