# **DATA AQUISTION**

In [None]:
!pip install -q kaggle

This line tells Kaggle's API where to look for your Kaggle API key on Google Colab.

In [None]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content"


This will download a .zip file containing the HAM10000 dataset.

In [None]:
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000

Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
User cancelled operation


Unzip the Dataset

!unzip /content/skin-cancer-mnist-ham10000.zip -d /content/HAM10000

In [None]:
!file /content/HAM10000/HAM10000_images_part_1
!file /content/HAM10000/HAM10000_images_part_2


/content/HAM10000/HAM10000_images_part_1: directory
/content/HAM10000/HAM10000_images_part_2: directory


 Mount Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Move the Unzipped Folder to Drive

In [None]:
!cp -r /content/HAM10000 /content/drive/MyDrive/HAM10000

In [None]:
!ls /content/HAM10000


ham10000_images_part_1	HAM10000_images_part_2	hmnist_28_28_RGB.csv
HAM10000_images_part_1	HAM10000_metadata.csv	hmnist_8_8_L.csv
ham10000_images_part_2	hmnist_28_28_L.csv	hmnist_8_8_RGB.csv


# Data **Preprocessing**

Import Required Libraries

In [76]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


Load the Metadata CSV

In [77]:
metadata=pd.read_csv('/content/HAM10000/HAM10000_metadata.csv')
metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


 create a new column image_path in your dataset where each image ID now has its full file path


In [78]:
image_dir_1 = '/content/HAM10000/HAM10000_images_part_1'
image_dir_2 = '/content/HAM10000/HAM10000_images_part_2'

# Create full image path dictionary
image_path_dict = {os.path.splitext(img)[0]: os.path.join(image_dir_1, img)
                   for img in os.listdir(image_dir_1)}
image_path_dict.update({os.path.splitext(img)[0]: os.path.join(image_dir_2, img)
                        for img in os.listdir(image_dir_2)})


# Add image path to metadata DataFrame
metadata['image_path'] = metadata['image_id'].map(image_path_dict)
metadata.head()


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/content/HAM10000/HAM10000_images_part_2/ISIC_...


Create Labels

In [79]:
# Define the classes considered malignant (cancerous)
malignant = ['mel', 'bcc', 'akiec']

# Create the binary label: 1 if malignant, 0 if non-cancerous
metadata['binary_label'] = metadata['dx'].apply(lambda x: 1 if x in malignant else 0)

# Check the first few rows
metadata.head()


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path,binary_label
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...,0
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...,0
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...,0
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,/content/HAM10000/HAM10000_images_part_1/ISIC_...,0
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,/content/HAM10000/HAM10000_images_part_2/ISIC_...,0


In [81]:
print(metadata.columns)


Index(['lesion_id', 'image_id', 'dx', 'dx_type', 'age', 'sex', 'localization',
       'image_path', 'binary_label'],
      dtype='object')


 Load and Resize Images Using OpenCV

In [80]:
def load_images(df, size=(64, 64), use_binary_labels=False):
    images = []
    labels = []

    for i, row in df.iterrows():
        img = cv2.imread(row['image_path'])
        img = cv2.resize(img, size)
        images.append(img)

        if use_binary_labels:
            labels.append(row['binary_label'])
        else:
            labels.append(row['label'])

    return np.array(images), np.array(labels)

# Call the function with binary labels
X, y = load_images(metadata, use_binary_labels=True)



In [84]:
print(X.shape) # (100015, 64, 64, 3)
print(y.shape) # (100015,)
print(X[0].shape) # (64, 64, 3)
print(y[0])  # 0 or 1


(10015, 64, 64, 3)
(10015,)
(64, 64, 3)
0


normalise

In [85]:


X = X / 255.0  # normalize all pixel values between 0 and 1

In [86]:
from sklearn.model_selection import train_test_split

# Split the dataset: 80% for training, 20% for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [87]:
import tensorflow as tf
from tensorflow.keras import layers, models


In [88]:
# Build the CNN model
model = models.Sequential([
    # First Convolutional Layer
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),

    # Second Convolutional Layer
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Third Convolutional Layer
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Flatten the output for fully connected layers
    layers.Flatten(),

    # Fully Connected Layer
    layers.Dense(128, activation='relu'),

    # Output Layer
    layers.Dense(1, activation='sigmoid')  # For binary classification (Cancer or Not)
])


# Print model summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [89]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:
# Train the model
history = model.fit(
    X_train, y_train,
    epochs=10,  # Adjust based on performance (5-10 is a good start)
    batch_size=32,
    validation_data=(X_test, y_test)
)


Epoch 1/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 249ms/step - accuracy: 0.8056 - loss: 0.4713 - val_accuracy: 0.7963 - val_loss: 0.4186
Epoch 2/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 249ms/step - accuracy: 0.8154 - loss: 0.4041 - val_accuracy: 0.8063 - val_loss: 0.4341
Epoch 3/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 236ms/step - accuracy: 0.7981 - loss: 0.4054 - val_accuracy: 0.8068 - val_loss: 0.4062
Epoch 4/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 246ms/step - accuracy: 0.8140 - loss: 0.3926 - val_accuracy: 0.8113 - val_loss: 0.4162
Epoch 5/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 240ms/step - accuracy: 0.8140 - loss: 0.3837 - val_accuracy: 0.8158 - val_loss: 0.3825
Epoch 6/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 243ms/step - accuracy: 0.8208 - loss: 0.3676 - val_accuracy: 0.8188 - val_loss: 0.4082
Epoch 7/10

In [90]:
# Evaluate on test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - accuracy: 0.1843 - loss: 0.7058
Test Accuracy: 19.52%


In [None]:
# Make predictions
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int")  # Since it's binary classification


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 79ms/step


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.96      0.90      1612
           1       0.67      0.30      0.42       391

    accuracy                           0.83      2003
   macro avg       0.76      0.63      0.66      2003
weighted avg       0.81      0.83      0.81      2003

Confusion Matrix:
 [[1553   59]
 [ 273  118]]


In [None]:
model.save("skin_cancer_model.h5")




In [None]:
!ls


HAM10000     sample_data		     skin_cancer_model.h5
kaggle.json  skin-cancer-mnist-ham10000.zip


In [None]:
from google.colab import files
files.download("skin_cancer_model.h5")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

if we want to predict here using image

In [91]:
from google.colab import files
uploaded = files.upload()


Saving skin_cancer_2.jpg to skin_cancer_2 (8).jpg


load your model

In [92]:
from tensorflow.keras.models import load_model

model = load_model("skin_cancer_model.h5")  # Make sure you've already trained & saved this




Preprocess the Uploaded Image

In [93]:
import cv2
import numpy as np

# Replace 'your_image.jpg' with the actual file name you uploaded
img_path = list(uploaded.keys())[0]
img = cv2.imread(img_path)
img = cv2.resize(img, (64, 64))
img = img / 255.0  # Normalize
img = np.expand_dims(img, axis=0)  # Add batch dimension (1, 64, 64, 3)


final prediction

In [94]:
prediction = model.predict(img)

# For binary classification
if prediction[0][0] > 0.5:
    print("🔴 Predicted: Malignant (Cancerous)")
else:
    print("🟢 Predicted: Benign (Not Cancerous)")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
🔴 Predicted: Malignant (Cancerous)


In [96]:
from PIL import Image

# Replace 'ISIC_0000000.jpg' with the actual file name you uploaded
img_path = list(uploaded.keys())[0] # Assuming you've used 'files.upload()' before
img = Image.open(img_path)
print(img.size)  # This will print the width and height of the image

(500, 500)
