# Skin Cancer Classification

In [1]:
# Dataset from 'https://www.kaggle.com/datasets/kylegraupe/skin-cancer-binary-classification-dataset'

In [2]:
import cv2
import pandas as pd
import os

In [3]:
#pip install opencv-python

In [4]:
labels=['Cancer','Non_cancer']
img_path='Skin_Data/'
os.listdir('Skin_Data')

['Cancer', 'Non_Cancer']

In [5]:
# label all images regardless of the folder
img_list=[]
label_list=[]
for label in labels:
    for image in os.listdir(img_path+label):
        img_list.append(img_path+label+'/'+image)
        label_list.append(label)

In [6]:
# make dataframe from arrays
df=pd.DataFrame({'img':img_list,'label':label_list})

In [7]:
df.head()

Unnamed: 0,img,label
0,Skin_Data/Cancer/1007-1.jpg,Cancer
1,Skin_Data/Cancer/1010-01.JPG,Cancer
2,Skin_Data/Cancer/1012-2.JPG,Cancer
3,Skin_Data/Cancer/1031-1.jpg,Cancer
4,Skin_Data/Cancer/1051-3(94).jpg,Cancer


In [8]:
import matplotlib.pyplot as plt

In [9]:
d={'Cancer':1,'Non_cancer':0} # label encoding

In [10]:
df['encode_label'] = df['label'].map(d)

In [11]:
df.tail()

Unnamed: 0,img,label,encode_label
283,Skin_Data/Non_cancer/953-1.JPG,Non_cancer,0
284,Skin_Data/Non_cancer/954-3.JPG,Non_cancer,0
285,Skin_Data/Non_cancer/955.JPG,Non_cancer,0
286,Skin_Data/Non_cancer/984.JPG,Non_cancer,0
287,Skin_Data/Non_cancer/986-1.JPG,Non_cancer,0


In [12]:
import numpy as np

In [13]:
x=[]
for img in df['img']:
    img = cv2.imread(str(img))
    img = cv2.resize(img,(170,170)) # resize the image to 170x170
    img = img/255.0 # normalize the image(divide by 255 regardless 0-255 pixels)
    x.append(img)

In [14]:
x = np.array(x)

In [15]:
y=df['encode_label']

In [16]:
from sklearn.model_selection import train_test_split

In [17]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [18]:
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,Dense,Input,Reshape,BatchNormalization

In [19]:
# CNN Convolutional Neural Network

In [20]:
model=Sequential()
model.add(Input(shape=(170,170,3)))
model.add(Conv2D(32,kernel_size=(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(128))
model.add(Dense(2,activation='softmax'))
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])


In [21]:
hist=model.fit(x_train,y_train,epochs=15,validation_data=(x_test,y_test),verbose=1)

Epoch 1/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 507ms/step - accuracy: 0.5195 - loss: 15.9353 - val_accuracy: 0.7414 - val_loss: 1.6657
Epoch 2/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 484ms/step - accuracy: 0.6882 - loss: 0.7740 - val_accuracy: 0.7586 - val_loss: 0.6460
Epoch 3/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 493ms/step - accuracy: 0.7703 - loss: 0.5920 - val_accuracy: 0.7414 - val_loss: 0.5221
Epoch 4/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 493ms/step - accuracy: 0.7527 - loss: 0.5509 - val_accuracy: 0.8103 - val_loss: 0.4813
Epoch 5/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 494ms/step - accuracy: 0.8151 - loss: 0.4286 - val_accuracy: 0.8448 - val_loss: 0.4005
Epoch 6/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 476ms/step - accuracy: 0.8843 - loss: 0.3402 - val_accuracy: 0.9310 - val_loss: 0.2946
Epoch 7/15
[1m8/8[0m [32m━━━━━━━━━━━

In [22]:
model.save('skin_cancer_model.h5')

