<a href="https://colab.research.google.com/github/Vishal3347/Data-driven-innovation/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import joblib  # Import joblib for saving the encoder

# Load the metadata table from CSV and preview its first rows
metadata = pd.read_csv(r'C:\Users\visha\Downloads\archive\filtered_HAM10000_metadata.csv')
print(metadata.head())


def _image_path_for(image_id):
    """Return the path of ``<image_id>.jpg`` inside the part-1 image folder."""
    return os.path.join(r'C:\Users\visha\Downloads\archive\HAM10000_images_part_1', f"{image_id}.jpg")


# Attach the full image file path for every row, derived from its image_id
metadata['image_path'] = metadata['image_id'].map(_image_path_for)

# Bar chart of the number of rows per diagnosis code (dx)
sns.countplot(data=metadata, x='dx')
plt.show()

# Map every diagnosis code in 'dx' to an integer and keep it in a new 'label' column
label_encoder = LabelEncoder()
encoded_dx = label_encoder.fit_transform(metadata['dx'])
metadata['label'] = encoded_dx

# Persist the whole fitted encoder object so it can be reloaded later
joblib.dump(value=label_encoder, filename='label_encoder.pkl')

# Optionally keep just the class names in a .npy file for standalone use
np.save(file='label_encoder_classes.npy', arr=label_encoder.classes_)

# Hold out 20% of the rows for testing, stratified on the label, with a fixed seed
train_data, test_data = train_test_split(
    metadata,
    test_size=0.2,
    random_state=42,
    stratify=metadata['label'],
)

# Preprocessing function for images
def preprocess_image(image_path, target_size=(150, 150)):
    """Load one image file and return it as a scaled pixel array.

    Args:
        image_path: Location of the image file to read.
        target_size: Size passed to ``load_img`` so the image is resized
            while loading.

    Returns:
        The result of ``img_to_array`` on the loaded image, with every value
        divided by 255.0.
    """
    resized = load_img(image_path, target_size=target_size)
    pixels = img_to_array(resized)
    return pixels / 255.0

# Load and preprocess every training and testing image into one array per split
X_train = np.array([preprocess_image(path) for path in train_data['image_path']])
X_test = np.array([preprocess_image(path) for path in test_data['image_path']])

# Convert labels to one-hot encoding.
# num_classes is pinned to the number of classes the encoder was fitted on.
# Without it, to_categorical sizes each matrix from the largest label present
# in that particular split, so a split that happens to miss the
# highest-numbered class would yield a narrower matrix than the other split
# (and than the model's softmax layer), failing at fit/evaluate time.
num_classes = len(label_encoder.classes_)
y_train = to_categorical(train_data['label'], num_classes=num_classes)
y_test = to_categorical(test_data['label'], num_classes=num_classes)

# Check image dimensions
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

# Assemble the CNN as an ordered list of layers:
# two convolution/pooling stages followed by a dense classifier head
model = Sequential([
    # Stage 1: 32 filters of 3x3 on the 150x150 3-channel input, then 2x2 max-pooling
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2: 64 filters of 3x3, then 2x2 max-pooling
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps into a vector for the dense layers
    Flatten(),
    # Hidden fully connected layer, with dropout to limit overfitting
    Dense(128, activation='relu'),
    Dropout(0.5),
    # Output layer: one softmax unit per distinct label value
    Dense(len(metadata['label'].unique()), activation='softmax'),
])

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

# Fit for 10 epochs in mini-batches of 32.
# NOTE: the test split is passed as the validation data, so the per-epoch
# val_* metrics and the final evaluation below are measured on the same images.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Score the trained model on the test split and report accuracy as a percentage
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')

def _show_curves(series, title, ylabel):
    """Plot each ``(values, legend_label)`` pair in ``series`` on one figure and show it."""
    for values, legend_label in series:
        plt.plot(values, label=legend_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


# Training vs. validation accuracy per epoch
_show_curves(
    [
        (history.history['accuracy'], 'train accuracy'),
        (history.history['val_accuracy'], 'val accuracy'),
    ],
    title='Model accuracy',
    ylabel='Accuracy',
)

# Training vs. validation loss per epoch
_show_curves(
    [
        (history.history['loss'], 'train loss'),
        (history.history['val_loss'], 'val loss'),
    ],
    title='Model loss',
    ylabel='Loss',
)

# Write the trained model to a .h5 file
model.save(filepath='skin_cancer_classifier.h5')

# Read the LabelEncoder back from the file it was dumped to with joblib
loaded_label_encoder = joblib.load(filename='label_encoder.pkl')

# Demo: turn a diagnosis code into its integer label with the reloaded encoder
sample_codes = ['nv']
new_label = loaded_label_encoder.transform(sample_codes)
print(new_label)

# Demo: turn an integer label back into its diagnosis code
sample_labels = [0]
decoded_label = loaded_label_encoder.inverse_transform(sample_labels)
print(decoded_label)
