In [9]:


import os
import pandas as pd
import matplotlib.pyplot as plt  
import imageio.v3 as iio 
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array
import joblib  # For saving and loading the model


In [10]:

# Root directories of the image dataset; each is expected to contain one sub-folder per class
train_root = 'skin_disease_dataset/train_set'
test_root = 'skin_disease_dataset/test_set'

# Cache files, so repeated runs can skip feature extraction and training
model_filename = 'skin_disease_model.pkl'  # trained classifier, written with joblib
train_features_filename = 'train_features.npy'  # extracted features of the training images
test_features_filename = 'test_features.npy'  # extracted features of the test images
# --- Data loading ---
def load_data(root):
    """Index an image dataset laid out as ``root/<label>/<image file>``.

    Parameters
    ----------
    root : str
        Directory whose immediate sub-directories are named after the classes.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns ``'disease path'`` (full file path)
        and ``'disease name'`` (name of the containing folder, used as label).
        The frame is empty (but has both columns) when nothing is found.

    Notes
    -----
    ``os.listdir`` returns entries in arbitrary order, so both directory
    levels are sorted. This makes the row order reproducible across runs and
    machines, which matters because cached feature arrays are matched to the
    labels purely by row position. Feature caches written before the ordering
    was made deterministic may use a different row order and should be
    regenerated.
    """
    records = []  # (file path, label) pairs in deterministic order
    for folder_name in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder_name)
        if not os.path.isdir(folder_path):
            continue  # stray files directly under root carry no label
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            # Nested directories are not images; skip them instead of
            # recording a path that can never be opened.
            if os.path.isfile(file_path):
                records.append((file_path, folder_name))  # folder name is the label
    return pd.DataFrame(records, columns=['disease path', 'disease name'])

# Index both dataset splits: one DataFrame of (image path, folder-name label) per root,
# built in the order train first, then test
train_df, test_df = (load_data(split_root) for split_root in (train_root, test_root))

# --- Display the first few training images (up to 5; currently disabled) ---
# if not train_df.empty:
#     num_images = min(5, len(train_df))  # Display just 5 images
    
#     fig, axes = plt.subplots(1, num_images, figsize=(15, 5))  # Create a single row of subplots

#     for i in range(num_images):
#         # Get the image path and label
#         image_path = train_df.iloc[i]['disease path']
#         label = train_df.iloc[i]['disease name']
        
#         try:
#             # Read the image
#             image = iio.imread(image_path)
            
#             # Display the image in the corresponding subplot
#             axes[i].imshow(image)
#             axes[i].set_title(f"Disease: {label}")
#             axes[i].axis('off')  # Hide axes
#         except Exception as e:
#             print(f"Error loading image {image_path}: {e}")
#             axes[i].axis('off')  # Hide the axes even if an error occurs

#     plt.tight_layout()  # Adjust the layout to prevent overlapping
#     plt.show()
# else:
#     print("No images found in the training dataset.")

In [None]:

# Turn the folder-name labels into integer class ids.
# The encoder is fitted on the training labels only and then reused for the test
# labels; both frames have their 'disease name' column overwritten in place.
le = LabelEncoder()
train_df['disease name'] = le.fit_transform(train_df['disease name'])
test_df['disease name'] = le.transform(test_df['disease name'])

# Inputs are the image paths, targets are the encoded labels
X_train, y_train = train_df['disease path'], train_df['disease name']
X_test, y_test = test_df['disease path'], test_df['disease name']

def load_and_preprocess_image(filepath, size=(128, 128)):
    """Load one image, force RGB, resize it and scale pixel values by 1/255.

    Parameters
    ----------
    filepath : str
        Path of the image file to read.
    size : tuple of int, optional
        Target size handed to ``PIL.Image.resize``. The default matches the
        ``input_shape`` the VGG16 feature extractor is built with.

    Returns
    -------
    array or None
        The result of ``img_to_array`` divided by 255.0, or ``None`` when the
        image could not be loaded (the error is printed, not raised).

    Notes
    -----
    NOTE(review): VGG16's ImageNet weights are normally paired with
    ``tensorflow.keras.applications.vgg16.preprocess_input`` rather than plain
    1/255 scaling. Switching would invalidate cached features and saved
    models, so the scaling is intentionally left unchanged here.
    """
    try:
        # The context manager closes the underlying file handle as soon as the
        # pixel data has been read, instead of leaving that to garbage collection.
        with Image.open(filepath) as source:
            rgb_image = source.convert('RGB')  # always 3 channels, whatever the file's mode
        resized = rgb_image.resize(size)
        return img_to_array(resized) / 255.0  # convert to array and normalize
    except Exception as e:
        # Deliberately broad and best-effort: one unreadable file should not
        # abort processing of the whole dataset.
        print(f"Error loading image {filepath}: {e}")
        return None

# Load VGG16 with ImageNet weights and without its top (fully-connected) layers,
# to be used as a fixed feature extractor. The input_shape must stay in sync with
# the default `size` of load_and_preprocess_image (128x128, 3 channels).
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
# The features are the output of the layer named 'block5_pool'
feature_extractor = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)

# Extract features from images
def extract_features(image_paths, batch_size=32, return_mask=False):
    """Run every readable image through ``feature_extractor`` and flatten the result.

    Images are pushed through the network in batches with ``verbose=0``
    instead of one ``predict()`` call per image, which is much faster and
    does not print a progress bar for every single image.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the images to process, in the desired output order.
    batch_size : int, optional
        Maximum number of images per ``predict()`` call (default 32).
    return_mask : bool, optional
        When True, also return a boolean array with one entry per input path
        that is False for images that could not be loaded. Callers can use it
        to drop the matching labels so features and labels stay aligned.

    Returns
    -------
    numpy.ndarray or (numpy.ndarray, numpy.ndarray)
        One flattened feature row per successfully loaded image, in input
        order; an empty array if no image could be loaded. Unreadable images
        are skipped, so the result may have fewer rows than ``image_paths``.
        With ``return_mask=True`` a ``(features, mask)`` tuple is returned.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    paths = list(image_paths)
    feature_chunks = []  # one 2-D array (batch, n_features) per predict() call
    loaded_mask = []     # per input path: was the image loaded?
    pending = []         # preprocessed images waiting for the next predict() call

    def _flush():
        # Run the pending images through the network as a single batch.
        batch = np.stack(pending)
        output = feature_extractor.predict(batch, verbose=0)
        # Row-wise reshape is equivalent to flattening each image's output.
        feature_chunks.append(np.asarray(output).reshape(len(pending), -1))
        pending.clear()

    for path in paths:
        img = load_and_preprocess_image(path)
        loaded_mask.append(img is not None)
        if img is None:
            continue  # the loader has already reported the error
        pending.append(img)
        if len(pending) == batch_size:
            _flush()
    if pending:
        _flush()

    skipped = len(paths) - sum(loaded_mask)
    if skipped:
        # Make the row/label mismatch visible instead of dropping images silently.
        print(f"Warning: skipped {skipped} of {len(paths)} images that could not be loaded; "
              "the returned features have fewer rows than the input paths.")

    features = np.concatenate(feature_chunks, axis=0) if feature_chunks else np.array([])
    if return_mask:
        return features, np.array(loaded_mask, dtype=bool)
    return features

def _check_feature_alignment(features, labels, split_name):
    """Raise ValueError unless ``features`` has exactly one row per entry of ``labels``.

    extract_features() leaves out images that could not be loaded, so its
    output can be shorter than the label series. Checking here gives a clear
    message before anything is cached, trained or scored.
    """
    n_rows, n_labels = len(features), len(labels)
    if n_rows != n_labels:
        raise ValueError(
            f"{split_name} features have {n_rows} rows but there are {n_labels} labels. "
            "Some images were probably unreadable and skipped, or the cached feature "
            "file is stale: fix or remove the offending images, delete the cached "
            ".npy files and run again."
        )


# --- Features: reuse the cached arrays when both files exist, otherwise extract ---
if os.path.exists(train_features_filename) and os.path.exists(test_features_filename):
    # Load extracted features from files
    X_train_features = np.load(train_features_filename)
    X_test_features = np.load(test_features_filename)
    print("Features loaded from files, no need to extract again.")
else:
    # Extract features for train and test datasets
    X_train_features = extract_features(X_train)
    X_test_features = extract_features(X_test)
    # Validate BEFORE saving: a misaligned cache would otherwise be reloaded
    # on every later run and keep failing.
    _check_feature_alignment(X_train_features, y_train, "Train")
    _check_feature_alignment(X_test_features, y_test, "Test")
    # Save the features to files
    np.save(train_features_filename, X_train_features)
    np.save(test_features_filename, X_test_features)
    print("Features extracted and saved to files.")

# --- Model: reuse the saved classifier when present, otherwise train one ---
if os.path.exists(model_filename):
    # SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary code;
    # only load model files that were produced by this notebook.
    # The saved model is reused as is, even if the dataset has changed since.
    clf = joblib.load(model_filename)
    print("Model loaded from file, no need to retrain.")
else:
    # Training needs exactly one feature row per training label
    _check_feature_alignment(X_train_features, y_train, "Train")

    # SVM parameter grid, searched with 5-fold cross-validation on accuracy
    param_grid = {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale']
    }
    grid_search = GridSearchCV(svm.SVC(class_weight='balanced'), param_grid, scoring='accuracy', cv=5)

    # Train the model and keep the best estimator found by the search
    grid_search.fit(X_train_features, y_train)
    clf = grid_search.best_estimator_

    # Save the model after training
    joblib.dump(clf, model_filename)
    print("Model trained and saved to file.")

# --- Evaluation on the test set ---
# Scoring needs exactly one feature row per test label
_check_feature_alignment(X_test_features, y_test, "Test")
y_pred = clf.predict(X_test_features)

# Weighted averages: per-class scores weighted by class support
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


Features loaded from files, no need to extract again.
Model loaded from file, no need to retrain.
Accuracy: 92.31%
Precision: 0.92
Recall: 0.92
F1-score: 0.92
