##  Train Skin Cancer Model

In [1]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import joblib








In [5]:
# 📂 Load Dataset
skin_df = pd.read_csv("skin_cancer.csv")
print(skin_df.head())

   Age  Gender Family_History Sun_Exposure Mole_Growth Skin_Pigmentation  \
0   45    Male            Yes         High         Yes              Dark   
1   30  Female             No       Medium          No              Fair   
2   60    Male            Yes         High         Yes              Dark   
3   25  Female             No          Low          No              Fair   
4   50    Male            Yes         High         Yes              Dark   

      Cancer  
0  Malignant  
1     Benign  
2  Malignant  
3     Benign  
4  Malignant  


In [6]:
# 🛠️ Preprocessing
# Encode categorical columns
label_encoders = {}
for column in skin_df.columns:
    if skin_df[column].dtype == 'object':
        le = LabelEncoder()
        skin_df[column] = le.fit_transform(skin_df[column])
        label_encoders[column] = le

# 🎯 Split Features and Target
X = skin_df.drop(columns=["Cancer"])
print("Number of training features:", X.shape[1])

y = skin_df["Cancer"]

# 📏 Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 🔀 Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


Number of training features: 6


In [7]:
# 🤖 Train Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# 🤖 Train XGBoost Classifier
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

Parameters: { "use_label_encoder" } are not used.



In [8]:
# 📈 Evaluate Models
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))
print("Random Forest Report:\n", classification_report(y_test, rf_pred))

print("XGBoost Accuracy:", accuracy_score(y_test, xgb_pred))
print("XGBoost Report:\n", classification_report(y_test, xgb_pred))



Random Forest Accuracy: 1.0
Random Forest Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

XGBoost Accuracy: 1.0
XGBoost Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4



In [9]:
# 💾 Save the better model (Random Forest or XGBoost)
joblib.dump(rf_model, "models/skin_cancer_model.pkl")


['models/skin_cancer_model.pkl']