In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Generate synthetic dataset
np.random.seed(42)
n_samples = 1000

# Features: age, skin_type, exposure_time, symptoms_duration
X = np.column_stack((
    np.random.randint(18, 80, n_samples),  # age
    np.random.randint(1, 6, n_samples),    # skin_type (1-5)
    np.random.randint(1, 8, n_samples),    # exposure_time (hours)
    np.random.randint(1, 30, n_samples)    # symptoms_duration (days)
))

# Target: disease_spread (0: no spread, 1: mild spread, 2: severe spread)
y = np.random.randint(0, 3, n_samples)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train_scaled, y_train)
svm_pred = svm_model.predict(X_test_scaled)
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

# KNN
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)
knn_pred = knn_model.predict(X_test_scaled)
print("KNN Accuracy:", accuracy_score(y_test, knn_pred))
print("KNN Classification Report:")
print(classification_report(y_test, knn_pred))

# CNN
# Reshape data for CNN input
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], 2, 2, 1)
X_test_cnn = X_test_scaled.reshape(X_test_scaled.shape[0], 2, 2, 1)

cnn_model = Sequential([
    Conv2D(32, (2, 2), activation='relu', input_shape=(2, 2, 1)),
    MaxPooling2D((1, 1)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=0)

cnn_pred = cnn_model.predict(X_test_cnn)
cnn_pred_classes = np.argmax(cnn_pred, axis=1)
print("CNN Accuracy:", accuracy_score(y_test, cnn_pred_classes))
print("CNN Classification Report:")
print(classification_report(y_test, cnn_pred_classes))

# Function to predict disease spread
def predict_disease_spread(age, skin_type, exposure_time, symptoms_duration):
    input_data = np.array([[age, skin_type, exposure_time, symptoms_duration]])
    input_scaled = scaler.transform(input_data)

    svm_prediction = svm_model.predict(input_scaled)[0]
    knn_prediction = knn_model.predict(input_scaled)[0]
    cnn_prediction = np.argmax(cnn_model.predict(input_scaled.reshape(1, 2, 2, 1)), axis=1)[0]

    return svm_prediction, knn_prediction, cnn_prediction

# Example prediction
age = 45
skin_type = 3
exposure_time = 4
symptoms_duration = 7

svm_result, knn_result, cnn_result = predict_disease_spread(age, skin_type, exposure_time, symptoms_duration)
print(f"\nPrediction for: Age={age}, Skin Type={skin_type}, Exposure Time={exposure_time}h, Symptoms Duration={symptoms_duration} days")
print(f"SVM Prediction: {svm_result}")
print(f"KNN Prediction: {knn_result}")
print(f"CNN Prediction: {cnn_result}")

SVM Accuracy: 0.35
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.32      0.66      0.43        58
           1       0.08      0.01      0.03        68
           2       0.44      0.42      0.43        74

    accuracy                           0.35       200
   macro avg       0.28      0.36      0.30       200
weighted avg       0.28      0.35      0.29       200

KNN Accuracy: 0.34
KNN Classification Report:
              precision    recall  f1-score   support

           0       0.30      0.57      0.40        58
           1       0.37      0.32      0.34        68
           2       0.42      0.18      0.25        74

    accuracy                           0.34       200
   macro avg       0.36      0.36      0.33       200
weighted avg       0.37      0.34      0.32       200



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
CNN Accuracy: 0.305
CNN Classification Report:
              precision    recall  f1-score   support

           0       0.30      0.60      0.40        58
           1       0.20      0.10      0.14        68
           2       0.39      0.26      0.31        74

    accuracy                           0.30       200
   macro avg       0.30      0.32      0.28       200
weighted avg       0.30      0.30      0.28       200

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step

Prediction for: Age=45, Skin Type=3, Exposure Time=4h, Symptoms Duration=7 days
SVM Prediction: 0
KNN Prediction: 1
CNN Prediction: 0


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Generate synthetic dataset for melanoma spread prediction
np.random.seed(42)
n_samples = 1000

# Features:
# age: 18-90 years
# skin_type: Fitzpatrick scale (1-6)
# sun_exposure: Annual hours of sun exposure (100-2000)
# num_moles: Number of moles (0-100)
# family_history: Binary (0: No, 1: Yes)
# sunburn_history: Number of severe sunburns (0-20)
# tumor_thickness: in mm (0.1-10)

X = np.column_stack((
    np.random.randint(18, 91, n_samples),          # age
    np.random.randint(1, 7, n_samples),            # skin_type
    np.random.randint(100, 2001, n_samples),       # sun_exposure
    np.random.randint(0, 101, n_samples),          # num_moles
    np.random.randint(0, 2, n_samples),            # family_history
    np.random.randint(0, 21, n_samples),           # sunburn_history
    np.round(np.random.uniform(0.1, 10, n_samples), 1)  # tumor_thickness
))

# Target: melanoma_spread (0: no spread, 1: local spread, 2: regional spread, 3: distant spread)
y = np.random.randint(0, 4, n_samples)

# Create a more realistic distribution based on tumor thickness
y = np.where(X[:, 6] < 1, 0,
             np.where(X[:, 6] < 2, 1,
                      np.where(X[:, 6] < 4, 2, 3)))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train_scaled, y_train)
svm_pred = svm_model.predict(X_test_scaled)
print("SVM Accuracy:", accuracy_score(y_test, svm_pred))
print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

# KNN
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)
knn_pred = knn_model.predict(X_test_scaled)
print("KNN Accuracy:", accuracy_score(y_test, knn_pred))
print("KNN Classification Report:")
print(classification_report(y_test, knn_pred))

# CNN
# Reshape data for CNN input (7 features to 3x3 matrix with padding)
X_train_cnn = np.pad(X_train_scaled, ((0, 0), (0, 2))).reshape(X_train_scaled.shape[0], 3, 3, 1)
X_test_cnn = np.pad(X_test_scaled, ((0, 0), (0, 2))).reshape(X_test_scaled.shape[0], 3, 3, 1)

cnn_model = Sequential([
    Conv2D(32, (2, 2), activation='relu', input_shape=(3, 3, 1)),
    MaxPooling2D((1, 1)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(4, activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train_cnn, y_train, epochs=25, batch_size=32, validation_split=0.2, verbose=0)

cnn_pred = cnn_model.predict(X_test_cnn)
cnn_pred_classes = np.argmax(cnn_pred, axis=1)
print("CNN Accuracy:", accuracy_score(y_test, cnn_pred_classes))
print("CNN Classification Report:")
print(classification_report(y_test, cnn_pred_classes))

# Function to predict melanoma spread
def predict_melanoma_spread(age, skin_type, sun_exposure, num_moles, family_history, sunburn_history, tumor_thickness):
    input_data = np.array([[age, skin_type, sun_exposure, num_moles, family_history, sunburn_history, tumor_thickness]])
    input_scaled = scaler.transform(input_data)

    svm_prediction = svm_model.predict(input_scaled)[0]
    knn_prediction = knn_model.predict(input_scaled)[0]
    cnn_prediction = np.argmax(cnn_model.predict(np.pad(input_scaled, ((0, 0), (0, 2))).reshape(1, 3, 3, 1)), axis=1)[0]

    return svm_prediction, knn_prediction, cnn_prediction

# Real-world scenarios
scenarios = [
    {
        'description': "Young adult with low risk factors",
        'age': 25,
        'skin_type': 3,
        'sun_exposure': 500,
        'num_moles': 10,
        'family_history': 0,
        'sunburn_history': 2,
        'tumor_thickness': 0.5
    },
    {
        'description': "Middle-aged person with moderate risk factors",
        'age': 45,
        'skin_type': 2,
        'sun_exposure': 1000,
        'num_moles': 30,
        'family_history': 1,
        'sunburn_history': 8,
        'tumor_thickness': 2.5
    },
    {
        'description': "Elderly person with high risk factors",
        'age': 70,
        'skin_type': 1,
        'sun_exposure': 1800,
        'num_moles': 50,
        'family_history': 1,
        'sunburn_history': 15,
        'tumor_thickness': 5.0
    }
]

for scenario in scenarios:
    svm_result, knn_result, cnn_result = predict_melanoma_spread(
        scenario['age'], scenario['skin_type'], scenario['sun_exposure'],
        scenario['num_moles'], scenario['family_history'], scenario['sunburn_history'],
        scenario['tumor_thickness']
    )
    print(f"\nScenario: {scenario['description']}")
    print(f"Age: {scenario['age']}, Skin Type: {scenario['skin_type']}, Sun Exposure: {scenario['sun_exposure']}h/year")
    print(f"Number of Moles: {scenario['num_moles']}, Family History: {'Yes' if scenario['family_history'] else 'No'}")
    print(f"Sunburn History: {scenario['sunburn_history']}, Tumor Thickness: {scenario['tumor_thickness']}mm")
    print(f"SVM Prediction: {svm_result}")
    print(f"KNN Prediction: {knn_result}")
    print(f"CNN Prediction: {cnn_result}")
    print("Prediction Key: 0 - No spread, 1 - Local spread, 2 - Regional spread, 3 - Distant spread")

SVM Accuracy: 0.86
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.67      0.73        18
           1       0.57      0.55      0.56        22
           2       0.82      0.80      0.81        51
           3       0.94      0.98      0.96       109

    accuracy                           0.86       200
   macro avg       0.78      0.75      0.76       200
weighted avg       0.86      0.86      0.86       200

KNN Accuracy: 0.735
KNN Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.44      0.47        18
           1       0.43      0.41      0.42        22
           2       0.55      0.53      0.54        51
           3       0.90      0.94      0.92       109

    accuracy                           0.73       200
   macro avg       0.60      0.58      0.59       200
weighted avg       0.73      0.73      0.73       200



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
CNN Accuracy: 0.97
CNN Classification Report:
              precision    recall  f1-score   support

           0       0.82      1.00      0.90        18
           1       0.94      0.77      0.85        22
           2       0.98      0.98      0.98        51
           3       1.00      1.00      1.00       109

    accuracy                           0.97       200
   macro avg       0.94      0.94      0.93       200
weighted avg       0.97      0.97      0.97       200

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step

Scenario: Young adult with low risk factors
Age: 25, Skin Type: 3, Sun Exposure: 500h/year
Number of Moles: 10, Family History: No
Sunburn History: 2, Tumor Thickness: 0.5mm
SVM Prediction: 1
KNN Prediction: 1
CNN Prediction: 0
Prediction Key: 0 - No spread, 1 - Local spread, 2 - Regional spread, 3 - Distant spread
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Set random seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Generate synthetic data
data = pd.DataFrame({
    'age': np.random.randint(18, 90, n_samples),
    'skin_type': np.random.randint(1, 7, n_samples),  # Fitzpatrick scale 1-6
    'sun_exposure': np.random.randint(100, 2001, n_samples),  # Annual hours
    'num_moles': np.random.randint(0, 101, n_samples),
    'family_history': np.random.choice([0, 1], n_samples),  # 0: No, 1: Yes
    'sunburn_history': np.random.randint(0, 21, n_samples),
    'tumor_thickness': np.round(np.random.uniform(0.1, 10, n_samples), 2)  # mm
})

# Create a more realistic distribution of the target variable based on risk factors
def assign_spread(row):
    risk_score = (
        (row['age'] > 50) * 1 +
        (row['skin_type'] <= 3) * 1 +
        (row['sun_exposure'] > 1000) * 1 +
        (row['num_moles'] > 50) * 1 +
        (row['family_history'] == 1) * 1 +
        (row['sunburn_history'] > 10) * 1 +
        (row['tumor_thickness'] > 2) * 2 +
        (row['tumor_thickness'] > 4) * 2
    )

    if risk_score <= 2:
        return 0  # No spread
    elif risk_score <= 4:
        return 1  # Local spread
    elif risk_score <= 6:
        return 2  # Regional spread
    else:
        return 3  # Distant spread

data['melanoma_spread'] = data.apply(assign_spread, axis=1)

# Display the first few rows and basic statistics
print(data.head())
print("\nDataset Info:")
print(data.info())
print("\nDataset Description:")
print(data.describe())

print("\nMelanoma Spread Distribution:")
print(data['melanoma_spread'].value_counts(normalize=True))

# Save the dataset to a CSV file
data.to_csv('melanoma_dataset.csv', index=False)
print("\nDataset saved as 'melanoma_dataset.csv'")

# Prepare features and target for model training
X = data.drop('melanoma_spread', axis=1)
y = data['melanoma_spread']

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print("\nScaled Features (first 5 rows):")
print(pd.DataFrame(X_scaled, columns=X.columns).head())

# At this point, X_scaled and y would be used to train the models (SVM, KNN, CNN)

   age  skin_type  sun_exposure  num_moles  family_history  sunburn_history  \
0   69          5           101         85               1                2   
1   32          1           688         14               1                6   
2   89          1           743         84               0                8   
3   78          3           189         78               1                4   
4   38          3          1468         85               0                9   

   tumor_thickness  melanoma_spread  
0             5.52                3  
1             7.37                2  
2             3.53                2  
3             4.14                3  
4             1.34                1  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   age              1000 non-null   int64  
 1   skin_type        1000 non-null   i