In [6]:
import numpy as np
import os
import cv2
import random
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import joblib

data_path = '/kaggle/input/dermnet/train'

train_data = []
val_data = []

for folder in os.listdir(data_path):
    folder_path = os.path.join(data_path, folder)
    files = os.listdir(folder_path)
    num_train = int(0.8 * len(files))
    files_train = random.sample(files, num_train)
    files_val = list(set(files) - set(files_train))
    
    for file in files_train:
        file_path = os.path.join(folder_path, file)
        img = cv2.imread(file_path)
        img = cv2.resize(img, (224, 224))
        train_data.append((img, folder))
        
    for file in files_val:
        file_path = os.path.join(folder_path, file)
        img = cv2.imread(file_path)
        img = cv2.resize(img, (224, 224))
        val_data.append((img, folder))

X_train, y_train = zip(*train_data)
X_val, y_val = zip(*val_data)

X_train = preprocess_input(np.array(X_train))
X_val = preprocess_input(np.array(X_val))


le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_val_encoded = le.transform(y_val)

y_train_one_hot = to_categorical(y_train_encoded, num_classes=23)
y_val_one_hot = to_categorical(y_val_encoded, num_classes=23)

joblib.dump(le, 'label_encoder.pkl')


base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

x = GlobalAveragePooling2D()(base_model.output)
x = Dense(512, activation='relu')(x)
predictions = Dense(23, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

EPOCHS = 20
BATCH_SIZE = 64
model.fit(X_train, y_train_one_hot, validation_data=(X_val, y_val_one_hot),
          epochs=EPOCHS, batch_size=BATCH_SIZE)

model.save('disease_classification_model.h5')
print("Model training complete and saved as 'disease_classification_model.h5'.")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
Epoch 1/20


I0000 00:00:1735312554.490687     116 service.cc:145] XLA service 0x7b9c50012910 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1735312554.491437     116 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m  2/195[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18s[0m 97ms/step - accuracy: 0.0625 - loss: 4.2946 

I0000 00:00:1735312560.684025     116 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 184ms/step - accuracy: 0.2395 - loss: 2.8514 - val_accuracy: 0.3379 - val_loss: 2.2614
Epoch 2/20
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 108ms/step - accuracy: 0.4104 - loss: 2.0032 - val_accuracy: 0.3754 - val_loss: 2.1335
Epoch 3/20
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 109ms/step - accuracy: 0.4689 - loss: 1.7648 - val_accuracy: 0.3847 - val_loss: 2.0886
Epoch 4/20
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 109ms/step - accuracy: 0.5345 - loss: 1.5281 - val_accuracy: 0.4155 - val_loss: 2.0442
Epoch 5/20
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 109ms/step - accuracy: 0.6021 - loss: 1.3253 - val_accuracy: 0.4123 - val_loss: 2.0476
Epoch 6/20
[1m195/195[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 109ms/step - accuracy: 0.6444 - loss: 1.1680 - val_accuracy: 0.4271 - val_loss: 2.0623
Epoch 7/20
[1m195/19

In [3]:
import pandas as pd
df = pd.read_csv('/kaggle/input/cardio/cardio_train.csv',delimiter=';')
df = df.dropna()
df['age'] = (df['age'] / 365.35).astype(int)
df.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,168,62.0,110,80,1,1,0,0,1,0
1,1,55,1,156,85.0,140,90,3,1,0,0,1,1
2,2,51,1,165,64.0,130,70,3,1,0,0,0,1
3,3,48,2,169,82.0,150,100,1,1,0,0,1,1
4,4,47,1,156,56.0,100,60,1,1,0,0,0,0


In [4]:
df.drop('id',axis=1,inplace=True)

df['Systolic blood pressure'] = df['ap_hi']
df['Diastolic blood pressure'] = df['ap_lo']
df.drop(['ap_hi','ap_lo'],axis=1,inplace=True)

from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

label_encoder = LabelEncoder()
df['cholesterol'] = label_encoder.fit_transform(df['cholesterol'])
df['gluc'] = label_encoder.fit_transform(df['gluc'])

scaler = StandardScaler()
df[['age','weight', 'height']] = scaler.fit_transform(df[['age','weight', 'height']])

X = df.drop('cardio',axis=1)
y = df['cardio']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
cv_scores = cross_val_score(model, X, y, cv=5)
print(f"Cross-validated accuracy: {cv_scores.mean()}")

y_pred = model.predict(X_test)
print(f"Accuracy: {model.score(X_test, y_test)}")
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Cross-validated accuracy: 0.7088428571428571
Accuracy: 0.7062857142857143
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.71      0.71      6988
           1       0.71      0.70      0.71      7012

    accuracy                           0.71     14000
   macro avg       0.71      0.71      0.71     14000
weighted avg       0.71      0.71      0.71     14000

Confusion Matrix:
[[4956 2032]
 [2080 4932]]


In [19]:
joblib.dump(label_encoder, 'cardio_label_encoder.pkl')
print("Cardio LabelEncoder exported as 'cardio_label_encoder.pkl'")

Cardio LabelEncoder exported as 'cardio_label_encoder.pkl'


In [5]:
import joblib
joblib.dump(model, 'random_forest_model.joblib')

['random_forest_model.joblib']

In [20]:
import numpy as np
import os
import cv2
import random
import joblib
import pandas as pd
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

skin_model = load_model('disease_classification_model.h5')
cardio_model = joblib.load('random_forest_model.joblib')
skin_label_encoder = joblib.load('label_encoder.pkl')
cardio_label_encoder = joblib.load('/kaggle/working/cardio_label_encoder.pkl')
pathology_df = pd.read_csv('/kaggle/input/pathology-test-final-data/pathology_tests_dataset.csv')

def classify_skin_disease(image_path):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (224, 224))
    img_array = np.expand_dims(img, axis=0)
    img_array = preprocess_input(img_array)
    
    predictions = skin_model.predict(img_array)
    predicted_class_index = np.argmax(predictions)
    predicted_class = skin_label_encoder.classes_[predicted_class_index]
    
    original_predicted_class = predicted_class
    if "photos" in predicted_class.lower():
        final_predicted_class = predicted_class.lower().replace("photos", "").strip().capitalize()
    else:
        final_predicted_class = predicted_class
    
    return final_predicted_class, original_predicted_class

def get_pathology_test(disease_name):
    match = pathology_df[pathology_df.iloc[:, 0].str.strip().str.lower() == disease_name.lower().strip()]
    
    if not match.empty:
        return match.iloc[0, 1]
    else:
        return "No matching pathology test found."

def predict_cardio_disease(inputs):
    columns = ['age', 'gender', 'height', 'weight', 'cholesterol', 'gluc', 'smoke', 'alco', 'active', 'Systolic blood pressure', 'Diastolic blood pressure']
    X = pd.DataFrame([inputs], columns=columns)
    cardio_label_encoder = joblib.load('cardio_label_encoder.pkl')
    X['cholesterol'] = cardio_label_encoder.transform(X['cholesterol'])
    X['gluc'] = cardio_label_encoder.transform(X['gluc'])
    prediction = cardio_model.predict(X)
    predicted_disease = 'Cardio Disease' if prediction[0] == 1 else 'No disease detected'
    return predicted_disease

In [22]:
def main():
    print("Please select an option:")
    print("1. Skin Disease Classification")
    print("2. Cardio Disease Prediction")
    
    choice = input("Enter your choice (1 or 2): ")
    
    if choice == '1': 
        image_path = input("Enter the path of the image to classify: ")
        predicted_disease, original_predicted_disease = classify_skin_disease(image_path)
        print(f"The model predicts this disease as: {predicted_disease}")
        
        pathology_test = get_pathology_test(original_predicted_disease)
        print(f"Pathology test Recommended: {pathology_test}")
    
    elif choice == '2':
        print("Enter the following details for Cardio Disease Prediction:")
        age = int(input("Age (in years): "))
        gender = int(input("Gender (1 for Male, 2 for Female): "))
        height = float(input("Height (in cm): "))
        weight = float(input("Weight (in kg): "))
        cholesterol = int(input("Cholesterol (1: Normal, 2: Above normal, 3: Well above normal): "))
        gluc = int(input("Glucose (1: Normal, 2: Above normal, 3: Well above normal): "))
        smoke = int(input("Smoker (1: Yes, 0: No): "))
        alco = int(input("Alcohol consumption (1: Yes, 0: No): "))
        active = int(input("Physical activity (1: Yes, 0: No): "))
        systolic_bp = int(input("Systolic Blood Pressure (mmHg): "))
        diastolic_bp = int(input("Diastolic Blood Pressure (mmHg): "))
        
        inputs = [age, gender, height, weight, cholesterol, gluc, smoke, alco, active, systolic_bp, diastolic_bp]
        predicted_disease = predict_cardio_disease(inputs)
        print(f"The model predicts: {predicted_disease}")
        
        pathology_test = get_pathology_test(predicted_disease)
        print(f"Recommended Pathology Test: {pathology_test}")
    else:
        print("Invalid choice. Please try again.")

if __name__ == "__main__":
    main()

Please select an option:
1. Skin Disease Classification
2. Cardio Disease Prediction


Enter your choice (1 or 2):  1
Enter the path of the image to classify:  /kaggle/input/dermnet/test/Urticaria Hives/PUPPP-18.jpg


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
The model predicts this disease as: Urticaria Hives
Pathology test Recommended: Allergy tests (IgE), CBC, Thyroid function tests
