In [None]:


import numpy as np
import pandas as pd

import os


# Import packages

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report

from imblearn.over_sampling import RandomOverSampler

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, callbacks
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop

ModuleNotFoundError: No module named 'tensorflow'

# Load data

In [None]:
# Location of the asthma dataset. Set the ASTHMA_DATA_PATH environment variable
# to point at the CSV on another machine; when it is unset, the original
# hard-coded location is used so existing setups keep working.
DATA_PATH = os.environ.get(
    'ASTHMA_DATA_PATH',
    'C:/Users/Vahoaka/multiple-disease-prediction-streamlit-app-main/dataset/asthma_disease_data.csv',
)

df = pd.read_csv(DATA_PATH)
df.head()

# Data preprocessing

In [None]:
# Identifier-style columns are removed before any analysis or modelling.
irrelevant_columns = ['PatientID', 'DoctorInCharge']
df = df.drop(columns = irrelevant_columns)

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to spot missing values.
df.info()

In [None]:
# Count the rows that repeat an earlier row exactly.
int(df.duplicated().sum())

In [None]:
# Summary statistics, transposed so that each column of df becomes one row.
summary_stats = df.describe()
summary_stats.transpose()

# Exploratory Data Analysis (EDA)

In [None]:
# Work on a copy so the numeric frame used for modelling stays untouched.
df_copy = df.copy()

# For each coded column: (label used when the value equals 1, label used otherwise).
readable_labels = {
    'Gender': ('female', 'male'),
    'Diagnosis': ('Yes', 'No'),
    'Smoking': ('Yes', 'No'),
}
for column_name, (label_if_one, label_otherwise) in readable_labels.items():
    df_copy[column_name] = np.where(df_copy[column_name] == 1, label_if_one, label_otherwise)

In [None]:
# Class balance: number of asthmatic vs. non-asthmatic patients in the data.
fig, ax = plt.subplots(figsize = (4, 3))
sns.countplot(data = df_copy, x = 'Diagnosis', ax = ax)

In [None]:
# Exact number of rows per diagnosis label (the figures behind the count plot).
rows_by_diagnosis = df_copy.groupby('Diagnosis')
rows_by_diagnosis['Diagnosis'].count()

In [None]:
# Diagnosis counts split by gender.
fig, ax = plt.subplots(figsize = (4, 3))
sns.countplot(data = df_copy, x = 'Diagnosis', hue = 'Gender', ax = ax)

In [None]:
# Diagnosis counts split by smoking status.
fig, ax = plt.subplots(figsize = (4, 3))
sns.countplot(data = df_copy, x = 'Diagnosis', hue = 'Smoking', ax = ax)

In [None]:
# Age distribution of the patients diagnosed with asthma.
fig, ax = plt.subplots(figsize = (4, 3))

# Select with a single .loc call instead of chained indexing.
asthmatic_ages = df_copy.loc[df_copy['Diagnosis'] == 'Yes', 'Age']

# Draw on the Axes created above and label it so the figure stands alone.
ax.hist(asthmatic_ages)
ax.set_xlabel('Age')
ax.set_ylabel('Number of patients')
ax.set_title('Age of asthmatic patients')

# Ending with plt.show() keeps the raw bin arrays returned by hist() out of the cell output.
plt.show()

# Building model

In [None]:
# Separate the features from the binary target.
X = df.drop(columns = ['Diagnosis'])
y = df['Diagnosis']

# Split BEFORE resampling or scaling. Oversampling first would place copies of
# the same minority rows in both train and test, and fitting the scaler on all
# rows would let test-set statistics leak into training; both inflate the
# reported test scores. `stratify = y` keeps the class ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 42, stratify = y
)

# Balance the classes in the training split only; the test split keeps the
# class distribution of the original data.
OverS = RandomOverSampler(random_state = 24)
X_train, y_train = OverS.fit_resample(X_train, y_train)

# Learn the min/max of each feature from the training split, then apply the
# same transformation to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Early-stopping callback. No `monitor` is passed, so the Keras default
# quantity is watched; the best weights seen are restored when training stops.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights = True,
    patience = 10,      # epochs without improvement tolerated before stopping
    min_delta = 0.001,  # minimum change that counts as an improvement
)

# One hidden layer sized to the number of feature columns, then a single
# sigmoid unit for the binary output.
n_features = X_train.shape[1]
hidden_layer = layers.Dense(512, activation = 'relu', input_shape = [n_features])
output_layer = layers.Dense(1, activation = 'sigmoid')

model = keras.Sequential([hidden_layer, output_layer])

model.compile(
    loss = 'binary_crossentropy',
    optimizer = RMSprop(learning_rate = 0.001),
    metrics = ['accuracy'],
)

# Training model

In [None]:
# Fit the network; the early-stopping callback ends training well before the
# 500-epoch ceiling when the monitored quantity stops improving.
# NOTE(review): the test split is also used as validation data here, so early
# stopping selects the weights using the same rows that are scored later.
# The test metrics are therefore optimistic; consider a separate validation split.
history = model.fit(
    X_train, y_train,
    validation_data = (X_test, y_test),
    batch_size = 20,
    epochs = 500,
    callbacks = [early_stopping],
    verbose = 0
)

# Per-epoch metrics recorded by Keras during training.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# One x position per completed epoch (early stopping may end before 500).
epochs = range(len(acc))

# One figure per metric: (title, y-axis label, training label, training values,
# validation label, validation values).
curves = (
    ('Training and validation accuracy', 'Accuracy',
     'Training accuracy', acc, 'Validation accuracy', val_acc),
    ('Training and validation loss', 'Loss',
     'Training Loss', loss, 'Validation Loss', val_loss),
)
for title, y_label, train_label, train_values, val_label, val_values in curves:
    fig, ax = plt.subplots()
    ax.plot(epochs, train_values, 'bo', label = train_label)
    ax.plot(epochs, val_values, 'b', label = val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()

plt.show()

# Testing model

In [None]:
# Turn the sigmoid outputs into hard 0/1 labels using a 0.5 cut-off.
predicted_scores = model.predict(X_test)
y_pred = (predicted_scores >= 0.5).astype(int)

In [None]:
# Text report comparing the thresholded predictions with the true test labels.
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
# Confusion matrix as a labelled table: rows are the true labels,
# columns are the predicted labels.
class_names = ['No', 'Yes']
model_matrix_df = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index = [f'Actual ({name})' for name in class_names],
    columns = [f'Predicted ({name})' for name in class_names],
)
model_matrix_df

In [None]:
# Record the Python interpreter version this notebook was run with.
import sys
print(sys.version)


3.10.14 | packaged by Anaconda, Inc. | (main, May  6 2024, 19:44:50) [MSC v.1916 64 bit (AMD64)]
