In [1]:
pip install ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [2]:
import wave
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime
from os import listdir
from os.path import isfile, join
import librosa
import librosa.display
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
from tqdm.notebook import tqdm
from tensorflow.keras import layers, models

# Function to extract data
def Extract_Data(file_name, root):
    tokens = file_name.split('_')
    recording_info = pd.DataFrame(data=[tokens], columns=['Patient number', 'Recording index', 'Chest location', 'Acquisition mode', 'Recording equipment'])
    recording_annotations = pd.read_csv(os.path.join(root, file_name + '.txt'), names=['Start', 'End', 'Crackles', 'Wheezes'], delimiter='\t')
    return recording_info, recording_annotations

# Read patient info
patient_info = pd.read_csv(r"C:\Users\Ankit yadav\Desktop\stetho\Respiratory_Sound_Database\demographic_info.txt",
                           names=['Patient ID', 'Age', 'Sex', 'Adult BMI', 'Child Weight(kgs)', 'Child height(cms)'],
                           delimiter=" ")

# Read patient diagnosis info (ensure this file is correctly loaded)
patient_diagnosis = pd.read_csv(r"C:\Users\Ankit yadav\Desktop\stetho\Respiratory_Sound_Database\Respiratory_Sound_Database\patient_diagnosis.csv",
                                names=['Patient ID', 'Diagnosis'])

# Merge patient info with diagnosis
patient_record = patient_info.join(patient_diagnosis.set_index('Patient ID'), on='Patient ID', how='left')

# Root directory for audio files
root = r"C:\Users\Ankit yadav\Desktop\stetho\Respiratory_Sound_Database\Respiratory_Sound_Database\audio_and_txt_files"
filenames = [s.split('.')[0] for s in os.listdir(path=root) if '.txt' in s]

# Initialize lists for storing data
i_list = []  # For storing patient details
rec_annotations = []  # For storing recording annotations
rec_annotations_dict = {}  # For storing annotations in dictionary

# Iterate over files and extract data
for s in filenames:
    i, a = Extract_Data(s, root)
    i_list.append(i)
    rec_annotations.append(a)
    rec_annotations_dict[s] = a

# Concatenate all patient info into a DataFrame
recording_info = pd.concat(i_list, axis=0)

# Initialize lists for storing labels
no_label_list, crack_list, wheeze_list, both_sym_list, filename_list = [], [], [], [], []

# Loop to extract symptoms from each file
for f in filenames:
    d = rec_annotations_dict[f]
    no_labels = len(d[(d['Crackles'] == 0) & (d['Wheezes'] == 0)].index)
    n_crackles = len(d[(d['Crackles'] == 1) & (d['Wheezes'] == 0)].index)
    n_wheezes = len(d[(d['Crackles'] == 0) & (d['Wheezes'] == 1)].index)
    both_sym = len(d[(d['Crackles'] == 1) & (d['Wheezes'] == 1)].index)
    
    # Append results to lists
    no_label_list.append(no_labels)
    crack_list.append(n_crackles)
    wheeze_list.append(n_wheezes)
    both_sym_list.append(both_sym)
    filename_list.append(f)

# Create a DataFrame for file labels
file_label_df = pd.DataFrame(data={'filename': filename_list})

# Adding diagnosis and patient details
diagnosis, patient_number, recording_index, chest_location, acquisition_mode, recording_equipment = [], [], [], [], [], []

# Extract relevant details from filenames and join with patient info
for i in tqdm(range(len(file_label_df['filename']))):
    info = file_label_df['filename'][i].split('_')
    patient_id, recording_idx, chest_loc, acq_mode, equipment = info
    diagnosis.append(patient_record['Diagnosis'][int(patient_id) - 101])
    patient_number.append(patient_id)
    recording_index.append(recording_idx)
    chest_location.append(chest_loc)
    acquisition_mode.append(acq_mode)
    recording_equipment.append(equipment)

# Add extracted information to the DataFrame
file_label_df['Diagnosis'] = diagnosis
file_label_df['Patient Number'] = patient_number
file_label_df['Chest Location'] = chest_location
file_label_df['Acquisition Mode'] = acquisition_mode
file_label_df['Recording Equipment'] = recording_equipment

# Create a 3-class diagnosis column based on conditions
diagnosis_3 = []
for diagnosis in file_label_df['Diagnosis']:
    if diagnosis in ['COPD', 'Bronchiectasis', 'Asthma']:
        diagnosis_3.append('Chronic Disease')
    elif diagnosis in ['URTI', 'LRTI', 'Pneumonia', 'Bronchiolitis']:
        diagnosis_3.append('Non-Chronic Disease')
    else:
        diagnosis_3.append('Normal')

# Add the 3-class diagnosis to the DataFrame
file_label_df['3 label diagnosis'] = diagnosis_3

# Print the diagnosis counts
print(file_label_df['Diagnosis'].value_counts())


  0%|          | 0/920 [00:00<?, ?it/s]

Diagnosis
COPD              793
Pneumonia          37
Healthy            35
URTI               23
Bronchiectasis     16
Bronchiolitis      13
LRTI                2
Asthma              1
Name: count, dtype: int64


In [3]:
file_label_df.head()


Unnamed: 0,filename,Diagnosis,Patient Number,Chest Location,Acquisition Mode,Recording Equipment,3 label diagnosis
0,101_1b1_Al_sc_Meditron,URTI,101,Al,sc,Meditron,Non-Chronic Disease
1,101_1b1_Pr_sc_Meditron,URTI,101,Pr,sc,Meditron,Non-Chronic Disease
2,102_1b1_Ar_sc_Meditron,Healthy,102,Ar,sc,Meditron,Normal
3,103_2b2_Ar_mc_LittC2SE,Asthma,103,Ar,mc,LittC2SE,Chronic Disease
4,104_1b1_Al_sc_Litt3200,COPD,104,Al,sc,Litt3200,Chronic Disease


In [4]:
import sklearn
print(sklearn.__version__)



1.6.0


In [5]:
import streamlit as st


In [6]:
def preprocessing(audio_file, mode):
    # we want to resample audio to 16 kHz
    sr_new = 16000 # 16kHz sample rate
    x, sr = librosa.load(audio_file, sr=sr_new)

    # padding sound 
    # because duration of sound is dominantly 20 s and all of sample rate is 22050
    # we want to pad or truncated sound which is below or above 20 s respectively
    max_len = 5 * sr_new  # length of sound array = time x sample rate
    if x.shape[0] < max_len:
      # padding with zero
      pad_width = max_len - x.shape[0]
      x = np.pad(x, (0, pad_width))
    elif x.shape[0] > max_len:
      # truncated
      x = x[:max_len]
    
    if mode == 'mfcc':
      feature = librosa.feature.mfcc(y=x, sr=sr_new)
    
    elif mode == 'log_mel':
      feature = librosa.feature.melspectrogram(y=x, sr=sr_new, n_mels=128, fmax=8000)
      feature = librosa.power_to_db(feature, ref=np.max)
    

    return feature

In [8]:
# Define the audio directory path
audio = r"C:\Users\Ankit yadav\Desktop\stetho\Respiratory_Sound_Database\Respiratory_Sound_Database\audio_and_txt_files"

labels = []
labels_3 = []
preprocessed_data = []

# Process each file in the file_label_df DataFrame
for i in tqdm(range(len(file_label_df['filename']))):
    labels.append(file_label_df['Diagnosis'][i])
    # labels_3.append(file_label_df['3 label diagnosis'][i])
    
    # Construct the full path for the audio file
    audio_file = audio + "/" + file_label_df['filename'][i] + '.wav'
    
    # Preprocess the audio data (ensure that the 'preprocessing' function is defined)
    data = preprocessing(audio_file, mode='mfcc')
    preprocessed_data.append(data)

# Convert lists to numpy arrays
preprocessed_data = np.array(preprocessed_data)
labels = np.array(labels)
labels_3 = np.array(labels_3)


  0%|          | 0/920 [00:00<?, ?it/s]

In [9]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D,BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
print(preprocessed_data.shape)

(920, 20, 157)


In [10]:
preprocessed_data = preprocessed_data.reshape((-1, 20, 157, 1))
encoder = LabelEncoder()
i_labels = encoder.fit_transform(labels)
oh_labels = to_categorical(i_labels,num_classes=8) 
oh_labels
type(oh_labels)
print(list(encoder.classes_))

['Asthma', 'Bronchiectasis', 'Bronchiolitis', 'COPD', 'Healthy', 'LRTI', 'Pneumonia', 'URTI']


In [11]:
unique_values, counts = np.unique(oh_labels, return_counts=True)

# Display unique values and their counts
for value, count in zip(unique_values, counts):
    print(f"Value: {value}, Count: {count}")

Value: 0.0, Count: 6440
Value: 1.0, Count: 920


In [12]:
x_train, x_test, y_train, y_test = train_test_split(preprocessed_data, oh_labels, 
                                                    test_size=0.2, random_state = 42)

In [13]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D,BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

In [14]:
num_rows = 20
num_columns = 157
num_channels = 1


num_labels = oh_labels.shape[1]
filter_size = 2

# Construct model 
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=filter_size,
                 input_shape=(num_rows, num_columns, num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))

model.add(Conv2D(filters=32, kernel_size=filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))

model.add(Conv2D(filters=64, kernel_size=filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))


model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax')) 

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
y_train[1].shape
num_epochs = 40
num_batch_size = 64

callbacks = [
    ModelCheckpoint(
        filepath='mymodel2_{epoch:02d}.keras',
        # Path where to save the model
        # The two parameters below mean that we will overwrite
        # the current checkpoint if and only if
        # the `val_accuracy` score has improved.C
        save_best_only=True,
        monitor='val_accuracy',
        verbose=1)
]
start = datetime.now()
history = model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs,
          validation_split=0.1, callbacks=callbacks, verbose=1)



duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/40
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.4722 - loss: 5.3844
Epoch 1: val_accuracy improved from -inf to 0.82432, saving model to mymodel2_01.keras
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 143ms/step - accuracy: 0.4911 - loss: 5.1772 - val_accuracy: 0.8243 - val_loss: 1.3020
Epoch 2/40
[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 59ms/step - accuracy: 0.8490 - loss: 0.9387
Epoch 2: val_accuracy did not improve from 0.82432
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.8520 - loss: 0.9241 - val_accuracy: 0.8243 - val_loss: 1.0983
Epoch 3/40
[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 61ms/step - accuracy: 0.8854 - loss: 0.6580
Epoch 3: val_accuracy did not improve from 0.82432
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - accuracy: 0.8843 - loss: 0.6623 - val_accuracy: 0.8243 - val_loss: 0.9223
Epo

[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 56ms/step - accuracy: 0.8652 - loss: 0.3917
Epoch 26: val_accuracy did not improve from 0.86486
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - accuracy: 0.8675 - loss: 0.3876 - val_accuracy: 0.8243 - val_loss: 0.4174
Epoch 27/40
[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 58ms/step - accuracy: 0.8698 - loss: 0.3911
Epoch 27: val_accuracy did not improve from 0.86486
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step - accuracy: 0.8704 - loss: 0.3861 - val_accuracy: 0.8514 - val_loss: 0.4288
Epoch 28/40
[1m10/11[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 56ms/step - accuracy: 0.8825 - loss: 0.3684
Epoch 28: val_accuracy did not improve from 0.86486
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - accuracy: 0.8825 - loss: 0.3677 - val_accuracy: 0.8514 - val_loss: 0.4076
Epoch 29/40
[1m10/11[0m [32m━━━━━━━━━━━━━━━

In [18]:
pip install imbalanced-learn


Collecting imbalanced-learnNote: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for imbalanced-learn from https://files.pythonhosted.org/packages/9d/41/721fec82606242a2072ee909086ff918dfad7d0199a9dfd4928df9c72494/imbalanced_learn-0.13.0-py3-none-any.whl.metadata
  Downloading imbalanced_learn-0.13.0-py3-none-any.whl.metadata (8.8 kB)
Collecting sklearn-compat<1,>=0.1 (from imbalanced-learn)
  Obtaining dependency information for sklearn-compat<1,>=0.1 from https://files.pythonhosted.org/packages/f0/a8/ad69cf130fbd017660cdd64abbef3f28135d9e2e15fe3002e03c5be0ca38/sklearn_compat-0.1.3-py3-none-any.whl.metadata
  Downloading sklearn_compat-0.1.3-py3-none-any.whl.metadata (18 kB)
Downloading imbalanced_learn-0.13.0-py3-none-any.whl (238 kB)
   ---------------------------------------- 0.0/238.4 kB ? eta -:--:--
   --- ----------------------------------- 20.5/238.4 kB 682.7 kB/s eta 0:00:01
   ---------------------------------------  235.5/238.

In [19]:
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.9035326242446899
Testing Accuracy:  0.820652186870575


In [20]:
from imblearn.over_sampling import RandomOverSampler
import numpy as np



n_samples, height, width, channels = preprocessed_data.shape
X_reshaped = preprocessed_data.reshape(n_samples, -1)

# Instantiate RandomOverSampler
random_oversampler = RandomOverSampler(random_state=42)

# Upsample the minority class using RandomOverSampler
X_resampled, y_resampled = random_oversampler.fit_resample(X_reshaped, oh_labels)
X_restored = X_resampled.reshape(-1, 20, 157, 1)


# Check the shape of the resampled data
print("Shape of X_resampled:", X_resampled.shape)
print("Shape of y_resampled:", y_resampled.shape)

Shape of X_resampled: (6344, 3140)
Shape of y_resampled: (6344, 8)


In [21]:
x_train, x_test, y_train, y_test = train_test_split(X_restored, y_resampled, stratify=y_resampled, 
                                                    test_size=0.2, random_state = 42)

In [22]:
num_rows = 20
num_columns = 157
num_channels = 1


num_labels = oh_labels.shape[1]
filter_size = 2

# Construct model 
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=filter_size,
                 input_shape=(num_rows, num_columns,num_channels), activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))

model.add(Conv2D(filters=32, kernel_size=filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Conv2D(filters=64, kernel_size=filter_size, activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))


model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax')) 

model.summary()

# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# train model
num_epochs = 30
num_batch_size = 64
callbacks = [
    ModelCheckpoint(
        filepath='mymodel3_{epoch:02d}.keras',
        save_best_only=True,
        monitor='val_accuracy',
        verbose=1)
]
start = datetime.now()

history = model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs,
          validation_split=0.1, callbacks=callbacks, verbose=1)



duration = datetime.now() - start
print("Training completed in time: ", duration)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.1593 - loss: 9.9889 
Epoch 1: val_accuracy improved from -inf to 0.21063, saving model to mymodel3_01.keras
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 66ms/step - accuracy: 0.1595 - loss: 9.9180 - val_accuracy: 0.2106 - val_loss: 1.9348
Epoch 2/30
[1m71/72[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.2570 - loss: 1.9159
Epoch 2: val_accuracy improved from 0.21063 to 0.37992, saving model to mymodel3_02.keras
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 61ms/step - accuracy: 0.2579 - loss: 1.9138 - val_accuracy: 0.3799 - val_loss: 1.7017
Epoch 3/30
[1m71/72[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 59ms/step - accuracy: 0.4181 - loss: 1.5770
Epoch 3: val_accuracy improved from 0.37992 to 0.59449, saving model to mymodel3_03.keras
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 63ms/step -

[1m71/72[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - accuracy: 0.9382 - loss: 0.1758
Epoch 25: val_accuracy improved from 0.92717 to 0.95866, saving model to mymodel3_25.keras
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 63ms/step - accuracy: 0.9382 - loss: 0.1760 - val_accuracy: 0.9587 - val_loss: 0.1670
Epoch 26/30
[1m71/72[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 59ms/step - accuracy: 0.9367 - loss: 0.1925
Epoch 26: val_accuracy did not improve from 0.95866
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 62ms/step - accuracy: 0.9366 - loss: 0.1925 - val_accuracy: 0.9409 - val_loss: 0.1946
Epoch 27/30
[1m71/72[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 60ms/step - accuracy: 0.9407 - loss: 0.1837
Epoch 27: val_accuracy did not improve from 0.95866
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 64ms/step - accuracy: 0.9408 - loss: 0.1834 - val_accuracy: 0.9429 - val_loss: 0.2594
Epoch 2

In [23]:
preds = model.predict(x_test) # label scores 

classpreds = np.argmax(preds, axis=1) # predicted classes 

y_testclass = np.argmax(y_test, axis=1) # true classes

n_classes=8 # number of classes

[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step


In [24]:
print(confusion_matrix(y_testclass, classpreds))

[[159   0   0   0   0   0   0   0]
 [  0 159   0   0   0   0   0   0]
 [  0   0 158   0   0   0   0   0]
 [  1   5   1 126   8   0  16   2]
 [  0   0   0   0 159   0   0   0]
 [  0   0   0   0   0 159   0   0]
 [  0   0   0   0   5   0 153   0]
 [  0   0   0   0   0   0   0 158]]


In [25]:
# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, 
                       verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.9643349647521973
Testing Accuracy:  0.9700551629066467


In [28]:
import numpy as np
import librosa
from keras.models import load_model
from keras.utils import to_categorical

# Load the trained model (make sure to specify the correct model path)
#model = load_model('mymodel3_best.keras')  # Change the model filename as needed

def preprocess_audio(audio_file):
    """Preprocess the input audio file."""
    sr_new = 16000  # Target sample rate
    x, sr = librosa.load(audio_file, sr=sr_new)

    # Padding or truncating to 5 seconds
    max_len = 5 * sr_new
    if x.shape[0] < max_len:
        pad_width = max_len - x.shape[0]
        x = np.pad(x, (0, pad_width))
    elif x.shape[0] > max_len:
        x = x[:max_len]

    # Extract MFCC features
    mfcc_features = librosa.feature.mfcc(y=x, sr=sr_new)
    mfcc_features = np.expand_dims(mfcc_features, axis=-1)  # Add channel dimension
    mfcc_features = mfcc_features.reshape((-1, 20, 157, 1))  # Reshape for model input

    return mfcc_features

def predict_disease(audio_file):
    """Predict the disease based on the audio input."""
    # Preprocess the audio file
    mfcc_input = preprocess_audio(audio_file)

    # Make prediction
    prediction = model.predict(mfcc_input)

    # Get the class index with the highest probability
    predicted_class_index = np.argmax(prediction, axis=1)

    # Map index to class labels
    class_labels = encoder.classes_  # Use the encoder from your training phase
    predicted_label = class_labels[predicted_class_index[0]]

    return predicted_label


audio_file_path = r"C:\Users\Ankit yadav\Desktop\stetho\Respiratory_Sound_Database\Respiratory_Sound_Database\audio_and_txt_files\130_1p2_Ar_mc_AKGC417L.wav"
  
predicted_disease = predict_disease(audio_file_path)

print(f"The predicted disease is: {predicted_disease}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
The predicted disease is: Bronchiolitis


In [29]:
pip install joblib


Note: you may need to restart the kernel to use updated packages.


In [30]:
import joblib

# Save the model
joblib.dump(model, 'model.pkl')

# Save the encoder (LabelEncoder) used during training
joblib.dump(encoder, 'encoder.pkl')


['encoder.pkl']

In [31]:
import pickle

# Saving the model as a pickle file
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

# To load the model back for use later:
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)


In [32]:
model.save('mymodel3_best.keras')  # Save your trained model


In [33]:
np.save('label_encoder_classes.npy', encoder.classes_)
