In [46]:
pip install colorama



In [47]:
import pandas as pd
import tensorflow as tf

In [48]:
sampled_data = pd.read_csv('/content/model_training_dataset_balanced.csv')
sampled_data.head()

Unnamed: 0,uuid,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,...,chr11,chr12,con1,con2,con3,con4,con5,con6,con7,status
0,f67d8450-f76e-4c47-bbbe-85418e320acb,-175.359161,150.759949,-80.375107,24.647184,-37.443634,-19.256956,-10.821563,-10.466436,-36.51321,...,0.569912,0.467737,9.418224,18.591966,19.014428,21.428777,22.717462,27.339486,50.507028,COVID-19
1,5fb358c3-76bc-4adc-93ac-02f367e38739,-243.806778,108.23716,-41.346554,27.0959,-45.752834,23.579901,-22.477478,6.968425,-20.385113,...,0.688207,0.722375,16.771968,9.403812,11.771956,14.365618,14.628152,16.779275,55.377236,COVID-19
2,e39dac66-46e0-4d6f-b09c-58026674c71f,-134.321823,106.624237,-31.094105,23.059395,-33.958996,20.828333,-24.265173,3.744247,-29.960234,...,0.63493,0.675052,18.115177,12.29523,12.388998,15.294764,17.262722,16.29955,55.485166,COVID-19
3,21aee478-6d13-45ea-be4d-4f29fd244798,-328.1073,116.169724,-65.582581,20.001492,-50.012051,1.997218,-26.9753,1.41907,-9.564116,...,0.779288,0.822351,15.998029,10.901328,16.835226,16.941303,17.46149,18.593721,45.432327,COVID-19
4,bae0c88f-852c-4f3f-8141-d037c777459e,-311.12973,37.726364,-30.410118,26.099421,6.797897,37.092037,-31.4816,-12.676159,-22.569769,...,0.764333,0.602115,14.258618,15.093659,15.253623,17.969472,19.550257,18.709066,46.60069,COVID-19


In [49]:
import tensorflow as tf
tf.keras.backend.clear_session()

In [50]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import numpy as np
from keras import models, layers, regularizers, Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
import joblib
import pandas as pd
from colorama import Fore, Style
from tabulate import tabulate
import os


class CovidModelTrainer:
    def __init__(self, save_dir='model_artifacts'):
        self.save_dir = save_dir
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        self.label_encoder = LabelEncoder()
        self.model = None
        self.height = 1
        self.width = None
        self.channels = 1

    def preprocess_data(self, sampled_data):
        # Define X (features) and y (target)
        y_label = 'status'
        X = sampled_data.drop(columns=[y_label, 'uuid'])
        y = sampled_data[y_label]

        # Split dataset
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=612, stratify=y
        )

        # Normalize features
        X_train_norm = self.scaler.fit_transform(X_train)
        X_test_norm = self.scaler.transform(X_test)

        # Encode target labels
        y_train_enc = self.label_encoder.fit_transform(y_train.ravel())
        y_test_enc = self.label_encoder.transform(y_test.ravel())

        # Handle NaN values
        nan_rows_train = np.isnan(X_train_norm).any(axis=1)
        X_train_norm = X_train_norm[~nan_rows_train]
        y_train_enc = y_train_enc[~nan_rows_train]

        nan_rows_test = np.isnan(X_test_norm).any(axis=1)
        X_test_norm = X_test_norm[~nan_rows_test]
        y_test_enc = y_test_enc[~nan_rows_test]

        # Set width based on feature count
        self.width = X_train_norm.shape[1]

        # Reshape for CNN
        X_train_norm = X_train_norm.reshape(-1, self.height, self.width, self.channels)
        X_test_norm = X_test_norm.reshape(-1, self.height, self.width, self.channels)

        return X_train_norm, X_test_norm, y_train_enc, y_test_enc

    def build_model(self):
        model = Sequential([
            Conv2D(64, kernel_size=(1, 3), activation='relu',
                   input_shape=(self.height, self.width, self.channels), padding='same'),
            BatchNormalization(),
            Conv2D(64, kernel_size=(1, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(1, 2)),
            Dropout(0.25),

            Conv2D(128, kernel_size=(1, 3), activation='relu', padding='same'),
            BatchNormalization(),
            Conv2D(128, kernel_size=(1, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(1, 2)),
            Dropout(0.3),

            Conv2D(256, kernel_size=(1, 3), activation='relu', padding='same'),
            BatchNormalization(),
            Conv2D(256, kernel_size=(1, 3), activation='relu', padding='same'),
            BatchNormalization(),
            MaxPooling2D(pool_size=(1, 2)),
            Dropout(0.35),

            Flatten(),
            Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            BatchNormalization(),
            Dropout(0.4),
            Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
            BatchNormalization(),
            Dropout(0.4),
            Dense(3, activation='softmax')
        ])

        model.compile(optimizer='nadam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        self.model = model
        return model

    def train(self, sampled_data, epochs=100, batch_size=128):
        # Preprocess data
        X_train_norm, X_test_norm, y_train_enc, y_test_enc = self.preprocess_data(
            sampled_data
        )

        # Build model if not already built
        if self.model is None:
            self.build_model()

        # Handle data repetition if needed
        repetition_factor = X_train_norm.shape[0] // y_train_enc.shape[0]
        y_train_enc_repeated = np.repeat(y_train_enc, repetition_factor)

        remainder = X_train_norm.shape[0] % y_train_enc.shape[0]
        if remainder > 0:
            y_train_enc_repeated = np.concatenate(
                [y_train_enc_repeated, y_train_enc[:remainder]]
            )

        # Train model
        history = self.model.fit(
            X_train_norm,
            y_train_enc_repeated,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.15
        )

        # Save artifacts
        self.save_artifacts()

        return history

    def save_artifacts(self):
        """Save all necessary model artifacts"""
        self.model.save(f'{self.save_dir}/covid_model.keras')
        joblib.dump(self.scaler, f'{self.save_dir}/scaler.pkl')
        joblib.dump(self.label_encoder, f'{self.save_dir}/label_encoder.pkl')
        # Save model parameters
        model_params = {
            'height': self.height,
            'width': self.width,
            'channels': self.channels
        }
        joblib.dump(model_params, f'{self.save_dir}/model_params.pkl')


class CovidPredictor:
    def __init__(self, model_dir='model_artifacts'):
        self.model_dir = model_dir
        self.load_artifacts()

    def load_artifacts(self):
        """Load all necessary model artifacts"""
        self.model = tf.keras.models.load_model(f'{self.model_dir}/covid_model.keras')
        self.scaler = joblib.load(f'{self.model_dir}/scaler.pkl')
        self.label_encoder = joblib.load(f'{self.model_dir}/label_encoder.pkl')
        model_params = joblib.load(f'{self.model_dir}/model_params.pkl')
        self.height = model_params['height']
        self.width = model_params['width']
        self.channels = model_params['channels']

    def predict(self, dataset_status, n_samples, random_state=42):
        """Make predictions on random samples from the dataset"""
        # Select random rows
        random_rows = dataset_status.sample(n=n_samples, random_state=random_state)

        # Store original values
        uuids = random_rows['uuid']
        original_status = random_rows['status']

        # Prepare features
        features = random_rows.drop(columns=['uuid', 'status'])

        # Normalize features
        features_normalized = self.scaler.transform(features)

        # Reshape for CNN
        features_reshaped = features_normalized.reshape(-1, self.height, self.width, self.channels)

        # Make predictions
        predictions = self.model.predict(features_reshaped)
        predicted_classes = np.argmax(predictions, axis=1)

        # Convert numerical predictions to labels
        predicted_labels = self.label_encoder.inverse_transform(predicted_classes)

        # Create results DataFrame
        predictions_df = pd.DataFrame({
            'UUID': uuids,
            'Original Status': original_status,
            'Predicted Status': predicted_labels
        })

        # Color formatting
        def color_status(status):
            color_map = {
                'COVID-19': Fore.RED,
                'healthy': Fore.GREEN,
                'symptomatic': Fore.BLUE
            }
            return f"{color_map.get(status, '')}{status}{Style.RESET_ALL}"

        # Apply color formatting
        predictions_df['Original Status'] = predictions_df['Original Status'].apply(
            color_status
        )
        predictions_df['Predicted Status'] = predictions_df['Predicted Status'].apply(
            color_status
        )

        # Calculate accuracy
        accuracy = np.mean(original_status == predicted_labels) * 100

        # Display results
        print("\nPrediction Results:")
        print(tabulate(predictions_df, headers='keys', tablefmt='pretty'))
        print(f"\nBatch Accuracy: {accuracy:.2f}%")

        return predictions_df


In [51]:
# Training
trainer = CovidModelTrainer()
trainer.train(sampled_data)

Epoch 1/100


  y_train_enc = self.label_encoder.fit_transform(y_train.ravel())
  y_test_enc = self.label_encoder.transform(y_test.ravel())
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 429ms/step - accuracy: 0.3385 - loss: 7.7270 - val_accuracy: 0.3451 - val_loss: 6.7525
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - accuracy: 0.3500 - loss: 7.1545 - val_accuracy: 0.2854 - val_loss: 6.4527
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.3738 - loss: 6.5642 - val_accuracy: 0.2854 - val_loss: 6.0628
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.4006 - loss: 5.9135 - val_accuracy: 0.2854 - val_loss: 5.5012
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3887 - loss: 5.3746 - val_accuracy: 0.2854 - val_loss: 4.9179
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.4244 - loss: 4.7928 - val_accuracy: 0.2854 - val_loss: 4.4403
Epoch 7/100
[1m24/24[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7aee2f8be4d0>

In [52]:
# Prediction
predictor = CovidPredictor()
results = predictor.predict(sampled_data, n_samples=10)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 855ms/step

Prediction Results:
+------+--------------------------------------+-----------------+------------------+
|      |                 UUID                 | Original Status | Predicted Status |
+------+--------------------------------------+-----------------+------------------+
| 996  | cfbda12e-fdc6-4749-a7ec-3a81c3106317 |    [31mCOVID-19[0m     |     [31mCOVID-19[0m     |
| 1295 | c941e62c-e260-4ee9-959e-af91663ac58c |    [31mCOVID-19[0m     |     [31mCOVID-19[0m     |
| 848  | c9c4445b-fbd3-4f4d-9ca2-282a669d79c4 |    [31mCOVID-19[0m     |     [32mhealthy[0m      |
| 3411 | 70ea9638-f7e3-4f16-bb97-7d066a0e6cdf |   [34msymptomatic[0m   |   [34msymptomatic[0m    |
|  79  | aa345328-b50f-4bf8-9a8d-cf0063b04e6b |    [31mCOVID-19[0m     |     [32mhealthy[0m      |
| 2478 | 1de44182-c6f6-4059-bb54-1e8552999e03 |     [32mhealthy[0m     |     [32mhealthy[0m      |
| 3997 | 8f0052b4-731b-4822-8e4b-