In [6]:
import csv
import random

def generate_dataset(filename):
    features = ['Body Temperature', 'Heart Rate', 'Respiratory Rate', 'White Blood Cell Count', 'Lactate Levels',
                'Blood Pressure', 'Oxygen Saturation', 'Organ Dysfunction', 'Inflammatory Markers', 'Microbiological Cultures']
    
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(features + ['Sepsis'])
        
        for _ in range(50000):
            sample = []
            
            # Generate random values for the features
            body_temp = round(random.uniform(36.5, 39.5), 1)
            heart_rate = random.randint(60, 120)
            resp_rate = random.randint(12, 25)
            wbc_count = random.randint(4000, 15000)
            lactate_levels = round(random.uniform(0.5, 4.0), 1)
            blood_pressure = random.randint(80, 130)
            oxygen_saturation = random.randint(90, 100)
            organ_dysfunction = random.randint(0, 1)
            inflammatory_markers = round(random.uniform(0.1, 10.0), 1)
            microbiology_cultures = random.randint(0, 1)
            
            # Assign sepsis based on the relationship with the features
            if (body_temp > 39.0 or body_temp < 36.0) and heart_rate > 100 and resp_rate > 20 and (wbc_count > 12000 or wbc_count < 4000) and lactate_levels > 2.0:
                sepsis = 1
            elif organ_dysfunction == 1:
                sepsis = 1
            elif microbiology_cultures == 1:
                sepsis = 1
            else:
                sepsis = 0
            
            # Append the values to the sample list
            sample.extend([body_temp, heart_rate, resp_rate, wbc_count, lactate_levels, blood_pressure,
                           oxygen_saturation, organ_dysfunction, inflammatory_markers, microbiology_cultures, sepsis])
            
            writer.writerow(sample)

# Generate the dataset and save it as a CSV file
filename = 'sepsis_dataset.csv'
generate_dataset(filename)
print(f"Dataset created and saved as {filename}.")


Dataset created and saved as sepsis_dataset.csv.


In [390]:
import csv
import random
from datetime import datetime, timedelta
from faker import Faker
import pandas as pd

def generate_dataset(filename):
    fake = Faker()
    
    features = ['Patient ID', 'Name', 'Date', 'Time', 'Body Temperature', 'Heart Rate', 'Respiratory Rate',
                'White Blood Cell Count', 'Lactate Levels', 'Blood Pressure', 'Oxygen Saturation',
                'Organ Dysfunction', 'Inflammatory Markers', 'Microbiological Cultures', 'Sepsis']
    
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(features)
        
        start_date = datetime(2022, 7, 5)  # Starting date for test
        patient_id = 10000
        
        for _ in range(50000):
            sample = []
            
            # Generate random values for the features
            body_temp = round(random.uniform(36.5, 39.5), 1)
            heart_rate = random.randint(60, 120)
            resp_rate = random.randint(12, 25)
            wbc_count = random.randint(4000, 15000)
            lactate_levels = round(random.uniform(0.5, 4.0), 1)
            blood_pressure = random.randint(80, 130)
            oxygen_saturation = random.randint(90, 100)
            organ_dysfunction = random.randint(0, 1)
            inflammatory_markers = round(random.uniform(0.1, 10.0), 1)
            microbiology_cultures = random.randint(0, 1)
            
            # Assign sepsis based on the relationship with the features
            if (body_temp > 39.0 or body_temp < 36.0) and heart_rate > 100 and resp_rate > 20 and \
               (wbc_count > 12000 or wbc_count < 4000) and lactate_levels > 2.0:
                sepsis = 1
            elif organ_dysfunction == 1:
                sepsis = 1
            elif microbiology_cultures == 1:
                sepsis = 1
            else:
                sepsis = 0
            
            # Generate patient ID, name, date, and time
            patient_id += 1
            name = fake.name()
            test_date = start_date + timedelta(days=random.randint(0, 365))
            test_time = random.randint(0, 23), random.randint(0, 59)
            
            # Append the values to the sample list
            sample.extend([patient_id, name, pd.to_datetime(test_date.strftime("%Y-%m-%d")).date(), f"{test_time[0]:02d}:{test_time[1]:02d}",
                           body_temp, heart_rate, resp_rate, wbc_count, lactate_levels, blood_pressure,
                           oxygen_saturation, organ_dysfunction, inflammatory_markers, microbiology_cultures, sepsis])
            
            writer.writerow(sample)

# Generate the dataset and save it as a CSV file
filename = '..\data\sepsis_dataset.csv'
generate_dataset(filename)
print(f"Dataset created and saved as {filename}.")


Dataset created and saved as ..\data\sepsis_dataset.csv.


In [391]:
import pandas as pd
data = pd.read_csv("..\data\sepsis_dataset.csv")

new_data = data[['Body Temperature', 'Heart Rate',
       'Respiratory Rate', 'White Blood Cell Count', 'Lactate Levels',
       'Blood Pressure', 'Oxygen Saturation', 'Organ Dysfunction',
       'Inflammatory Markers', 'Microbiological Cultures', 'Sepsis']]

new_data.columns

Index(['Body Temperature', 'Heart Rate', 'Respiratory Rate',
       'White Blood Cell Count', 'Lactate Levels', 'Blood Pressure',
       'Oxygen Saturation', 'Organ Dysfunction', 'Inflammatory Markers',
       'Microbiological Cultures', 'Sepsis'],
      dtype='object')

In [392]:
# Training the model

from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestClassifier

target,features = new_data["Sepsis"],new_data.drop(["Sepsis"],axis=1)

x_train, x_test, y_train, y_test = train_test_split(features, target)

model = RandomForestClassifier(n_estimators=200)

model.fit(x_train,y_train)

### Evaluating the model

from sklearn.metrics import accuracy_score, confusion_matrix

predicted = model.predict(x_test)

print("accuracy_score Score is : ",accuracy_score(predicted,y_test))

print("confusion_matrix is : ",confusion_matrix(predicted,y_test))

import pickle

pickle.dump(model,open("..\models\Sepsis_Model.pickle","wb"))

accuracy_score Score is :  0.99952
confusion_matrix is :  [[3136    6]
 [   0 9358]]


In [393]:
# Making Prediction
sample = {i:j for i, j  in zip(x_train.columns,[0 for _ in range(len(x_train.columns))])}

In [396]:
from datetime import datetime
import numpy as np

def add_sample():

    samples = []

    condition_met = {"Sepsis":0,"Non-Sepsis":0}

    while (condition_met["Sepsis"]!=1 or  condition_met["Non-Sepsis"]!=1):

        existing_data  = pd.read_csv("..\data\sepsis_dataset.csv")

        fake = Faker()
        
        features = ['Patient ID', 'Name', 'Date', 'Time', 'Body Temperature', 'Heart Rate', 'Respiratory Rate',
                    'White Blood Cell Count', 'Lactate Levels', 'Blood Pressure', 'Oxygen Saturation',
                    'Organ Dysfunction', 'Inflammatory Markers', 'Microbiological Cultures', 'Sepsis']
        
            
        start_date = datetime.today().date()
        patient_id = existing_data["Patient ID"].max()+1
        

        sample = []
        
        # Generate random values for the features
        body_temp = round(random.uniform(36.5, 39.5), 1)
        heart_rate = random.randint(60, 120)
        resp_rate = random.randint(12, 25)
        wbc_count = random.randint(4000, 15000)
        lactate_levels = round(random.uniform(0.5, 4.0), 1)
        blood_pressure = random.randint(80, 130)
        oxygen_saturation = random.randint(90, 100)
        organ_dysfunction = random.randint(0, 1)
        inflammatory_markers = round(random.uniform(0.1, 10.0), 1)
        microbiology_cultures = random.randint(0, 1)
        
        # Assign sepsis based on the relationship with the features
        if (body_temp > 39.0 or body_temp < 36.0) and heart_rate > 100 and resp_rate > 20 and \
            (wbc_count > 12000 or wbc_count < 4000) and lactate_levels > 2.0:
            sepsis = 1
        elif organ_dysfunction == 1:
            sepsis = 1
        elif microbiology_cultures == 1:
            sepsis = 1
        else:
            sepsis = 0
        
        # Generate patient ID, name, date, and time
        patient_id += 1
        name = fake.name()
        test_date = start_date
        test_time = datetime.today().time().hour, datetime.today().time().minute
        
        # Append the values to the sample list
        sample.extend([patient_id, name, test_date.strftime("%Y-%m-%d"), f"{test_time[0]:02d}:{test_time[1]:02d}",
                        body_temp, heart_rate, resp_rate, wbc_count, lactate_levels, blood_pressure,
                        oxygen_saturation, organ_dysfunction, inflammatory_markers, microbiology_cultures, sepsis])
        
        final_sample = {i:j for i , j in zip(features,sample)}

        if condition_met["Sepsis"]!=1 and final_sample["Sepsis"] == 1:

            condition_met["Sepsis"] = 1

            samples.append(final_sample)

        elif condition_met["Non-Sepsis"]!=1 and final_sample["Sepsis"] == 0:

            condition_met["Non-Sepsis"] = 1

            samples.append(final_sample)

        else:
            pass

    
    final_sample = list(random.choices(samples,weights=[2,1])[0].values())

    existing_data.loc[len(existing_data)] = final_sample

    existing_data.to_csv('..\data\sepsis_dataset.csv',index=False)

    return {"Added new test result":final_sample}

In [405]:
add_sample()

{'Added new test result': [60006,
  'Sarah Douglas',
  '2023-07-06',
  '14:29',
  39.0,
  116,
  24,
  12056,
  2.2,
  113,
  99,
  0,
  7.4,
  0,
  0]}

In [406]:
def get_sepsis_alert():

    existing_data  = pd.read_csv("..\data\sepsis_dataset.csv")
    existing_data["Date"] = pd.to_datetime(existing_data["Date"])
    existing_data["Date"] = existing_data["Date"].apply(lambda x: x.date())
    todays_data = existing_data[existing_data["Date"]>=datetime.today().date()]

    return todays_data

In [418]:
import pickle
model = pickle.load(open("..\models\Sepsis_Model.pickle","rb"))

def make_sepsis_prediction(data):
    

    new_data = data[['Body Temperature', 'Heart Rate',
       'Respiratory Rate', 'White Blood Cell Count', 'Lactate Levels',
       'Blood Pressure', 'Oxygen Saturation', 'Organ Dysfunction',
       'Inflammatory Markers', 'Microbiological Cultures', 'Sepsis']]
    
    target,features = new_data["Sepsis"],new_data.drop(["Sepsis"],axis=1)

    prediction = model.predict(features)

    alert = data[["Patient ID","Name","Date","Time"]]

    alert.loc[alert.index,"Alert"] = prediction

    final_alert = {"Total Tests":len(alert),"No of Sepsis Alert":len(alert[alert["Alert"]==1.0]),"Patient Id":list(alert[alert["Alert"]==1.0]["Patient ID"])}

    return final_alert

make_sepsis_prediction(d)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  alert.loc[alert.index,"Alert"] = prediction


{'Total Tests': 3, 'No of Sepsis Alert': 2, 'Patient Id': [60002, 60004]}