In [1]:
import pandas as pd

# Load the dataset
file_path = 'healthcare-dataset-stroke-data.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
data.head()


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [10]:
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Checking for missing values in the dataset
missing_values = data.isnull().sum()

# Impute missing values for 'bmi' using the median
imputer = SimpleImputer(strategy='median')
data['bmi'] = imputer.fit_transform(data[['bmi']])

# Encode categorical columns using LabelEncoder
label_encoder = LabelEncoder()
categorical_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']

for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])

# Display the cleaned dataset summary
data.info(), data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5110 entries, 0 to 5109
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 5110 non-null   int64  
 1   gender             5110 non-null   int64  
 2   age                5110 non-null   float64
 3   hypertension       5110 non-null   int64  
 4   heart_disease      5110 non-null   int64  
 5   ever_married       5110 non-null   int64  
 6   work_type          5110 non-null   int64  
 7   Residence_type     5110 non-null   int64  
 8   avg_glucose_level  5110 non-null   float64
 9   bmi                5110 non-null   float64
 10  smoking_status     5110 non-null   int64  
 11  stroke             5110 non-null   int64  
dtypes: float64(3), int64(9)
memory usage: 479.2 KB


(None,
       id  gender   age  hypertension  heart_disease  ever_married  work_type  \
 0   9046       1  67.0             0              1             1          2   
 1  51676       0  61.0             0              0             1          3   
 2  31112       1  80.0             0              1             1          2   
 3  60182       0  49.0             0              0             1          2   
 4   1665       0  79.0             1              0             1          3   
 
    Residence_type  avg_glucose_level        bmi  smoking_status  stroke  
 0               1             228.69  36.600000               1       1  
 1               0             202.21  28.893237               2       1  
 2               0             105.92  32.500000               2       1  
 3               1             171.23  34.400000               3       1  
 4               0             174.12  24.000000               2       1  )

In [11]:
# Defining risk levels based on specific criteria
def risk_level(row):
    # High risk: stroke, high glucose level, and presence of hypertension or heart disease
    if row['stroke'] == 1 or row['avg_glucose_level'] > 200 or (row['hypertension'] == 1 or row['heart_disease'] == 1):
        return 2  # High risk
    # Medium risk: moderately high glucose level and age above 50
    elif row['avg_glucose_level'] > 140 and row['age'] > 50:
        return 1  # Medium risk
    else:
        return 0  # Low risk

# Apply the function to create the 'risk_level' column
data['risk_level'] = data.apply(risk_level, axis=1)

# Drop the 'stroke' column as we now have 'risk_level' for classification
data = data.drop(columns=['stroke', 'id'])

# Display the distribution of risk levels
risk_distribution = data['risk_level'].value_counts()
risk_distribution


risk_level
0    3895
2    1114
1     101
Name: count, dtype: int64

In [13]:
# Redefining risk levels based on available columns
def risk_level(row):
    # High risk: high glucose level or presence of hypertension or heart disease
    if row['avg_glucose_level'] > 200 or (row['hypertension'] == 1 or row['heart_disease'] == 1):
        return 2  # High risk
    # Medium risk: moderately high glucose level and age above 50
    elif row['avg_glucose_level'] > 140 and row['age'] > 50:
        return 1  # Medium risk
    else:
        return 0  # Low risk

# Apply the function to create the 'risk_level' column
data['risk_level'] = data.apply(risk_level, axis=1)

# Display the updated distribution of risk levels
risk_distribution = data['risk_level'].value_counts()
risk_distribution


risk_level
0    4002
2     989
1     119
Name: count, dtype: int64

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features and target variable
X = data.drop('risk_level', axis=1)
y = data['risk_level']

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale the numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Display the shape of the training and testing sets
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((4088, 10), (1022, 10), (4088,), (1022,))

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Convert target variable to categorical (one-hot encoding for multiclass classification)
y_train_categorical = to_categorical(y_train, num_classes=3)
y_test_categorical = to_categorical(y_test, num_classes=3)

# Building the ANN model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')  # Output layer for 3 classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_categorical, validation_split=0.2, epochs=25, batch_size=32, verbose=1)


Epoch 1/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 795us/step - accuracy: 0.8046 - loss: 0.5847 - val_accuracy: 0.9719 - val_loss: 0.1102
Epoch 2/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step - accuracy: 0.9692 - loss: 0.0943 - val_accuracy: 0.9731 - val_loss: 0.0692
Epoch 3/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 363us/step - accuracy: 0.9725 - loss: 0.0691 - val_accuracy: 0.9756 - val_loss: 0.0565
Epoch 4/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 357us/step - accuracy: 0.9715 - loss: 0.0637 - val_accuracy: 0.9804 - val_loss: 0.0491
Epoch 5/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 344us/step - accuracy: 0.9815 - loss: 0.0424 - val_accuracy: 0.9804 - val_loss: 0.0434
Epoch 6/25
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354us/step - accuracy: 0.9856 - loss: 0.0391 - val_accuracy: 0.9804 - val_loss: 0.0442
Epoch 7/25
[1m1

In [19]:
# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Test Accuracy: 99.02%


In [20]:
# Save the trained model
model.save('risk_stratification_model.h5')




In [21]:
import joblib

# Save the fitted scaler to a file for deployment purposes
scaler_filename = 'scaler.pkl'
joblib.dump(scaler, scaler_filename)

scaler_filename


'scaler.pkl'

In [22]:
from flask import Flask, request, jsonify
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib

# Load the trained model
model = tf.keras.models.load_model('risk_stratification_model.h5')

# Load the scaler used for preprocessing (save it using joblib after fitting)
scaler = joblib.load('scaler.pkl')

app = Flask(__name__)

# Define the API endpoint for prediction
@app.route('/predict', methods=['POST'])
def predict():
    try:
        # Get the JSON data from the request
        data = request.json
        # Extract the features from the JSON
        features = np.array([[
            data['gender'],
            data['age'],
            data['hypertension'],
            data['heart_disease'],
            data['ever_married'],
            data['work_type'],
            data['Residence_type'],
            data['avg_glucose_level'],
            data['bmi'],
            data['smoking_status']
        ]])
        
        # Scale the features using the pre-fitted scaler
        features_scaled = scaler.transform(features)
        
        # Predict the risk level
        prediction = model.predict(features_scaled)
        risk_level = np.argmax(prediction, axis=1)[0]

        # Map the risk level to a string response
        risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
        risk_str = risk_mapping[risk_level]

        return jsonify({'risk_level': risk_str})

    except Exception as e:
        return jsonify({'error': str(e)})

if __name__ == '__main__':
    app.run(debug=True)




 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with watchdog (fsevents)
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packag

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [23]:
import joblib

# Fit the scaler on the training data and save it
scaler = StandardScaler()
scaler.fit(X_train)
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [34]:
import numpy as np
import tensorflow as tf
import joblib

# Load the trained model
model = tf.keras.models.load_model('risk_stratification_model.h5')

# Load the scaler
scaler = joblib.load('scaler.pkl')

# Define a test input (ensure it matches the format of your training data)
test_input = np.array([[
    1,    # gender (encoded as integer)
    65.0, # age
    1,    # hypertension
    0,    # heart_disease
    1,    # ever_married
    2,    # work_type
    1,    # Residence_type
    180.0, # avg_glucose_level
    32.5,  # bmi
    2     # smoking_status
]])

# Scale the test input
test_input_scaled = scaler.transform(test_input)

# Make a prediction
prediction = model.predict(test_input_scaled)

# Get the predicted class
predicted_class = np.argmax(prediction, axis=1)[0]

# Map the class to the risk level
risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
risk_str = risk_mapping[predicted_class]

print(f'Predicted Risk Level: {risk_str}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted Risk Level: High Risk


In [35]:
test_inputs = np.array([
    [1, 65.0, 1, 0, 1, 2, 1, 180.0, 32.5, 2],  # Sample 1
    [0, 45.0, 0, 0, 0, 1, 0, 130.0, 25.0, 0]   # Sample 2
])

# Scale the test inputs
test_inputs_scaled = scaler.transform(test_inputs)

# Make predictions
predictions = model.predict(test_inputs_scaled)

# Convert predictions to risk levels
predicted_classes = np.argmax(predictions, axis=1)
risk_levels = [risk_mapping[cls] for cls in predicted_classes]

for i, risk in enumerate(risk_levels):
    print(f'Sample {i+1}: Predicted Risk Level - {risk}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Sample 1: Predicted Risk Level - High Risk
Sample 2: Predicted Risk Level - High Risk


In [36]:
# Low Risk Input
low_risk_input = np.array([[
    0,    # gender (Female)
    30.0, # age
    0,    # hypertension
    0,    # heart_disease
    0,    # ever_married
    0,    # work_type
    0,    # Residence_type
    80, # avg_glucose_level
    22.0,  # bmi
    0     # smoking_status
]])

# Medium Risk Input
medium_risk_input = np.array([[
    1,    # gender (Male)
    60.0, # age
    0,    # hypertension
    0,    # heart_disease
    1,    # ever_married
    2,    # work_type
    1,    # Residence_type
    160.0, # avg_glucose_level
    28.0,  # bmi
    2     # smoking_status
]])


In [37]:
# Scale the low and medium risk inputs
low_risk_input_scaled = scaler.transform(low_risk_input)
medium_risk_input_scaled = scaler.transform(medium_risk_input)

# Make predictions
low_risk_prediction = model.predict(low_risk_input_scaled)
medium_risk_prediction = model.predict(medium_risk_input_scaled)

# Get the predicted classes
low_risk_class = np.argmax(low_risk_prediction, axis=1)[0]
medium_risk_class = np.argmax(medium_risk_prediction, axis=1)[0]

# Map the classes to risk levels
low_risk_str = risk_mapping[low_risk_class]
medium_risk_str = risk_mapping[medium_risk_class]

print(f'Low Risk Sample: Predicted Risk Level - {low_risk_str}')
print(f'Medium Risk Sample: Predicted Risk Level - {medium_risk_str}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Low Risk Sample: Predicted Risk Level - High Risk
Medium Risk Sample: Predicted Risk Level - High Risk


In [38]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import joblib

# Load the dataset
data = pd.read_csv('healthcare-dataset-stroke-data.csv')  # Update the path

# Impute missing values for 'bmi' using the median
data['bmi'].fillna(data['bmi'].median(), inplace=True)

# Encode categorical columns using LabelEncoder
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
categorical_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
for col in categorical_columns:
    data[col] = label_encoder.fit_transform(data[col])

# Define the risk levels based on the criteria
def risk_level(row):
    if row['avg_glucose_level'] > 200 or (row['hypertension'] == 1 or row['heart_disease'] == 1):
        return 2  # High risk
    elif row['avg_glucose_level'] > 140 and row['age'] > 50:
        return 1  # Medium risk
    else:
        return 0  # Low risk

data['risk_level'] = data.apply(risk_level, axis=1)

# Drop unnecessary columns
data = data.drop(columns=['id'])

# Features and target variable
X = data.drop('risk_level', axis=1)
y = data['risk_level']

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale the numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Save the scaler for deployment
joblib.dump(scaler, 'scaler.pkl')

# Convert target variable to categorical (one-hot encoding for multiclass classification)
y_train_categorical = to_categorical(y_train, num_classes=3)
y_test_categorical = to_categorical(y_test, num_classes=3)

# Calculate class weights for handling imbalanced data
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Building the ANN model
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')  # Output layer for 3 classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with class weights
history = model.fit(
    X_train, y_train_categorical, 
    validation_split=0.2, 
    epochs=50, 
    batch_size=32, 
    class_weight=class_weights_dict, 
    verbose=1
)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Print confusion matrix and classification report
print(confusion_matrix(y_test, y_pred_classes))
print(classification_report(y_test, y_pred_classes, target_names=['Low Risk', 'Medium Risk', 'High Risk']))

# Save the trained model for deployment
model.save('risk_stratification_model.h5')


Epoch 1/50


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['bmi'].fillna(data['bmi'].median(), inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 837us/step - accuracy: 0.3381 - loss: 0.9356 - val_accuracy: 0.8301 - val_loss: 0.5981
Epoch 2/50
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step - accuracy: 0.8915 - loss: 0.4407 - val_accuracy: 0.9389 - val_loss: 0.1934
Epoch 3/50
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343us/step - accuracy: 0.9464 - loss: 0.2222 - val_accuracy: 0.9584 - val_loss: 0.1262
Epoch 4/50
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339us/step - accuracy: 0.9665 - loss: 0.1626 - val_accuracy: 0.9548 - val_loss: 0.1166
Epoch 5/50
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342us/step - accuracy: 0.9597 - loss: 0.1331 - val_accuracy: 0.9670 - val_loss: 0.0788
Epoch 6/50
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340us/step - accuracy: 0.9716 - loss: 0.0923 - val_accuracy: 0.9621 - val_loss: 0.0756
Epoch 7/50
[1m103/103[0m 



[[791   5   4]
 [  1  21   2]
 [  2   1 195]]
              precision    recall  f1-score   support

    Low Risk       1.00      0.99      0.99       800
 Medium Risk       0.78      0.88      0.82        24
   High Risk       0.97      0.98      0.98       198

    accuracy                           0.99      1022
   macro avg       0.91      0.95      0.93      1022
weighted avg       0.99      0.99      0.99      1022



In [39]:
# Low Risk Input
low_risk_input = np.array([[
    0,    # gender (Female)
    30.0, # age
    0,    # hypertension
    0,    # heart_disease
    0,    # ever_married
    1,    # work_type
    0,    # Residence_type
    120.0, # avg_glucose_level
    22.0,  # bmi
    1     # smoking_status
]])

# Medium Risk Input
medium_risk_input = np.array([[
    1,    # gender (Male)
    60.0, # age
    0,    # hypertension
    0,    # heart_disease
    1,    # ever_married
    2,    # work_type
    1,    # Residence_type
    160.0, # avg_glucose_level
    28.0,  # bmi
    2     # smoking_status
]])


In [41]:
import numpy as np
import tensorflow as tf
import joblib

# Load the trained model and scaler
model = tf.keras.models.load_model('risk_stratification_model.h5')
scaler = joblib.load('scaler.pkl')

# Define sample inputs for low and medium risk
# Ensure all 11 features are present as used during training
low_risk_input = np.array([
    [0, 30.0, 0, 0, 0, 1, 0, 120.0, 22.0, 1, 0]  # Low risk sample, added placeholder for the 11th feature
])

medium_risk_input = np.array([
    [1, 60.0, 0, 0, 1, 2, 1, 160.0, 28.0, 2, 0]  # Medium risk sample, added placeholder for the 11th feature
])

# Scale the low and medium risk inputs
low_risk_input_scaled = scaler.transform(low_risk_input)
medium_risk_input_scaled = scaler.transform(medium_risk_input)

# Make predictions
low_risk_prediction = model.predict(low_risk_input_scaled)
medium_risk_prediction = model.predict(medium_risk_input_scaled)

# Get the predicted classes
low_risk_class = np.argmax(low_risk_prediction, axis=1)[0]
medium_risk_class = np.argmax(medium_risk_prediction, axis=1)[0]

# Map the classes to risk levels
risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
low_risk_str = risk_mapping[low_risk_class]
medium_risk_str = risk_mapping[medium_risk_class]

print(f'Low Risk Sample: Predicted Risk Level - {low_risk_str}')
print(f'Medium Risk Sample: Predicted Risk Level - {medium_risk_str}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Low Risk Sample: Predicted Risk Level - Low Risk
Medium Risk Sample: Predicted Risk Level - Medium Risk




In [42]:
import numpy as np
import tensorflow as tf
import joblib

# Load the trained model and scaler
model = tf.keras.models.load_model('risk_stratification_model.h5')
scaler = joblib.load('scaler.pkl')

# Define sample inputs for low, medium, and high risk
low_risk_input = np.array([
    [0, 30.0, 0, 0, 0, 1, 0, 120.0, 22.0, 1, 0]  # Low risk sample
])

medium_risk_input = np.array([
    [1, 60.0, 0, 0, 1, 2, 1, 160.0, 28.0, 2, 0]  # Medium risk sample
])

high_risk_input = np.array([
    [1, 70.0, 1, 1, 1, 3, 1, 250.0, 35.0, 3, 0]  # High risk sample
])

# Scale the inputs
low_risk_input_scaled = scaler.transform(low_risk_input)
medium_risk_input_scaled = scaler.transform(medium_risk_input)
high_risk_input_scaled = scaler.transform(high_risk_input)

# Make predictions
low_risk_prediction = model.predict(low_risk_input_scaled)
medium_risk_prediction = model.predict(medium_risk_input_scaled)
high_risk_prediction = model.predict(high_risk_input_scaled)

# Get the predicted classes
low_risk_class = np.argmax(low_risk_prediction, axis=1)[0]
medium_risk_class = np.argmax(medium_risk_prediction, axis=1)[0]
high_risk_class = np.argmax(high_risk_prediction, axis=1)[0]

# Map the classes to risk levels
risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
low_risk_str = risk_mapping[low_risk_class]
medium_risk_str = risk_mapping[medium_risk_class]
high_risk_str = risk_mapping[high_risk_class]

print(f'Low Risk Sample: Predicted Risk Level - {low_risk_str}')
print(f'Medium Risk Sample: Predicted Risk Level - {medium_risk_str}')
print(f'High Risk Sample: Predicted Risk Level - {high_risk_str}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Low Risk Sample: Predicted Risk Level - Low Risk
Medium Risk Sample: Predicted Risk Level - Medium Risk
High Risk Sample: Predicted Risk Level - High Risk




In [43]:
import numpy as np
import tensorflow as tf
import joblib

# Load the trained model and scaler
model = tf.keras.models.load_model('risk_stratification_model.h5')
scaler = joblib.load('scaler.pkl')

# Define multiple sample inputs for different risk levels
sample_inputs = np.array([
    [0, 45.0, 0, 0, 1, 0, 1, 100.0, 24.0, 0, 0],  # Low Risk Sample 2
    [1, 25.0, 0, 0, 0, 4, 0, 110.0, 27.5, 1, 0],  # Low Risk Sample 3
    [0, 55.0, 0, 0, 1, 2, 1, 170.0, 30.0, 2, 0],  # Medium Risk Sample 2
    [1, 62.0, 0, 0, 1, 3, 0, 150.0, 26.0, 3, 0],  # Medium Risk Sample 3
    [0, 75.0, 1, 0, 1, 1, 1, 220.0, 33.0, 3, 0],  # High Risk Sample 2
    [1, 55.0, 1, 1, 1, 3, 0, 210.0, 31.0, 2, 0],  # High Risk Sample 3
])

# Scale the inputs
sample_inputs_scaled = scaler.transform(sample_inputs)

# Make predictions
predictions = model.predict(sample_inputs_scaled)

# Get the predicted classes
predicted_classes = np.argmax(predictions, axis=1)

# Map the classes to risk levels
risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
predicted_risks = [risk_mapping[cls] for cls in predicted_classes]

# Print the predicted risk levels for each sample
for i, risk in enumerate(predicted_risks):
    print(f'Sample {i+1}: Predicted Risk Level - {risk}')








[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Sample 1: Predicted Risk Level - Low Risk
Sample 2: Predicted Risk Level - Low Risk
Sample 3: Predicted Risk Level - Medium Risk
Sample 4: Predicted Risk Level - Medium Risk
Sample 5: Predicted Risk Level - High Risk
Sample 6: Predicted Risk Level - High Risk


In [45]:
import numpy as np
import tensorflow as tf
import joblib

# Load the trained model and scaler
model = tf.keras.models.load_model('risk_stratification_model.h5')
scaler = joblib.load('scaler.pkl')

# Define multiple sample inputs for different risk levels
sample_inputs = np.array([
    [1, 66.0, 1, 0, 1, 4, 0, 225.0, 35.0, 3, 0], # High Risk Sample 6
    [1, 52.0, 0, 0, 1, 0, 1, 145.0, 28.0, 3, 0], # Medium Risk Sample 6
    [1, 50.0, 1, 1, 1, 3, 1, 200.0, 34.0, 2, 0], # High Risk Sample 5
    [1, 58.0, 0, 0, 1, 2, 1, 165.0, 29.0, 2, 0], # Medium Risk Sample 4
    [0, 64.0, 0, 0, 1, 3, 0, 155.0, 32.0, 3, 0], # Medium Risk Sample 5
    [0, 28.0, 0, 0, 0, 1, 0, 90.0, 21.0, 1, 0],  # Low Risk Sample 4
    [0, 78.0, 1, 0, 1, 1, 0, 210.0, 30.0, 2, 0], # High Risk Sample 4
    [1, 50.0, 1, 1, 1, 3, 1, 200.0, 34.0, 2, 0], # High Risk Sample 5
    [1, 22.0, 0, 0, 0, 3, 1, 105.0, 23.0, 1, 0], # Low Risk Sample 5
    [0, 72.0, 1, 1, 1, 2, 1, 250.0, 37.0, 2, 0], # High Risk Sample 7
])

# Scale the inputs
sample_inputs_scaled = scaler.transform(sample_inputs)

# Make predictions
predictions = model.predict(sample_inputs_scaled)

# Get the predicted classes
predicted_classes = np.argmax(predictions, axis=1)

# Map the classes to risk levels
risk_mapping = {0: 'Low Risk', 1: 'Medium Risk', 2: 'High Risk'}
predicted_risks = [risk_mapping[cls] for cls in predicted_classes]

# Print the predicted risk levels for each sample
for i, risk in enumerate(predicted_risks):
    print(f'Sample {i+1}: Predicted Risk Level - {risk}')




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Sample 1: Predicted Risk Level - High Risk
Sample 2: Predicted Risk Level - Medium Risk
Sample 3: Predicted Risk Level - High Risk
Sample 4: Predicted Risk Level - Medium Risk
Sample 5: Predicted Risk Level - Medium Risk
Sample 6: Predicted Risk Level - Low Risk
Sample 7: Predicted Risk Level - High Risk
Sample 8: Predicted Risk Level - High Risk
Sample 9: Predicted Risk Level - Low Risk
Sample 10: Predicted Risk Level - High Risk


