In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings 
warnings.filterwarnings('ignore')

# Load dataset
main_data = pd.read_csv("medical_data.csv")

# Data Preprocessing
main_data.dropna(inplace=True)  # Removing missing values
encoder = LabelEncoder()
main_data['Gender'] = encoder.fit_transform(main_data['Gender'])  # Encode categorical data

# Selecting features and target variables
features = ['Age', 'Weight (kg)', 'Systolic_BP', 'Diastolic_BP']
X = main_data[features]
y_hypertension = main_data['Diagnosis'].str.contains('Hypertension', case=False, na=False).astype(int)
#y_hypertension = (main_data['Diagnosis'] == 'Hypertension').astype(int)
#y_diabetes = (main_data['Diagnosis'] == 'Diabetes').astype(int)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(X_scaled, y_hypertension, test_size=0.2, random_state=42)
# X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(X_scaled, y_diabetes, test_size=0.2, random_state=42)

# Train RandomForest models
model_hypertension = RandomForestClassifier(n_estimators=100, random_state=42)
model_hypertension.fit(X_train_h, y_train_h)

# model_diabetes = RandomForestClassifier(n_estimators=100, random_state=42)
# model_diabetes.fit(X_train_d, y_train_d)

# Predictions
y_pred_h = model_hypertension.predict(X_test_h)
#y_pred_d = model_diabetes.predict(X_test_d)

# Model Evaluation
print("Hypertension Prediction Results:")
print(classification_report(y_test_h, y_pred_h))
# print("Diabetes Prediction Results:")
# print(classification_report(y_test_d, y_pred_d))

#Testing
# Example new patient data (Age, Weight, Systolic_BP, Cholesterol, Diastolic_BP)
new_patient = np.array([[50, 75, 130, 150]])  # Adjusting values as needed

# Ensuring scaler is used correctly
new_patient_scaled = scaler.transform(new_patient)  # Using the previously fitted scaler

# Make predictions
hypertension_pred = model_hypertension.predict(new_patient_scaled)
#diabetes_pred = model_diabetes.predict(new_patient_scaled)

# Display results
print(f"Hypertension Prediction: {'Yes' if hypertension_pred[0] == 1 else 'No'}")





Hypertension Prediction Results:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.00      0.00      0.00         0

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2

Hypertension Prediction: No
