In [2]:
import joblib
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE, SMOTENC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:
# Read the raw fever dataset from the working directory, then preview
# its column index and first rows.
DATA_PATH = "fever.csv"
df = pd.read_csv(DATA_PATH)
for label, preview in (("Columns:", df.columns), ("Sample data:\n", df.head())):
    print(label, preview)

Columns: Index(['Temperature', 'Fever_Severity', 'Age', 'Gender', 'BMI', 'Headache',
       'Body_Ache', 'Fatigue', 'Chronic_Conditions', 'Allergies',
       'Smoking_History', 'Alcohol_Consumption', 'Humidity', 'AQI',
       'Physical_Activity', 'Diet_Type', 'Heart_Rate', 'Blood_Pressure',
       'Previous_Medication', 'Recommended_Medication'],
      dtype='object')
Sample data:
    Temperature Fever_Severity  Age  Gender   BMI Headache Body_Ache Fatigue  \
0         36.1         Normal   89  Female  24.0       No       Yes      No   
1         37.5     Mild Fever   94    Male  26.6       No        No      No   
2         36.4         Normal   92    Male  27.8       No        No      No   
3         39.8     High Fever   66    Male  18.7       No       Yes      No   
4         39.3     High Fever   28    Male  21.0       No       Yes      No   

  Chronic_Conditions Allergies Smoking_History Alcohol_Consumption  Humidity  \
0                 No       Yes              No              

In [4]:
# Feature groups used throughout the notebook.
# numeric_cols are median-imputed and standard-scaled further down;
# categorical_cols are mode-imputed and label-encoded.
numeric_cols = [
    "Temperature",
    "Age",
    "BMI",
    "Humidity",
    "AQI",
    "Heart_Rate",
]
categorical_cols = [
    "Gender",
    "Headache",
    "Body_Ache",
    "Fatigue",
    "Chronic_Conditions",
    "Allergies",
    "Smoking_History",
    "Alcohol_Consumption",
    "Physical_Activity",
    "Diet_Type",
    "Blood_Pressure",
    "Previous_Medication",
]


In [5]:
# Names of the two prediction targets: the severity label column and the
# risk score column.
target_severity_col, target_risk_col = "Fever_Severity", "Fever_Risk"

In [6]:
# Fill gaps: column median for numeric features, most frequent value
# (first mode) for categorical features.
# NOTE(review): these statistics are computed on the full frame, before the
# train/test split performed later, so held-out rows influence the fill values.
fill_values = df[numeric_cols].median().to_dict()
fill_values.update({name: df[name].mode()[0] for name in categorical_cols})
imputed_cols = numeric_cols + categorical_cols
df[imputed_cols] = df[imputed_cols].fillna(value=fill_values)

In [7]:
# Fit one LabelEncoder per categorical feature and keep it in le_dict
# (keyed by column name), then overwrite each column with its integer codes.
le_dict = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}
for col, encoder in le_dict.items():
    df[col] = encoder.transform(df[col])

In [8]:
# Encode the severity target as integers; severity_le is kept so predictions
# can be mapped back to the original class names.
severity_le = LabelEncoder().fit(df[target_severity_col])
df[target_severity_col] = severity_le.transform(df[target_severity_col])
print("Severity classes:", severity_le.classes_)

Severity classes: ['High Fever' 'Mild Fever' 'Normal']


In [9]:
# Synthesize a 0-100 risk score when the dataset does not provide one.
if target_risk_col not in df.columns:
    # At this point these columns hold LabelEncoder integer codes, and the
    # sum treats them as flags.
    # NOTE(review): assumes each column encodes No -> 0 / Yes -> 1; confirm
    # that Chronic_Conditions has no additional categories.
    symptom_cols = ["Headache", "Body_Ache", "Fatigue", "Chronic_Conditions"]
    temperature_part = (df["Temperature"] - 35) * 10
    heart_rate_part = (df["Heart_Rate"] - 60) * 0.5
    symptom_part = df[symptom_cols].sum(axis=1) * 5
    raw_risk = temperature_part + heart_rate_part + symptom_part
    df[target_risk_col] = raw_risk.clip(lower=0, upper=100)

In [10]:
# Feature matrix (numeric columns first, then categorical) and the two targets.
# NOTE(review): "Temperature" is part of the features. Fever_Severity looks
# like it is derived from temperature (confirm against the data source), and
# Fever_Risk, when synthesized in this notebook, is computed from feature
# columns. Both models may therefore be learning a deterministic rule, so
# test metrics can be overly optimistic.
model_features = numeric_cols + categorical_cols
X = df[model_features]
y_severity, y_risk = df[target_severity_col], df[target_risk_col]

In [11]:
# 80/20 split shared by both targets, stratified on the severity label.
split_parts = train_test_split(
    X,
    y_severity,
    y_risk,
    test_size=0.2,
    random_state=42,
    stratify=y_severity,
)
X_train, X_test, y_train_sev, y_test_sev, y_train_risk, y_test_risk = split_parts

In [12]:
# Standardize the numeric features. The scaler is fitted on the training
# split only and then applied to both splits; categorical codes are untouched.
scaler = StandardScaler().fit(X_train[numeric_cols])
X_train_scaled, X_test_scaled = X_train.copy(), X_test.copy()
for scaled_frame, raw_frame in ((X_train_scaled, X_train), (X_test_scaled, X_test)):
    scaled_frame[numeric_cols] = scaler.transform(raw_frame[numeric_cols])


In [13]:
# Oversample the minority severity classes on the training split only.
# Plain SMOTE interpolates every column, which would produce fractional,
# meaningless codes for the label-encoded categorical features. SMOTENC
# interpolates only the continuous columns and assigns each categorical
# column the most frequent category among the nearest neighbours.
# Positional indices are passed so this works regardless of whether the
# installed imbalanced-learn version accepts column names.
categorical_idx = [X_train_scaled.columns.get_loc(name) for name in categorical_cols]
sm = SMOTENC(categorical_features=categorical_idx, random_state=42)
X_res, y_res_sev = sm.fit_resample(X_train_scaled, y_train_sev)


In [14]:
# Severity classifier, trained on the resampled training data.
severity_params = dict(
    n_estimators=300,
    max_depth=10,
    random_state=42,
    class_weight="balanced",
)
severity_model = RandomForestClassifier(**severity_params)
severity_model.fit(X_res, y_res_sev)

0,1,2
,n_estimators,300
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [15]:
# Risk regressor, trained on the scaled (not resampled) training split.
risk_params = dict(n_estimators=300, max_depth=12, random_state=42)
risk_model = RandomForestRegressor(**risk_params)
risk_model.fit(X_train_scaled, y_train_risk)

0,1,2
,n_estimators,300
,criterion,'squared_error'
,max_depth,12
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [16]:
# Score the severity classifier on the held-out split and print overall
# accuracy plus per-class precision/recall/F1 under the original class names.
y_pred_sev = severity_model.predict(X_test_scaled)
acc = accuracy_score(y_test_sev, y_pred_sev)
severity_report = classification_report(
    y_test_sev, y_pred_sev, target_names=severity_le.classes_
)
print(
    f"Severity Test Accuracy: {acc*100:.2f}%",
    "Classification Report:",
    severity_report,
    sep="\n",
)


Severity Test Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

  High Fever       1.00      1.00      1.00       106
  Mild Fever       1.00      1.00      1.00        40
      Normal       1.00      1.00      1.00        54

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [17]:
# Score the risk regressor on the held-out split: root of the mean squared error.
y_pred_risk = risk_model.predict(X_test_scaled)
risk_mse = mean_squared_error(y_test_risk, y_pred_risk)
rmse = np.sqrt(risk_mse)
print(f"Risk Test RMSE: {rmse:.2f}")

Risk Test RMSE: 3.70


In [18]:
# Persist both fitted models, the scaler and the encoders to the working
# directory, written in the order listed here.
artifacts = {
    "fever_severity_model.pkl": severity_model,
    "fever_risk_model.pkl": risk_model,
    "fever_scaler.pkl": scaler,
    "fever_label_encoders.pkl": le_dict,
    "fever_target_encoder.pkl": severity_le,
}
for filename, fitted_obj in artifacts.items():
    joblib.dump(fitted_obj, filename)
print("Models, scaler, and encoders saved!")

Models, scaler, and encoders saved!
