In [1]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


In [4]:
# Load the patient adherence CSV from the working directory into a DataFrame
# and print its first five rows as a quick sanity check of the columns.
df = pd.read_csv(filepath_or_buffer="patient_adherence_dataset (1).csv")
print(df.head(n=5))


   Age Gender Medication_Type  Dosage_mg  Previous_Adherence Education_Level  \
0   57   Male           TypeA        136                   1     High School   
1   47   Male           TypeA        134                   1     High School   
2   59   Male           TypeC         89                   1     High School   
3   72   Male           TypeB        240                   0    Postgraduate   
4   46   Male           TypeA        241                   1    Postgraduate   

   Income Social_Support_Level Condition_Severity  Comorbidities_Count  \
0  634934               Medium             Severe                    3   
1  297954                 High           Moderate                    3   
2  789337                 High           Moderate                    1   
3  267352                  Low               Mild                    2   
4  718447               Medium               Mild                    1   

  Healthcare_Access Mental_Health_Status  Insurance_Coverage  Adherence  


In [5]:
# Separate the target from the predictors: "Adherence" is the label column,
# every other column of df stays in the feature matrix.
y = df["Adherence"]
X = df.drop(columns=["Adherence"])


In [6]:
# Feature columns handed to the "num" (StandardScaler) branch of the
# ColumnTransformer. Order matters only for the layout of the transformed
# matrix, so it is kept as-is.
num_cols = [
    "Age", "Dosage_mg", "Income",
    "Comorbidities_Count",
    "Previous_Adherence", "Insurance_Coverage",
]


In [7]:
# Feature columns handed to the "cat" (OneHotEncoder) branch of the
# ColumnTransformer; these hold string labels in the data preview.
cat_cols = [
    "Gender", "Medication_Type", "Education_Level",
    "Social_Support_Level", "Condition_Severity",
    "Healthcare_Access", "Mental_Health_Status",
]


In [8]:
# Column-wise preprocessing: standardise the columns in num_cols and one-hot
# encode the columns in cat_cols. handle_unknown="ignore" lets the encoder
# accept category values at predict time that were not present during fit
# instead of raising.
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
])


In [9]:
# Support vector classifier with an RBF kernel. probability=True enables
# probability estimates on the fitted model, and random_state pins the seed
# so repeated runs are reproducible.
model = SVC(kernel="rbf", probability=True, random_state=42)


In [10]:
# Chain preprocessing and the classifier into one estimator so that the same
# transformations are applied at fit time and at predict time.
pipeline = Pipeline([("preprocess", preprocessor), ("model", model)])


In [11]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [12]:
# Fit the preprocessing steps and the classifier on the training split only.
pipeline.fit(X=X_train, y=y_train)


In [13]:
# Score the fitted pipeline on the held-out split: overall accuracy first,
# then the per-class precision / recall / F1 table.
y_pred = pipeline.predict(X_test)

print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
    sep="\n",
)


Accuracy: 0.648

Classification Report:

              precision    recall  f1-score   support

           0       0.67      0.71      0.69       543
           1       0.62      0.58      0.60       457

    accuracy                           0.65      1000
   macro avg       0.64      0.64      0.64      1000
weighted avg       0.65      0.65      0.65      1000



In [14]:
# Persist the whole fitted pipeline (preprocessing + classifier) to disk so
# it can be reloaded later without retraining. The confirmation line is only
# reached if the dump did not raise.
joblib.dump(value=pipeline, filename="best_patient_adherence_model.pkl")
print("✅ Model saved successfully as best_patient_adherence_model.pkl")


✅ Model saved successfully as best_patient_adherence_model.pkl
