In [None]:
import pandas as pd

# Columns that must be present (non-null) for a household row to be kept.
# NOTE(review): this list only drives the missing-value filter below; the
# modelling cell builds X from every remaining column of df, not from
# `features` — confirm that is intended.
features = [
    "num_members",
    "num_children",
    "num_elderly",
    "dependency_ratio",
    "income_formal",
    "income_informal",
    "rent_cost",
    "total_expenditure",
    "debt_amount",
    "shock_last_6m",
    "coping_crisis_score",
    "coping_emergency_score",
    "legal_residency_rate",
]

# Read the raw file, then keep only rows where the target and all of the
# columns listed above are populated. The trailing copy() detaches the result
# from the frame returned by read_csv.
df = (
    pd.read_csv("households_complex.csv")
    .dropna(subset=[*features, "vulnerability_flag"])
    .copy()
)

In [None]:
# One-hot encode the categorical columns. drop_first=True omits the first
# level of each column so the dummies are not perfectly collinear.
# Note: this replaces the listed columns in df, so the cell cannot be re-run
# without re-running the loading cell first.
df = pd.get_dummies(
    df,
    drop_first=True,
    columns=[
        "governorate",
        "area_type",
        "shelter_type",
        "head_gender",
        "head_education",
    ],
)

In [None]:
# Per-column count of missing values remaining after encoding.
print(df.isnull().sum())

In [None]:
# Remove education_expenditure before modelling.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it after the column is already gone is a
# no-op rather than a KeyError.
df = df.drop(columns=["education_expenditure"], errors="ignore")

In [None]:
from sklearn.model_selection import train_test_split

# Binary target to predict.
y = df.loc[:, "vulnerability_flag"]

# Everything except the target, the row identifier and pmt_score becomes a
# model input.
# NOTE(review): pmt_score is presumably excluded to avoid leaking an existing
# score into the model — confirm.
X = df.drop(
    columns=[
        "vulnerability_flag",
        "household_id",
        "pmt_score",
    ]
)

# 70/30 split, stratified on the target so both partitions keep the same
# class balance; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression on the training partition (fit() returns the
# estimator itself, so the chained form leaves `model` fitted).
# NOTE(review): inputs are passed without any scaling step in this notebook;
# consider standardising them if convergence warnings appear or if the
# coefficients are going to be compared across features.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class labels for the held-out rows.
pred = model.predict(X_test)

# Probability of the second class (model.classes_[1]) for the held-out rows,
# used as a PMT-like score.
# NOTE(review): assumes classes_[1] is the "vulnerable" label — confirm.
held_out_probabilities = model.predict_proba(X_test)
pred_proba = held_out_probabilities[:, 1]

In [None]:
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)

# Threshold-dependent metrics are computed from the hard predictions;
# AUC is computed from the predicted probabilities.
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred)
recall = recall_score(y_test, pred)
f1 = f1_score(y_test, pred)
auc = roc_auc_score(y_test, pred_proba)

# Report each metric rounded to three decimals, in a fixed order.
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1),
    ("AUC:", auc),
):
    print(label, round(value, 3))

# Confusion matrix of true labels against hard predictions.
print(confusion_matrix(y_test, pred))


In [None]:
import pandas as pd

# Pair every input column with its fitted coefficient (coef_[0] is the single
# coefficient row of the fitted model), then rank from largest to smallest.
coef_df = pd.DataFrame({"feature": X.columns, "coef": model.coef_[0]})
coef_df = coef_df.sort_values("coef", ascending=False)

# Show the 15 largest coefficients.
coef_df.head(15)


In [None]:
# Probability cut-off at or above which a household is flagged as eligible.
# NOTE(review): the evaluation metrics in this notebook are computed from
# model.predict(), which applies the estimator's default decision rule rather
# than this cut-off, so they do not describe the flag produced here — confirm
# that 0.60 is the intended operating point.
ELIGIBILITY_THRESHOLD = 0.60

# Score every row of X (training and held-out households alike) with the
# fitted model; column 1 of predict_proba corresponds to model.classes_[1].
df["predicted_pmt_score"] = model.predict_proba(X)[:, 1]

# 1 when the score reaches the threshold, 0 otherwise.
df["predicted_eligible"] = (
    df["predicted_pmt_score"] >= ELIGIBILITY_THRESHOLD
).astype(int)

In [None]:
# Keep only the identifier, the model score and the eligibility flag.
eligibility_list = df.loc[
    :, ["household_id", "predicted_pmt_score", "predicted_eligible"]
]

# Preview the first rows.
eligibility_list.head()


In [None]:
# Write the eligibility list to disk without the DataFrame index column.
eligibility_list.to_csv(path_or_buf="eligibility_output.csv", index=False)