In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the CSV into `df`; the cells below read their columns from this frame.
csv_path = "synthetic_payment_data_past_6_months.csv"
df = pd.read_csv(csv_path)

In [None]:
# Build the feature matrix X and target y from the raw frame.

# Parse due dates so date arithmetic is possible below.
df["due_date"] = pd.to_datetime(df["due_date"])

# Take an explicit copy of the feature columns: writing into a bare column
# subset of `df` can raise SettingWithCopyWarning and leaves it ambiguous
# whether `df` itself is being modified.
X = df[["customer_name", "amount", "due_date"]].copy()
y = df["label"]

# Replace customer names with integer codes. Plain column assignment swaps in
# the new integer column; `X.loc[:, col] = ...` would instead try to set the
# values in place and can leave the column with its original object dtype.
le_customer = LabelEncoder()
X["customer_name"] = le_customer.fit_transform(X["customer_name"])

# Express the due date as a whole number of days relative to a fixed date so
# the model receives a numeric feature (negative for earlier dates).
reference_date = pd.Timestamp("2024-01-01")
X["days_since_reference"] = (X["due_date"] - reference_date).dt.days

# The raw datetime column is no longer needed once the offset is computed.
X = X.drop(columns=["due_date"])

X.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report how many rows landed in each partition.
n_train = len(X_train)
n_test = len(X_test)
print(f"Training set size: {n_train}")
print(f"Test set size: {n_test}")

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Train a decision tree on the training partition. `fit` returns the estimator
# itself, so construction and fitting can be chained.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the held-out rows and print the per-class metrics report.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
import joblib

# Persist the trained classifier.
model_filename = 'payment_classification_model.pkl'
joblib.dump(model, model_filename)
print(f"Model saved to {model_filename}")

# The classifier was trained on label-encoded customer names and on a day
# offset from `reference_date`. Without the fitted encoder, that date and the
# column order, the saved model cannot be applied to new raw rows, so store
# them next to it. They go in a separate file so the model file's contents
# stay exactly as before.
preprocessing_filename = 'payment_classification_preprocessing.pkl'
joblib.dump(
    {
        "customer_encoder": le_customer,
        "reference_date": reference_date,
        "feature_columns": list(X.columns),
    },
    preprocessing_filename,
)
print(f"Preprocessing artifacts saved to {preprocessing_filename}")