In [11]:
!pip -q install xgboost lightgbm imbalanced-learn joblib optuna --upgrade


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/400.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/247.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.4/247.4 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [12]:
import os, warnings
warnings.filterwarnings("ignore")

BASE_DIR   = "/content"                      # çıktı klasörü
CHURN_PATH = "/content/CHURN_DATA.csv"       # veri dosyan
BOLGE_PATH = "/content/BOLGE.csv"            # varsa

print("BASE_DIR:", BASE_DIR)


BASE_DIR: /content


In [13]:
from google.colab import files
print("CHURN_DATA.csv (+opsiyonel: BOLGE.csv) seçin (çoklu seçebilirsiniz).")
_ = files.upload()

import os
assert os.path.exists(CHURN_PATH), "CHURN_DATA.csv bulunamadı."


CHURN_DATA.csv (+opsiyonel: BOLGE.csv) seçin (çoklu seçebilirsiniz).


Saving BOLGE.csv to BOLGE (1).csv
Saving CHURN_DATA.csv to CHURN_DATA (1).csv
Saving Veri_Sozlugu.xlsx to Veri_Sozlugu (1).xlsx


In [14]:
import pandas as pd
import numpy as np

def read_semicolon_csv(path):
    for enc in ["utf-8", "latin-1", "cp1254"]:
        try:
            return pd.read_csv(path, sep=";", encoding=enc), enc
        except Exception:
            pass
    return pd.read_csv(path, sep=";", engine="python"), "unknown"

def merge_region(df, bolge_path):
    if not os.path.exists(bolge_path):
        return df
    bolge_df, _ = read_semicolon_csv(bolge_path)
    if "SUBE" in df.columns and "SUBE_KODU" in bolge_df.columns:
        df2 = df.copy()
        df2["SUBE"] = pd.to_numeric(df2["SUBE"], errors="coerce").astype("Int64")
        bolge_df["SUBE_KODU"] = pd.to_numeric(bolge_df["SUBE_KODU"], errors="coerce").astype("Int64")
        keep = [c for c in ["SUBE_KODU","BOLGE","IL_ADI"] if c in bolge_df.columns]
        df2 = df2.merge(bolge_df[keep], left_on="SUBE", right_on="SUBE_KODU", how="left")
        if "SUBE_KODU" in df2.columns: df2 = df2.drop(columns=["SUBE_KODU"])
        return df2
    return df

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

def metrics_from_preds(y_true, y_pred, y_prob=None):
    cm = confusion_matrix(y_true, y_pred)
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec  = recall_score(y_true, y_pred, zero_division=0)
    f1   = f1_score(y_true, y_pred, zero_division=0)
    out = {"precision":prec, "recall":rec, "f1":f1, "FP":int(cm[0,1]), "FN":int(cm[1,0]), "confusion_matrix":cm}
    if y_prob is not None:
        out["roc_auc"] = roc_auc_score(y_true, y_prob)
        out["pr_auc"]  = average_precision_score(y_true, y_prob)
    return out


In [15]:
import joblib
import optuna
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, roc_curve, precision_recall_curve
from imblearn.over_sampling import SMOTE
import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt

class BankingChurnPredictor:
    def __init__(self):
        self.models = {}
        self.feature_importance = {}
        self.label_encoders = {}
        self.scaler = StandardScaler()
        self.optimal_threshold = 0.5
        self.feature_names = []
        self.X_test = None
        self.y_test = None
        self._last_train_columns = None

    # ----- Veri -----
    def load_data(self, file_path, sep=";"):
        df = pd.read_csv(file_path, delimiter=sep)
        if "T+3_CHURN" not in df.columns:
            raise ValueError("Hedef sütun 'T+3_CHURN' yok.")
        df = df[df["T+3_CHURN"].notna()].copy()
        df["T+3_CHURN"] = pd.to_numeric(df["T+3_CHURN"], errors="coerce").astype(int)
        return df

    # ----- Feature engineering -----
    def feature_engineering(self, df):
        df_fe = df.copy()

        for col, fmt in [('KART_GIRIS_TARIHI','%d.%m.%Y'),
                         ('LAST_TXN_DT','%d.%m.%Y'),
                         ('FRST_TXN_DT','%d.%m.%Y')]:
            if col in df_fe.columns:
                df_fe[col] = pd.to_datetime(df_fe[col], format=fmt, errors='coerce')
        ref = pd.to_datetime('2023-12-01')
        if 'KART_GIRIS_TARIHI' in df_fe.columns:
            df_fe['CARD_AGE_MONTHS'] = (ref - df_fe['KART_GIRIS_TARIHI']).dt.days / 30.44
        if 'LAST_TXN_DT' in df_fe.columns:
            df_fe['DAYS_SINCE_LAST_TXN'] = (ref - df_fe['LAST_TXN_DT']).dt.days

        if {'LIMIT','KULLANILABILIR_LIMIT'}.issubset(df_fe.columns):
            denom = df_fe['LIMIT'].replace(0, np.nan)
            df_fe['LIMIT_UTILIZATION'] = (df_fe['LIMIT'] - df_fe['KULLANILABILIR_LIMIT']) / denom
            df_fe['LIMIT_UTILIZATION'] = df_fe['LIMIT_UTILIZATION'].fillna(0)

        if {'TXN_ADET_SON_3AY','TXN_ADET_SON_6AY'}.issubset(df_fe.columns):
            df_fe['TXN_TREND_3M_6M'] = df_fe['TXN_ADET_SON_3AY'] / (df_fe['TXN_ADET_SON_6AY'] + 1)
        if {'TXN_ADET_SON_1AY','TXN_ADET_SON_3AY'}.issubset(df_fe.columns):
            df_fe['TXN_TREND_1M_3M'] = df_fe['TXN_ADET_SON_1AY'] / (df_fe['TXN_ADET_SON_3AY'] + 1)
        if {'TXN_TUTAR_SON_3AY','TXN_TUTAR_SON_6AY'}.issubset(df_fe.columns):
            df_fe['AMOUNT_TREND_3M_6M'] = df_fe['TXN_TUTAR_SON_3AY'] / (df_fe['TXN_TUTAR_SON_6AY'] + 1)
        if {'TXN_TUTAR_SON_1AY','TXN_TUTAR_SON_3AY'}.issubset(df_fe.columns):
            df_fe['AMOUNT_TREND_1M_3M'] = df_fe['TXN_TUTAR_SON_1AY'] / (df_fe['TXN_TUTAR_SON_3AY'] + 1)

        if {'TXN_TUTAR_SON_3AY','TXN_ADET_SON_3AY'}.issubset(df_fe.columns):
            df_fe['AVG_TXN_AMOUNT_3M'] = df_fe['TXN_TUTAR_SON_3AY'] / (df_fe['TXN_ADET_SON_3AY'] + 1)
        if {'TXN_TUTAR_SON_1YIL','TXN_ADET_SON_1YIL'}.issubset(df_fe.columns):
            df_fe['AVG_TXN_AMOUNT_1Y'] = df_fe['TXN_TUTAR_SON_1YIL'] / (df_fe['TXN_ADET_SON_1YIL'] + 1)

        parts = []
        if 'PESIN_ALISVERIS_ADET' in df_fe.columns: parts.append((df_fe['PESIN_ALISVERIS_ADET']>0).astype(int))
        if 'TAKSITLI_ALISVERIS_ADET' in df_fe.columns: parts.append((df_fe['TAKSITLI_ALISVERIS_ADET']>0).astype(int))
        if 'NAKIT_ADET' in df_fe.columns: parts.append((df_fe['NAKIT_ADET']>0).astype(int))
        if parts: df_fe['TXN_DIVERSITY'] = sum(parts)

        score = 0
        if 'TXN_ADET_SON_1AY' in df_fe.columns: score = score + (df_fe['TXN_ADET_SON_1AY']==0).astype(int)*3
        if {'TXN_ADET_SON_3AY','TXN_ADET_SON_6AY'}.issubset(df_fe.columns):
            score = score + (df_fe['TXN_ADET_SON_3AY'] < df_fe['TXN_ADET_SON_6AY']/2).astype(int)*2
        if 'DAYS_SINCE_LAST_TXN' in df_fe.columns: score = score + (df_fe['DAYS_SINCE_LAST_TXN']>60).astype(int)*1
        if isinstance(score, (pd.Series, np.ndarray)): df_fe['DECLINING_ACTIVITY_RISK'] = score

        cat_cols = [c for c in ['KART_STATUSU','AKTIFLIK_DURUM','BOLGE','IL_ADI'] if c in df_fe.columns]
        for c in cat_cols:
            if c not in self.label_encoders:
                self.label_encoders[c] = LabelEncoder()
                df_fe[f'{c}_ENCODED'] = self.label_encoders[c].fit_transform(df_fe[c].fillna("Unknown"))
            else:
                df_fe[f'{c}_ENCODED'] = self.label_encoders[c].transform(df_fe[c].fillna("Unknown"))

        for feature in ['LAST_TXN_AMT','FRST_TXN_DT']:
            if feature in df_fe.columns:
                df_fe[f'{feature}_MISSING'] = df_fe[feature].isna().astype(int)

        return df_fe

    # ----- Feature matrisi -----
    def prepare_features(self, df):
        cand = [
            'TXN_ADET_SON_1AY','TXN_ADET_SON_3AY','TXN_ADET_SON_6AY',
            'TXN_TUTAR_SON_1AY','TXN_TUTAR_SON_3AY','TXN_TUTAR_SON_6AY',
            'PESIN_ALISVERIS_ADET','PESIN_ALISVERIS_TUTAR',
            'LAST_TXN_AMT','LIMIT','KULLANILABILIR_LIMIT',
            'CARD_AGE_MONTHS','DAYS_SINCE_LAST_TXN','LIMIT_UTILIZATION',
            'TXN_TREND_3M_6M','TXN_TREND_1M_3M','AMOUNT_TREND_3M_6M','AMOUNT_TREND_1M_3M',
            'AVG_TXN_AMOUNT_3M','AVG_TXN_AMOUNT_1Y','TXN_DIVERSITY','DECLINING_ACTIVITY_RISK',
            'KART_STATUSU_ENCODED','AKTIFLIK_DURUM_ENCODED','BOLGE_ENCODED','IL_ADI_ENCODED',
            'NEVERACTIVE','SON_1SENE_INAKTIF','LAST_TXN_AMT_MISSING'
        ]
        avail = [c for c in cand if c in df.columns]
        X = df[avail].copy()
        imp = SimpleImputer(strategy="median")
        X = pd.DataFrame(imp.fit_transform(X), columns=avail, index=df.index)
        self.feature_names = X.columns.tolist()
        self._last_train_columns = self.feature_names.copy()
        return X

    # ----- Eğitim -----
    def train_models(self, X, y, use_smote=True, random_state=42):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state, stratify=y
        )
        if use_smote:
            sm = SMOTE(random_state=random_state); X_train, y_train = sm.fit_resample(X_train, y_train)
        X_train_scaled = self.scaler.fit_transform(X_train); X_test_scaled  = self.scaler.transform(X_test)

        models = {
            'XGBoost': xgb.XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.08,
                                         subsample=0.9, colsample_bytree=0.9, random_state=random_state, eval_metric='logloss'),
            'LightGBM': lgb.LGBMClassifier(n_estimators=300, learning_rate=0.08,
                                           subsample=0.9, colsample_bytree=0.9, random_state=random_state, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=400, n_jobs=-1, random_state=random_state,
                                                   min_samples_split=4, min_samples_leaf=2),
            'LogisticRegression': LogisticRegression(max_iter=2000, class_weight='balanced', random_state=random_state)
        }

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
        from sklearn.metrics import roc_auc_score, average_precision_score

        results = {}
        print("Model eğitimi başlıyor...")
        for name, model in models.items():
            print(f"\n{name} eğitiliyor...")
            if name == "LogisticRegression":
                model.fit(X_train_scaled, y_train)
                y_prob = model.predict_proba(X_test_scaled)[:,1]
                cv = cross_val_score(model, X_train_scaled, y_train, cv=skf, scoring='roc_auc')
            else:
                model.fit(X_train, y_train)
                y_prob = model.predict_proba(X_test)[:,1]
                cv = cross_val_score(model, X_train, y_train, cv=skf, scoring='roc_auc')

            auc = roc_auc_score(y_test, y_prob); pr = average_precision_score(y_test, y_prob)
            print(f"Test AUC: {auc:.4f} | PR AUC: {pr:.4f}")
            print(f"CV AUC : {cv.mean():.4f} (+/- {cv.std()*2:.4f})")
            results[name] = {"model":model, "predictions":y_prob, "test_auc":float(auc), "avg_precision":float(pr)}

            if hasattr(model, "feature_importances_"):
                self.feature_importance[name] = dict(zip(self.feature_names, model.feature_importances_))
            elif name=="LogisticRegression" and hasattr(model, "coef_"):
                import numpy as np
                self.feature_importance[name] = dict(zip(self.feature_names, np.abs(model.coef_[0])))

        self.models, self.X_test, self.y_test = results, X_test, y_test
        return results

    # ----- Optuna ile XGBoost tuning -----
    def tune_xgb_optuna(self, X, y, n_trials=30, random_state=42):
        X_train, X_valid, y_train, y_valid = train_test_split(
            X, y, test_size=0.2, random_state=random_state, stratify=y
        )

        def objective(trial):
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 200, 800),
                "max_depth": trial.suggest_int("max_depth", 3, 10),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
                "subsample": trial.suggest_float("subsample", 0.6, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
                "min_child_weight": trial.suggest_float("min_child_weight", 1.0, 10.0),
                "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 10.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
                "random_state": random_state,
                "eval_metric": "logloss",
                "tree_method": "hist"
            }
            model = xgb.XGBClassifier(**params)
            model.fit(X_train, y_train)
            y_prob = model.predict_proba(X_valid)[:,1]
            return roc_auc_score(y_valid, y_prob)

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials, show_progress_bar=False)
        best_params = study.best_params
        print("Optuna en iyi AUC:", study.best_value)
        print("Optuna en iyi parametreler:", best_params)
        return best_params

    # ----- Eşik -----
    def set_threshold(self, value: float):
        self.optimal_threshold = float(value)
        print(f"Eşik (threshold): {self.optimal_threshold:.2f}")

    # ----- Performans -----
    def get_model_performance(self, model_name):
        y_prob = self.models[model_name]['predictions']
        y_pred = (y_prob >= self.optimal_threshold).astype(int)
        print(f"\n=== {model_name} ===")
        print(f"Threshold: {self.optimal_threshold:.2f}")
        print("\nClassification Report:")
        print(classification_report(self.y_test, y_pred, digits=4))
        print("\nConfusion Matrix:")
        print(confusion_matrix(self.y_test, y_pred))
        return metrics_from_preds(self.y_test, y_pred, y_prob)

    # ----- Tahmin -----
    def predict(self, df_new, model_name, return_scores=True):
        df_fe = self.feature_engineering(df_new)
        X_new = self.prepare_features(df_fe)
        for c in self._last_train_columns:
            if c not in X_new.columns: X_new[c] = 0.0
        X_new = X_new[self._last_train_columns]
        model = self.models[model_name]['model']
        if isinstance(model, LogisticRegression):
            probs = model.predict_proba(self.scaler.transform(X_new))[:,1]
        else:
            probs = model.predict_proba(X_new)[:,1]
        preds = (probs >= self.optimal_threshold).astype(int)
        return (preds, probs) if return_scores else preds

    # ----- Hibrit eşik -----
    @staticmethod
    def apply_hybrid_thresholds(df, score_col="churn_score",
                                high_value_mask=None,
                                t_hv=0.35, t_lv=0.50):
        pred = np.zeros(len(df), dtype=int)
        if high_value_mask is None:
            high_value_mask = np.zeros(len(df), dtype=bool)
        pred[(~high_value_mask) & (df[score_col] >= t_lv)] = 1
        pred[( high_value_mask) & (df[score_col] >= t_hv)] = 1
        return pred

    # ----- Kampanya türü -----
    @staticmethod
    def assign_campaign(score):
        if score > 0.60: return "Güçlü Teşvik"
        if score >= 0.38: return "Düşük Maliyetli"
        return "Yok"

    # ----- Kaydet/Yükle -----
    def save(self, path):
        bundle = {
            "models": self.models,
            "feature_importance": self.feature_importance,
            "label_encoders": self.label_encoders,
            "scaler": self.scaler,
            "optimal_threshold": self.optimal_threshold,
            "feature_names": self.feature_names,
            "_last_train_columns": self._last_train_columns
        }
        joblib.dump(bundle, path); print(f"Model paketi kaydedildi: {path}"); return path

    @staticmethod
    def load(path):
        bundle = joblib.load(path)
        obj = BankingChurnPredictor()
        obj.models = bundle["models"]; obj.feature_importance = bundle["feature_importance"]
        obj.label_encoders = bundle["label_encoders"]; obj.scaler = bundle["scaler"]
        obj.optimal_threshold = bundle["optimal_threshold"]; obj.feature_names = bundle["feature_names"]
        obj._last_train_columns = bundle["_last_train_columns"]
        print(f"Model paketi yüklendi: {path}")
        return obj


In [16]:
# Oku
churn_df, enc = read_semicolon_csv(CHURN_PATH)
print("CHURN_DATA:", churn_df.shape, "| encoding:", enc)

# Bölge birleştir (varsa)
churn_df = merge_region(churn_df, BOLGE_PATH)

# Hedef
assert 'T+3_CHURN' in churn_df.columns, "T+3_CHURN yok!"
churn_df = churn_df[churn_df['T+3_CHURN'].notna()].copy()
churn_df['T+3_CHURN'] = pd.to_numeric(churn_df['T+3_CHURN'], errors='coerce').astype(int)

# Modelle
predictor = BankingChurnPredictor()
df_fe = predictor.feature_engineering(churn_df)
X = predictor.prepare_features(df_fe)
y = df_fe['T+3_CHURN'].astype(int).values

results = predictor.train_models(X, y, use_smote=True)
best_model_name = max(results.keys(), key=lambda k: results[k]['test_auc'])
print("En iyi model (baseline):", best_model_name, "| AUC:", results[best_model_name]['test_auc'])


CHURN_DATA: (3559, 44) | encoding: utf-8
Model eğitimi başlıyor...

XGBoost eğitiliyor...
Test AUC: 0.7232 | PR AUC: 0.5469
CV AUC : 0.8904 (+/- 0.0140)

LightGBM eğitiliyor...
Test AUC: 0.7051 | PR AUC: 0.5398
CV AUC : 0.8931 (+/- 0.0155)

RandomForest eğitiliyor...
Test AUC: 0.7360 | PR AUC: 0.5535
CV AUC : 0.8961 (+/- 0.0175)

LogisticRegression eğitiliyor...
Test AUC: 0.6258 | PR AUC: 0.4001
CV AUC : 0.6425 (+/- 0.0180)
En iyi model (baseline): RandomForest | AUC: 0.7360277569392348


In [17]:
# Sadece XGBoost için tuning yapalım; daha iyi ise en iyi model olarak güncelleriz.
best_params = predictor.tune_xgb_optuna(X, y, n_trials=40)  # istersen 60-100'e çıkar
xgb_best = xgb.XGBClassifier(**best_params, eval_metric="logloss", tree_method="hist", random_state=42)

# Train-test split & fit (SMOTE'lu aynı düzen)
from imblearn.over_sampling import SMOTE
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train_res, y_train_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
xgb_best.fit(X_train_res, y_train_res)
y_prob = xgb_best.predict_proba(X_test)[:,1]

# Modeli predictor'a yaz
predictor.models["XGBoost_Tuned"] = {
    "model": xgb_best,
    "predictions": y_prob,
    "test_auc": float(roc_auc_score(y_test, y_prob)),
    "avg_precision": float(average_precision_score(y_test, y_prob)),
}

# En iyi modeli yeniden seç
best_model_name = max(predictor.models.keys(), key=lambda k: predictor.models[k]['test_auc'])
print("En iyi model (tuning sonrası):", best_model_name, "| AUC:", predictor.models[best_model_name]['test_auc'])


[I 2025-08-28 12:18:22,348] A new study created in memory with name: no-name-a4342964-ea5b-4dd8-9414-12e25b0e9c21
[I 2025-08-28 12:18:23,133] Trial 0 finished with value: 0.6845086271567893 and parameters: {'n_estimators': 377, 'max_depth': 8, 'learning_rate': 0.014633493067243535, 'subsample': 0.6075352454009557, 'colsample_bytree': 0.6244978530333996, 'min_child_weight': 7.542083754309131, 'reg_lambda': 0.9897523543085507, 'reg_alpha': 3.335319368837544}. Best is trial 0 with value: 0.6845086271567893.
[I 2025-08-28 12:18:23,745] Trial 1 finished with value: 0.6765566391597899 and parameters: {'n_estimators': 553, 'max_depth': 4, 'learning_rate': 0.023753223745868926, 'subsample': 0.9597223480715591, 'colsample_bytree': 0.7021790884551177, 'min_child_weight': 9.823237466207107, 'reg_lambda': 6.046390829478245, 'reg_alpha': 1.253785877998694}. Best is trial 0 with value: 0.6845086271567893.
[I 2025-08-28 12:18:25,558] Trial 2 finished with value: 0.6632408102025507 and parameters: {'n

Optuna en iyi AUC: 0.7064422355588897
Optuna en iyi parametreler: {'n_estimators': 354, 'max_depth': 10, 'learning_rate': 0.13372919088677368, 'subsample': 0.6870789117711058, 'colsample_bytree': 0.9775882799074648, 'min_child_weight': 1.0016221567780157, 'reg_lambda': 1.9718881895880513, 'reg_alpha': 1.185674845823839}
En iyi model (tuning sonrası): RandomForest | AUC: 0.7360277569392348


In [18]:
predictor.set_threshold(0.45)
global_perf = predictor.get_model_performance(best_model_name)
global_perf


Eşik (threshold): 0.45

=== RandomForest ===
Threshold: 0.45

Classification Report:
              precision    recall  f1-score   support

           0     0.7857    0.7540    0.7695       496
           1     0.4809    0.5256    0.5022       215

    accuracy                         0.6850       711
   macro avg     0.6333    0.6398    0.6359       711
weighted avg     0.6935    0.6850    0.6887       711


Confusion Matrix:
[[374 122]
 [102 113]]


{'precision': 0.4808510638297872,
 'recall': 0.5255813953488372,
 'f1': 0.5022222222222222,
 'FP': 122,
 'FN': 102,
 'confusion_matrix': array([[374, 122],
        [102, 113]]),
 'roc_auc': np.float64(0.7360277569392348),
 'pr_auc': np.float64(0.5534721545768642)}

In [19]:
# Skorları al
_, probs = predictor.predict(churn_df, model_name=best_model_name, return_scores=True)
work = churn_df.copy()
work["churn_score"] = probs
work["true_y"] = work["T+3_CHURN"]

# Segment (iş kuralına göre değiştirilebilir)
high_value = (work["LIMIT"] > 20000) | (work.get("TXN_TUTAR_SON_6AY", 0) > 10000)

# Hibrit karar
work["pred_hybrid"] = BankingChurnPredictor.apply_hybrid_thresholds(
    work, score_col="churn_score", high_value_mask=high_value, t_hv=0.35, t_lv=0.50
)

# Global karar (0.45)
work["pred_global045"] = (work["churn_score"] >= 0.45).astype(int)

# Kampanya türü (skora göre)
work["campaign_type"] = work["churn_score"].apply(BankingChurnPredictor.assign_campaign)

# Metrikler
hyb_perf = metrics_from_preds(work["true_y"], work["pred_hybrid"], work["churn_score"])
glob_perf = metrics_from_preds(work["true_y"], work["pred_global045"], work["churn_score"])

import pandas as pd
cmp = pd.DataFrame([
    {"setup":"Global_0.45", **{k:glob_perf[k] for k in ["precision","recall","f1","FP","FN"]}},
    {"setup":"Hybrid(HV=0.35/LV=0.50)", **{k:hyb_perf[k] for k in ["precision","recall","f1","FP","FN"]}},
])
cmp


Unnamed: 0,setup,precision,recall,f1,FP,FN
0,Global_0.45,0.886758,0.904097,0.895343,124,103
1,Hybrid(HV=0.35/LV=0.50),0.822368,0.931099,0.873362,216,74


In [21]:
# --- 1) Gerekli skor ve global karar (varsa atla) ---
assert "churn_score" in work.columns, "Önce work['churn_score'] kolonunu üretmelisin."

work["churn_pred_global_045"] = (work["churn_score"] >= 0.45).astype(int)

# --- 2) Segment maskesi (iş kuralına göre dilersen değiştir) ---
high_value = (work["LIMIT"] > 20000) | (work.get("TXN_TUTAR_SON_6AY", 0) > 10000)

# --- 3) Segment bazlı eşik ile karar üret ---
# (eğer sınıfta apply_hybrid_thresholds fonksiyonu yoksa aşağıdaki elif bloğu alternatif yolu çalıştırır)
if hasattr(BankingChurnPredictor, "apply_hybrid_thresholds"):
    work["churn_pred_segmented"] = BankingChurnPredictor.apply_hybrid_thresholds(
        work, score_col="churn_score", high_value_mask=high_value, t_hv=0.35, t_lv=0.50
    )
else:
    # Alternatif: fonksiyon yoksa doğrudan kuralla üret
    pred = np.zeros(len(work), dtype=int)
    pred[ high_value & (work["churn_score"] >= 0.35)] = 1
    pred[~high_value & (work["churn_score"] >= 0.50)] = 1
    work["churn_pred_segmented"] = pred

# --- 4) Kampanya tipi (yoksa üret) ---
if "campaign_type" not in work.columns:
    def _assign_campaign(score):
        if score > 0.60: return "Güçlü Teşvik"
        if score >= 0.38: return "Düşük Maliyetli"
        return "Yok"
    work["campaign_type"] = work["churn_score"].apply(_assign_campaign)

# --- 5) Çıktıyı güvenli şekilde kaydet ---
id_cols = [c for c in ["ID", "KART_NO", "MUSTERI_ID"] if c in work.columns]
cols_out = id_cols + ["churn_score", "churn_pred_global_045", "churn_pred_segmented", "campaign_type"]

missing = [c for c in cols_out if c not in work.columns]
assert not missing, f"Eksik kolon(lar): {missing}. Üstteki adımları kontrol et."

out_path = os.path.join(BASE_DIR, "churn_scored_segmented.csv")
work[cols_out].to_csv(out_path, index=False)
print("Kaydedildi:", out_path)

# (opsiyonel) hızlı kontrol
print(work[cols_out].head(3))


Kaydedildi: /content/churn_scored_segmented.csv
         ID   KART_NO  churn_score  churn_pred_global_045  \
0  383696.0  198542.0     0.098089                      0   
1   83241.0   97042.0     0.143304                      0   
2  340367.0  182196.0     0.069935                      0   

   churn_pred_segmented campaign_type  
0                     0           Yok  
1                     0           Yok  
2                     0           Yok  


In [22]:
id_cols = [c for c in ['ID','KART_NO','MUSTERI_ID'] if c in work.columns]
cols_out = id_cols + ["churn_score","pred_global045","pred_hybrid","campaign_type"]
out_path = os.path.join(BASE_DIR, "churn_scored_hybrid.csv")
work[cols_out].to_csv(out_path, index=False)
print("Kaydedildi:", out_path)


Kaydedildi: /content/churn_scored_hybrid.csv
