In [1]:
# train_model.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import joblib

# 1. Veriyi Y√ºkle
df = pd.read_csv("CC GENERAL.csv")

# 2. Veri √ñn ƒ∞≈üleme
# CUST_ID gereksiz, atalƒ±m
if 'CUST_ID' in df.columns:
    df.drop('CUST_ID', axis=1, inplace=True)

# Eksik verileri doldurma (Medyan ile)
df['MINIMUM_PAYMENTS'].fillna(df['MINIMUM_PAYMENTS'].median(), inplace=True)
df['CREDIT_LIMIT'].fillna(df['CREDIT_LIMIT'].median(), inplace=True)

# √ñl√ßeklendirme (StandardScaler)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df)

# 3. En ƒ∞yi K Deƒüerini Bulma (Silhouette Score ile)
best_k = 2
best_score = -1
best_model = None

print("En iyi k√ºme sayƒ±sƒ± aranƒ±yor (Bu i≈ülem biraz s√ºrebilir)...")
for k in range(2, 7): # 2 ile 6 k√ºme arasƒ±nƒ± dene
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_scaled)
    score = silhouette_score(X_scaled, labels)
    
    print(f"k={k} i√ßin Silhouette Score: {score:.4f}")
    
    if score > best_score:
        best_score = score
        best_k = k
        best_model = kmeans

print("-" * 30)
print(f"üèÜ EN ƒ∞Yƒ∞ MODEL: k={best_k}")
print(f"üìä BA≈ûARI SKORU (Silhouette): {best_score:.4f}")
print("Yorum: 1.0'a ne kadar yakƒ±nsa o kadar iyi, 0'a yakƒ±nsa k√ºmeler birbirine girmi≈ü demektir.")

# 4. En ƒ∞yi Modeli Uygula ve Kaydet
df['Cluster'] = best_model.labels_

# PCA ile 2 boyuta indirge (G√∂rselle≈ütirme i√ßin)
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_scaled)
df['PCA1'] = principal_components[:, 0]
df['PCA2'] = principal_components[:, 1]

# Dosyalarƒ± Kaydet
df.to_csv("credit_card_clustered.csv", index=False)
joblib.dump(best_model, "cc_kmeans_model.pkl")
joblib.dump(scaler, "cc_scaler.pkl")

# Skoru da bir dosyaya yazalƒ±m ki uygulamada g√∂sterebilelim
with open("model_score.txt", "w") as f:
    f.write(f"{best_score:.4f}")

print("Model ve i≈ülenmi≈ü veriler kaydedildi.")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['MINIMUM_PAYMENTS'].fillna(df['MINIMUM_PAYMENTS'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['CREDIT_LIMIT'].fillna(df['CREDIT_LIMIT'].median(), inplace=True)


En iyi k√ºme sayƒ±sƒ± aranƒ±yor (Bu i≈ülem biraz s√ºrebilir)...
k=2 i√ßin Silhouette Score: 0.2100
k=3 i√ßin Silhouette Score: 0.2510
k=4 i√ßin Silhouette Score: 0.1977
k=5 i√ßin Silhouette Score: 0.1931
k=6 i√ßin Silhouette Score: 0.2029
------------------------------
üèÜ EN ƒ∞Yƒ∞ MODEL: k=3
üìä BA≈ûARI SKORU (Silhouette): 0.2510
Yorum: 1.0'a ne kadar yakƒ±nsa o kadar iyi, 0'a yakƒ±nsa k√ºmeler birbirine girmi≈ü demektir.
Model ve i≈ülenmi≈ü veriler kaydedildi.
