In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read and write files under it.
drive.mount('/content/drive')


In [None]:
import pandas as pd

# Location of the transactions CSV on the mounted Drive.
PATH = "/content/drive/MyDrive/islemler_final.csv"

# Parser settings: semicolon-separated fields (typical for Turkish-locale
# Excel exports), tolerate a UTF-8 BOM, and accept a comma as decimal mark.
read_options = {
    "sep": ";",
    "encoding": "utf-8-sig",
    "decimal": ",",
    "low_memory": False,
}
df = pd.read_csv(PATH, **read_options)

# Preview the first rows as the cell output.
df.head()


In [None]:
# Print column dtypes and non-null counts of the raw frame.
df.info()


In [None]:
import pandas as pd

# Work on a copy so the freshly loaded frame `df` is left untouched.
df_clean = df.copy()

# 1) Cast the identifier columns to integers.
id_cols = ["islem_id", "kullanici_id"]
df_clean = df_clean.astype({name: int for name in id_cols})

# 2) Parse the date column, then join it with the time-of-day text into one
#    timestamp. Values that cannot be parsed become NaT instead of raising.
df_clean["tarih"] = pd.to_datetime(df_clean["tarih"], errors="coerce")
date_text = df_clean["tarih"].dt.strftime("%Y-%m-%d")
df_clean["datetime"] = pd.to_datetime(
    date_text + " " + df_clean["saat"], errors="coerce"
)

# Hour-of-day feature taken from the combined timestamp.
df_clean["hour"] = df_clean["datetime"].dt.hour

# 3) Fill missing amounts with the column median.
tutar_median = df_clean["tutar"].median()
df_clean["tutar"] = df_clean["tutar"].fillna(tutar_median)

# 4) Cast the columns listed as binary fields to integers.
#    NOTE(review): the two `*_islem_sayisi` names look like counts rather
#    than 0/1 flags — confirm the list name still fits.
binary_cols = [
    "ayni_gun_islem_sayisi",
    "son_30dk_islem_sayisi",
    "daha_once_reddedildi_mi",
    "fraud",
]
df_clean = df_clean.astype({name: int for name in binary_cols})

# Check the resulting dtypes and non-null counts.
df_clean.info()


In [None]:
# Fill missing hours with the median hour.
# The median of an even number of observations can fall on x.5, which would
# introduce a fractional hour bucket in the per-hour counts and group-bys.
# Hours are non-negative, so truncating with int() floors to a whole hour.
median_hour = df_clean["hour"].median()
if pd.notna(median_hour):
    median_hour = int(median_hour)

df_clean["hour"] = df_clean["hour"].fillna(median_hour)

# Rebuild missing timestamps as date + (filled) hour.
df_clean["datetime"] = df_clean["datetime"].fillna(
    df_clean["tarih"] + pd.to_timedelta(df_clean["hour"], unit="h")
)

# Remaining null counts; rows whose date itself is NaT keep a NaT timestamp.
df_clean[["datetime", "hour"]].isnull().sum()


In [None]:
# (rows, columns) of the cleaned frame.
df_clean.shape


In [None]:
# Column labels of the cleaned frame.
df_clean.columns


In [None]:
# dtype of each column after cleaning.
df_clean.dtypes


In [None]:
# Missing-value count per column after cleaning.
df_clean.isnull().sum()


In [None]:
import matplotlib.pyplot as plt

# Missing values per column, drawn as one bar per column.
missing_counts = df_clean.isnull().sum()

plt.figure(figsize=(8, 4))
plt.bar(missing_counts.index, missing_counts.values)
plt.xticks(rotation=90)  # draw the column labels vertically
plt.title("Kolon Bazlı Eksik Veri Sayısı")
plt.show()


In [None]:
# Histogram of the transaction amount, 50 bins.
fig, ax = plt.subplots(figsize=(6, 4))
df_clean["tutar"].hist(bins=50, ax=ax)
ax.set_title("İşlem Tutarı Dağılımı")
ax.set_xlabel("Tutar")
ax.set_ylabel("Frekans")
plt.show()


In [None]:
# Box plot of the transaction amount.
fig, ax = plt.subplots(figsize=(4, 4))
ax.boxplot(df_clean["tutar"])
ax.set(title="İşlem Tutarı Boxplot", ylabel="Tutar")
plt.show()


In [None]:
# Outlier bounds for the amount: 1.5 * IQR beyond the quartiles.
amounts = df_clean["tutar"]
q1, q3 = amounts.quantile(0.25), amounts.quantile(0.75)
iqr = q3 - q1

lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# Shape of the subset lying strictly outside [lower, upper].
is_outlier = amounts.lt(lower) | amounts.gt(upper)
df_clean[is_outlier].shape


In [None]:
# Number of transactions per hour value, ordered by hour.
hourly_counts = df_clean["hour"].value_counts().sort_index()

ax = hourly_counts.plot.bar()
ax.set_title("Saatlere Göre İşlem Sayısı")
ax.set_xlabel("Saat")
ax.set_ylabel("İşlem Sayısı")
plt.show()


In [None]:
# Mean transaction amount for each hour value, as a line plot.
mean_amount_by_hour = df_clean.groupby("hour")["tutar"].mean()

ax = mean_amount_by_hour.plot()
ax.set(title="Saatlere Göre Ortalama Tutar", xlabel="Saat", ylabel="Ortalama Tutar")
plt.show()


In [None]:
# Transaction count for each value of `cihaz_tipi`.
device_counts = df_clean["cihaz_tipi"].value_counts()

ax = device_counts.plot.bar()
ax.set_title("Cihaz Tipine Göre İşlem Sayısı")
plt.show()


In [None]:
# Transaction count for each value of `banka_tipi`.
bank_counts = df_clean["banka_tipi"].value_counts()

ax = bank_counts.plot.bar()
ax.set_title("Banka Tipine Göre İşlem Sayısı")
plt.show()


In [None]:
# Transaction count for each value of `islem_kategorisi`.
category_counts = df_clean["islem_kategorisi"].value_counts()

ax = category_counts.plot.bar()
ax.set_title("İşlem Kategorisine Göre Dağılım")
plt.show()


In [None]:
import seaborn as sns

# Correlate every numeric column. Selecting the generic "number" kind instead
# of listing int64/float64 keeps numeric columns of other widths in the
# matrix (e.g. int32, which `Series.dt.hour` returns in pandas 2.x when the
# column has no missing values) instead of silently dropping them.
numeric_frame = df_clean.select_dtypes(include="number")

plt.figure(figsize=(10, 6))
sns.heatmap(numeric_frame.corr(), annot=True, cmap="coolwarm")
plt.title("Sayısal Değişkenler Korelasyon Matrisi")
plt.show()


In [None]:
# Target vector.
y = df_clean["fraud"]

# Feature matrix: everything except the target and the raw date/time columns
# (the derived `hour` column stays in).
# NOTE(review): `islem_id` and `kullanici_id` are identifier columns and are
# still part of the features here — confirm that is intended.
non_feature_cols = ["fraud", "datetime", "tarih", "saat"]
X = df_clean.drop(columns=non_feature_cols)


In [None]:
# One-hot encode the categorical columns of X; drop_first=True omits the first
# level of each encoded column so the dummies are not redundant.
X = pd.get_dummies(X, drop_first=True)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; stratify on the target so both parts
# keep the same class ratio, and fix the seed for a reproducible split.
split_options = {"test_size": 0.25, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Logistic regression with the iteration cap raised to 1000.
# fit() returns the estimator itself, so it can be chained.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predictions on the held-out set, kept for the comparison table.
y_pred_lr = log_reg.predict(X_test)

print("Logistic Regression Sonuçları")
lr_report = classification_report(y_test, y_pred_lr)
print(lr_report)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: 200 trees, fixed seed, class weights set to "balanced".
rf_params = {
    "n_estimators": 200,
    "random_state": 42,
    "class_weight": "balanced",
}
rf = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# Predictions on the held-out set, kept for the comparison table.
y_pred_rf = rf.predict(X_test)

print("Random Forest Sonuçları")
rf_report = classification_report(y_test, y_pred_rf)
print(rf_report)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with default hyper-parameters and a fixed seed.
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Predictions on the held-out set, kept for the comparison table.
y_pred_gb = gb.predict(X_test)

print("Gradient Boosting Sonuçları")
gb_report = classification_report(y_test, y_pred_gb)
print(gb_report)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

def get_metrics(y_true, y_pred):
    """Score one set of predictions against the true labels.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall" and "F1"
        (in that order), each holding the result of the matching scorer
        called as ``scorer(y_true, y_pred)``.
    """
    scorers = {
        "Accuracy": accuracy_score,
        "Precision": precision_score,
        "Recall": recall_score,
        "F1": f1_score,
    }
    return {label: scorer(y_true, y_pred) for label, scorer in scorers.items()}

# Test-set predictions of each model, keyed by the column label used in the table.
predictions_by_model = {
    "Logistic Regression": y_pred_lr,
    "Random Forest": y_pred_rf,
    "Gradient Boosting": y_pred_gb,
}

# One column per model, one row per metric.
comparison = pd.DataFrame(
    {
        model_name: get_metrics(y_test, model_pred)
        for model_name, model_pred in predictions_by_model.items()
    }
)

comparison


In [None]:
import os

import joblib

# Destination of the persisted model on the mounted Drive.
MODEL_PATH = "/content/drive/MyDrive/4LU_PROJE/fraud_model.joblib"

# joblib.dump does not create missing parent folders, so make sure the target
# directory exists first; otherwise this cell fails on a Drive that does not
# have the project folder yet.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

joblib.dump(gb, MODEL_PATH)
print("Nihai model (Gradient Boosting) kaydedildi.")


In [None]:
import os

# Create the project folder on Drive if it is missing; exist_ok=True makes
# this a no-op when the folder is already there.
# NOTE(review): the model is saved into this folder by other cells; keep this
# call ahead of any save that relies on it.
os.makedirs("/content/drive/MyDrive/4LU_PROJE", exist_ok=True)


In [None]:
import joblib

# Persist the fitted Gradient Boosting model to the project folder on Drive.
# This repeats the save performed by an earlier cell, writing the same file.
MODEL_PATH = "/content/drive/MyDrive/4LU_PROJE/fraud_model.joblib"
joblib.dump(gb, MODEL_PATH)
print("Nihai model (Gradient Boosting) kaydedildi.")
