In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

import pandas as pd

# Read the listings spreadsheet
df = pd.read_excel("/content/finalemlak2.xlsx")


# --- Cleaning ---
# Fold the rare sub-categories into the four that are modelled. Every source
# label maps directly to its final label (land-like labels end up as "Tarla").
subcategory_merge = {
    "İmarlı - Konut": "Tarla",
    "Konut+Ticaret": "Tarla",
    "Bahçe": "Tarla",
    "Köy Evi": "Müstakil Ev",
    "Prefabrik": "Müstakil Ev",
    "Bungalov": "Villa",
    "Köşk": "Villa",
    "Çiftlik Evi": "Villa",
}
df['subCategory'] = df['subCategory'].replace(subcategory_merge)

# "Yazlık" listings are split by price: above 2.5M -> Villa, otherwise Müstakil Ev
is_yazlik = df['subCategory'] == 'Yazlık'
df.loc[is_yazlik & (df['price'] > 2_500_000), 'subCategory'] = 'Villa'
df.loc[is_yazlik & (df['price'] <= 2_500_000), 'subCategory'] = 'Müstakil Ev'

# Land rows get a matching residence label
df.loc[df['subCategory'] == "Tarla", 'residence'] = "Tarla"

# Building assignments: the residence type overrides the sub-category
residence_overrides = {
    "Müstakil Ev": "Müstakil Ev",
    "Daire": "Daire",
    "Tripleks": "Villa",
}
for residence_label, forced_subcategory in residence_overrides.items():
    df.loc[df['residence'] == residence_label, 'subCategory'] = forced_subcategory
df['subCategory'] = df['subCategory'].replace("Bina", "Villa")

# Feature engineering: total number of rooms
df['toplam_oda'] = df['room'] + df['livingRoom'] + df['bathRoom']

# Price per net square metre (temporary helper column)
df['netSqm_div_price'] = df['price'] / df['netSqm']

# Keep every "Tarla" row as is; all other rows must have a unit price
# strictly between 1000 and 60000.
is_land = df['subCategory'] == "Tarla"
unit_price_ok = (df['netSqm_div_price'] > 1000) & (df['netSqm_div_price'] < 60000)

# Drop the helper column afterwards so the models cannot memorise the target from it
df = df[is_land | unit_price_ok].drop(columns=['netSqm_div_price'])

# Outlier removal on price. Note that Q1/Q3 here are the 5th and 95th
# percentiles, and the fence is 1.5x their spread on either side.
Q1, Q3 = df['price'].quantile([0.05, 0.95])
IQR = Q3 - Q1
df = df[df['price'].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]


# Split the cleaned listings by sub-category; each sub-category is modelled separately.
sub_df_dict = {subcat: df[df['subCategory'] == subcat].copy() for subcat in df['subCategory'].unique()}

# Categorical / numeric column lists are derived per sub-category inside the
# training loop, so they are not precomputed here.

# Candidate regressors to compare.
# Both tree-based estimators are seeded so repeated runs give the same metrics.
models = {
    'DecisionTree': DecisionTreeRegressor(
        max_depth=6,
        min_samples_leaf=4,
        min_samples_split=2,
        random_state=42,
    ),
    'RandomForest': RandomForestRegressor(random_state=42),
    'LinearRegression': LinearRegression(),
    # The training loop builds its own KNeighborsRegressor with n_neighbors
    # capped by the training-set size; this entry only registers the name.
    'KNN': KNeighborsRegressor(),
}

# One metrics row per (sub-category, model) pair
results = []

for subcat, sub_df in sub_df_dict.items():
    # A train/test split needs at least two rows
    row_count = len(sub_df)
    if row_count < 2:
        print(f"⚠️  {subcat} kategorisi atlandı (yetersiz veri: {row_count} satır)")
        continue

    print(f"\n🔍 Alt Kategori: {subcat}")

    y = sub_df['price']
    X = sub_df.drop(columns=['price'])

    # Column groups are recomputed for every sub-category
    numeric_cols = X.select_dtypes(include='number').columns.tolist()
    categorical_cols = X.select_dtypes(include='object').columns.tolist()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Preprocessing: scale numeric columns, one-hot encode categorical ones
    # (categories unseen during fit are ignored at predict time)
    preprocessor = ColumnTransformer(transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ])

    # KNN cannot use more neighbours than there are training rows
    n_samples_train = len(X_train)
    neighbors = min(5, n_samples_train)

    for model_name, registered_model in models.items():
        if model_name == 'KNN':
            # Built fresh so n_neighbors fits this sub-category's training size
            model = KNeighborsRegressor(n_neighbors=neighbors)
        else:
            model = registered_model

        pipeline = Pipeline(steps=[('preprocess', preprocessor), ('model', model)])

        # Fit on the training split, predict the held-out split
        y_pred = pipeline.fit(X_train, y_train).predict(X_test)

        # Metrics
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        mean_price = y_test.mean()

        # MAE as a percentage of the mean test price; undefined when that mean is 0
        if mean_price != 0:
            mae_percent = round((mae / mean_price) * 100, 2)
        else:
            mae_percent = None

        results.append({
            'AltKategori': subcat,
            'Model': model_name,
            'R2_Score': round(r2, 4),
            'MAE (Milyon)': round(mae / 1_000_000, 4),
            'MAE_Yuzde': mae_percent,
        })

# Turn the collected rows into a DataFrame
result_df = pd.DataFrame(results)

# Print as a table
from tabulate import tabulate
print(tabulate(result_df, headers='keys', tablefmt='fancy_grid'))





🔍 Alt Kategori: Daire

🔍 Alt Kategori: Villa

🔍 Alt Kategori: Müstakil Ev

🔍 Alt Kategori: Tarla
╒════╤═══════════════╤══════════════════╤════════════╤════════════════╤═════════════╕
│    │ AltKategori   │ Model            │   R2_Score │   MAE (Milyon) │   MAE_Yuzde │
╞════╪═══════════════╪══════════════════╪════════════╪════════════════╪═════════════╡
│  0 │ Daire         │ DecisionTree     │     0.6192 │         0.4876 │       18.44 │
├────┼───────────────┼──────────────────┼────────────┼────────────────┼─────────────┤
│  1 │ Daire         │ RandomForest     │     0.7576 │         0.3924 │       14.84 │
├────┼───────────────┼──────────────────┼────────────┼────────────────┼─────────────┤
│  2 │ Daire         │ LinearRegression │     0.7133 │         0.4438 │       16.79 │
├────┼───────────────┼──────────────────┼────────────┼────────────────┼─────────────┤
│  3 │ Daire         │ KNN              │     0.5727 │         0.5219 │       19.74 │
├────┼───────────────┼──────────────────┼─

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_absolute_error

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

# Read the data
df = pd.read_excel("/content/finalemlak2.xlsx")


# --- Cleaning ---
# Fold the rare sub-categories into the four that are modelled. Every source
# label maps directly to its final label (land-like labels end up as "Tarla").
subcategory_merge = {
    "İmarlı - Konut": "Tarla",
    "Konut+Ticaret": "Tarla",
    "Bahçe": "Tarla",
    "Köy Evi": "Müstakil Ev",
    "Prefabrik": "Müstakil Ev",
    "Bungalov": "Villa",
    "Köşk": "Villa",
    "Çiftlik Evi": "Villa",
}
df['subCategory'] = df['subCategory'].replace(subcategory_merge)

# "Yazlık" listings are split by price: above 2.5M -> Villa, otherwise Müstakil Ev
is_yazlik = df['subCategory'] == 'Yazlık'
df.loc[is_yazlik & (df['price'] > 2_500_000), 'subCategory'] = 'Villa'
df.loc[is_yazlik & (df['price'] <= 2_500_000), 'subCategory'] = 'Müstakil Ev'

# Land rows get a matching residence label
df.loc[df['subCategory'] == "Tarla", 'residence'] = "Tarla"

# Building assignments: the residence type overrides the sub-category
residence_overrides = {
    "Müstakil Ev": "Müstakil Ev",
    "Daire": "Daire",
    "Tripleks": "Villa",
}
for residence_label, forced_subcategory in residence_overrides.items():
    df.loc[df['residence'] == residence_label, 'subCategory'] = forced_subcategory
df['subCategory'] = df['subCategory'].replace("Bina", "Villa")

# Feature engineering: total number of rooms
df['toplam_oda'] = df['room'] + df['livingRoom'] + df['bathRoom']

# Price per net square metre (temporary helper column)
df['netSqm_div_price'] = df['price'] / df['netSqm']

# Keep every "Tarla" row as is; all other rows must have a unit price
# strictly between 1000 and 60000.
is_land = df['subCategory'] == "Tarla"
unit_price_ok = (df['netSqm_div_price'] > 1000) & (df['netSqm_div_price'] < 60000)

# Drop the helper column afterwards so the models cannot memorise the target from it
df = df[is_land | unit_price_ok].drop(columns=['netSqm_div_price'])

# Outlier removal on price. Note that Q1/Q3 here are the 5th and 95th
# percentiles, and the fence is 1.5x their spread on either side.
Q1, Q3 = df['price'].quantile([0.05, 0.95])
IQR = Q3 - Q1
df = df[df['price'].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]

# --- Available models, keyed by name ---
# The tree-based estimators are seeded so the reported metrics (and any later
# prediction made with these instances) are reproducible between runs.
# NOTE(review): DecisionTree uses default hyperparameters here, unlike the
# tuned tree in the comparison cell; it is not selected below — confirm intent.
model_map = {
    'DecisionTree': DecisionTreeRegressor(random_state=42),
    'RandomForest': RandomForestRegressor(random_state=42),
    'LinearRegression': LinearRegression(),
    'KNN': KNeighborsRegressor()
}

# --- Model name chosen for each sub-category ---
# Sub-categories missing from this dict are skipped by the evaluation loop.
best_model_dict = {
    'Daire': 'RandomForest',
    'Villa': 'RandomForest',
    'Müstakil Ev': 'RandomForest',
    'Tarla': 'LinearRegression',
}


# --- Collected per-category metrics ---
final_results = []

# --- Train and evaluate the pre-selected model for each sub-category ---
for subcat, group in df.groupby("subCategory"):
    # Skip categories with no assigned model or too few rows to split
    if subcat not in best_model_dict or len(group) < 2:
        continue

    model_name = best_model_dict[subcat]
    # The estimator instance comes from model_map and is re-fitted for each
    # category that selects it.
    model = model_map[model_name]

    X = group.drop(columns=["price"])
    y = group["price"]

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Separate columns by dtype
    categorical_cols = X.select_dtypes(include='object').columns.tolist()
    numeric_cols = X.select_dtypes(include='number').columns.tolist()

    # Preprocessor: scale numeric columns, one-hot encode categorical ones
    preprocessor = ColumnTransformer([
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

    # Build the pipeline
    pipe = Pipeline(steps=[
        ('preprocess', preprocessor),
        ('model', model)
    ])

    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mean_price = y_test.mean()
    # MAE as a percentage of the mean test price; undefined when that mean is 0
    mae_percent = (mae / mean_price) * 100 if mean_price != 0 else None

    final_results.append({
        "AltKategori": subcat,
        "UygulananModel": model_name,
        "R2_Score": round(r2, 4),
        "MAE (Milyon)": round(mae / 1_000_000, 4),
        # round(None, 2) raises TypeError, so the undefined case is passed through as None
        "MAE_Yuzde": round(mae_percent, 2) if mae_percent is not None else None
    })

# Show the results
result_df = pd.DataFrame(final_results)
print(result_df)

# ---------------- OVERALL AVERAGE METRICS ----------------
# Number of rows per sub-category in the cleaned data, used as weights
sample_counts = df['subCategory'].value_counts().to_dict()
result_df["Sample_Count"] = result_df["AltKategori"].map(sample_counts)

# Weighted average of each metric column: sum(metric * count) / sum(count)
category_weights = result_df["Sample_Count"]
total_samples = category_weights.sum()

weighted_r2 = result_df["R2_Score"].mul(category_weights).sum() / total_samples
weighted_mae = result_df["MAE (Milyon)"].mul(category_weights).sum() / total_samples
weighted_mae_yuzde = result_df["MAE_Yuzde"].mul(category_weights).sum() / total_samples

# Report
print("\n📊 AĞIRLIKLI (GERÇEKÇİ) GENEL PERFORMANS:")
print(f"🔹 Ağırlıklı Ortalama R²           : {weighted_r2:.4f}")
print(f"🔹 Ağırlıklı Ortalama MAE (Milyon) : {weighted_mae:.4f}")
print(f"🔹 Ağırlıklı Ortalama MAE (%)      : {weighted_mae_yuzde:.2f}")


   AltKategori    UygulananModel  R2_Score  MAE (Milyon)  MAE_Yuzde
0        Daire      RandomForest    0.7797        0.3798      14.37
1  Müstakil Ev      RandomForest    0.2633        1.8413      29.74
2        Tarla  LinearRegression    0.7463        0.4194      15.53
3        Villa      RandomForest    0.6835        1.0600      17.75

📊 AĞIRLIKLI (GERÇEKÇİ) GENEL PERFORMANS:
🔹 Ağırlıklı Ortalama R²           : 0.7222
🔹 Ağırlıklı Ortalama MAE (Milyon) : 0.6378
🔹 Ağırlıklı Ortalama MAE (%)      : 16.20


In [None]:
# -------------------- SAMPLE PREDICTION --------------------
# A single hand-written listing used to demonstrate a prediction.
sample_data = {
    'district': 'Neviye',
    'county': 'Arifiye',
    'residence': 'Daire',
    'heating': 'Yerden Isıtma',
    'fuel': 'Doğalgaz',
    'build': 'Betonarme',
    'buildState': 'Sıfır',
    # NOTE(review): this is the string 'False', not a boolean — confirm it
    # matches how the 'furnished' column is stored in the training data.
    'furnished': 'False',
    # Selects which per-category model and training subset are used
    'subCategory': 'Daire',
    'netSqm': 95,
    'grossSqm': 110,
    'age': 0,
    'room': 2,
    'bathRoom': 2,
    'livingRoom': 1,
    # This key used to appear twice in the literal (1, then 0); Python keeps
    # the last value, so 0 is what was actually used.
    # NOTE(review): confirm 0 is the intended value.
    'bahceli': 0,
    'bina_kat_sayisi': 3,
    'daire_kati': 2,
    'kuzey': 1,
    'guney': 1,
    'bati': 1,
    'dogu': 1,
    'goldeniz': 0,
    'havuzlu': 0
}

# Convert the sample listing to a one-row DataFrame
sample_df = pd.DataFrame([sample_data])

# Recreate the engineered room-count feature the same way it is built for the
# training data. Without this, the missing-column fill below would set it to 0,
# which contradicts the sample's own room / livingRoom / bathRoom values.
if 'toplam_oda' not in sample_df.columns:
    sample_df['toplam_oda'] = sample_df['room'] + sample_df['livingRoom'] + sample_df['bathRoom']

# Pick the model and the training rows for the sample's sub-category
subcat = sample_df['subCategory'].iloc[0]
model_name = best_model_dict[subcat]
model = model_map[model_name]
train_group = df[df['subCategory'] == subcat]
# All rows of the sub-category are used for fitting here (no hold-out split)
X_train = train_group.drop(columns=["price"])
y_train = train_group["price"]

# Fill in any training column the sample does not provide:
# 'Hayır' for object columns, 0 for everything else.
# NOTE(review): presumably 'Hayır' is the "no" label used by the dataset's
# categorical flags — verify against the actual column values.
for col in X_train.columns:
    if col not in sample_df.columns:
        sample_df[col] = 'Hayır' if X_train[col].dtype == 'object' else 0

# Put the columns in the same order as the training frame
sample_df = sample_df[X_train.columns]

# Separate columns by dtype and build the pipeline
categorical_cols = X_train.select_dtypes(include='object').columns.tolist()
numeric_cols = X_train.select_dtypes(include='number').columns.tolist()

# Scale numeric columns; one-hot encode categorical ones, ignoring categories
# that were not seen during fitting
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])

pipe = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('model', model)
])

# Fit and predict
pipe.fit(X_train, y_train)
predicted_price = pipe.predict(sample_df)[0]

print(f"🏠 Bu örnek dairenin tahmini fiyatı: {round(predicted_price):,} TL")


🏠 Bu örnek dairenin tahmini fiyatı: 2,869,840 TL
