In [None]:
# Optional one-off dependency install, left disabled.
# NOTE(review): nothing visible in this notebook imports tensorflow - confirm it is still needed.
#!pip install tensorflow
import warnings
# Suppress every Python warning for the rest of the session. This also hides
# pandas/scikit-learn deprecation and convergence warnings, so disable it while debugging.
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

def preprocess_dataset_train(file_path):
    """
    Load the training CSV and turn it into a numeric, model-ready DataFrame.

    Steps, in order:
      1. Read the CSV at ``file_path``.
      2. Replace ``SalePrice`` with ``log1p(SalePrice)``.
      3. Fill missing ``GarageYrBlt`` / ``MasVnrArea`` with 0 and missing
         ``LotFrontage`` with the median of the row's ``Neighborhood``.
      4. Ordinal-encode the quality / condition columns that are present.
      5. One-hot encode the remaining categorical columns (first level dropped).
      6. Drop columns whose absolute correlation with ``SalePrice`` is < 0.05.
      7. Fill remaining gaps in int64/float64 columns with the column median.
      8. Clip int64/float64 columns to ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.

    Notes:
      * ``SalePrice`` is a float column after step 2, so step 8 clips it too.
      * ``pd.read_csv`` parses the literal text ``NA`` as NaN, so the ``"NA"``
        entries of the ordinal mappings are applied to missing cells explicitly.
      * Columns with a zero IQR are left unclipped; clipping them would replace
        every value with one constant.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            training CSV. It must contain ``SalePrice``, ``GarageYrBlt``,
            ``LotFrontage``, ``MasVnrArea`` and ``Neighborhood``.

    Returns:
        pandas.DataFrame: the processed frame, still containing ``SalePrice``
        (log-transformed).

    Raises:
        KeyError: if one of the required columns is missing from the CSV.
    """
    # 1. Read the CSV file
    df = pd.read_csv(file_path)
    print(f"âœ… Veri yÃ¼klendi: {df.shape}")

    # 2. Log-transform the target
    df["SalePrice"] = np.log1p(df["SalePrice"])
    print("âœ… Log dÃ¶nÃ¼ÅŸÃ¼mÃ¼ uygulandÄ±!")

    # 3. Fill the known gaps. Plain assignment is used instead of
    #    `df[col].fillna(..., inplace=True)`, which operates on a temporary
    #    under pandas Copy-on-Write and is deprecated in pandas 2.x.
    df["GarageYrBlt"] = df["GarageYrBlt"].fillna(0)
    df["LotFrontage"] = df.groupby("Neighborhood")["LotFrontage"].transform(lambda x: x.fillna(x.median()))
    df["MasVnrArea"] = df["MasVnrArea"].fillna(0)
    print("âœ… Eksik deÄŸerler dolduruldu!")

    # 4. Ordinal encoding
    ordinal_features = {
        "ExterQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "ExterCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "BsmtQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "BsmtCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "KitchenQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "FireplaceQu": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "GarageQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "GarageCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "PoolQC": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "NA": 0},
        "Fence": {"GdPrv": 4, "MnPrv": 3, "GdWo": 2, "MnWw": 1, "NA": 0}
    }

    for col, mapping in ordinal_features.items():
        if col not in df.columns:
            continue
        encoded = df[col].map(mapping)
        if "NA" in mapping:
            # read_csv already turned the text "NA" into NaN, so the "NA" key can
            # never match; give missing cells the value the mapping intends.
            encoded = encoded.mask(df[col].isna(), mapping["NA"])
        df[col] = encoded

    print("âœ… Ordinal Encoding tamamlandÄ±!")

    # 5. One-hot encoding. With `columns` left unset, get_dummies encodes every
    #    object / string / category column, whatever dtype read_csv inferred.
    df = pd.get_dummies(df, drop_first=True)
    print("âœ… One-Hot Encoding tamamlandÄ±!")

    # 6. Drop columns that are only weakly correlated with SalePrice
    correlation_threshold = 0.05
    corr_with_saleprice = df.corr()["SalePrice"].abs()
    low_corr_features = corr_with_saleprice[corr_with_saleprice < correlation_threshold].index
    df = df.drop(columns=low_corr_features)
    print(f"âœ… DÃ¼ÅŸÃ¼k korelasyonlu {len(low_corr_features)} sÃ¼tun kaldÄ±rÄ±ldÄ±!")

    # 7. Fill the remaining numeric gaps with each column's median
    numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
    missing_before = df[numerical_cols].isna().sum().sum()
    df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())
    print(f"âœ… SayÄ±sal eksik deÄŸerler median ile dolduruldu! Toplam deÄŸiÅŸtirilen hÃ¼cre sayÄ±sÄ±: {missing_before}")

    # 8. Clip outliers to the 1.5 * IQR fences
    for col in numerical_cols:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        if iqr == 0:
            # Both fences coincide; clipping would flatten the column to a constant.
            continue
        df[col] = df[col].clip(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
    print("âœ… AykÄ±rÄ± deÄŸerler IQR yÃ¶ntemiyle kÄ±rpÄ±ldÄ±!")

    return df

In [None]:
# Preprocess the training set (reads train.csv relative to the working directory)
train_prepared = preprocess_dataset_train("train.csv")

In [None]:
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def train_cnn(train_df):
    """
    Fit Bayesian Ridge, ARD and Gaussian Process regressors and report R^2.

    Despite the name, no neural network is involved. The frame is split 90/10
    (``random_state=42``), features are standardised with a scaler fitted on
    the training part only, the three models are fitted, and the R^2 of each
    model and of their unweighted average on the hold-out part is printed.

    Args:
        train_df: DataFrame of numeric features plus a ``SalePrice`` target
            column.

    Returns:
        dict with the fitted objects under the keys ``"bayesian_ridge"``,
        ``"ard_regression"``, ``"gpr"`` and ``"scaler"``.

    Raises:
        KeyError: if ``train_df`` has no ``SalePrice`` column.
    """
    y = train_df["SalePrice"]
    X = train_df.drop(columns=["SalePrice"])

    # Hold out 10% of the rows for evaluation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    # Standardise features; statistics come from the training split only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 1. Bayesian Ridge regression
    bayesian_ridge = BayesianRidge()
    bayesian_ridge.fit(X_train_scaled, y_train)

    # 2. ARD regression
    ard_regression = ARDRegression()
    ard_regression.fit(X_train_scaled, y_train)

    # 3. Gaussian Process regression.
    #    The kernel defines a zero-mean prior, while the target passed in is not
    #    centred. normalize_y=True makes the GP centre/scale y internally (and
    #    undo it in predict), so predictions far from the training points fall
    #    back to the target mean instead of 0.
    kernel = RationalQuadratic()
    gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True, random_state=42)
    gpr.fit(X_train_scaled, y_train)

    # Hold-out predictions of each model
    y_pred_ridge = bayesian_ridge.predict(X_test_scaled)
    y_pred_ard = ard_regression.predict(X_test_scaled)
    y_pred_gpr = gpr.predict(X_test_scaled)

    # Ensemble = unweighted mean of the three predictions
    final_preds = (y_pred_ridge + y_pred_ard + y_pred_gpr) / 3

    # Report R^2 for every model and for the ensemble
    r2_ridge = r2_score(y_test, y_pred_ridge)
    r2_ard = r2_score(y_test, y_pred_ard)
    r2_gpr = r2_score(y_test, y_pred_gpr)
    r2_final = r2_score(y_test, final_preds)

    print(f"âœ… Bayesian Ridge RÂ²: {r2_ridge:.4f}")
    print(f"âœ… ARD Regression RÂ²: {r2_ard:.4f}")
    print(f"âœ… Gaussian Process Regression RÂ²: {r2_gpr:.4f}")
    print(f"âœ… Ensemble Model RÂ²: {r2_final:.4f}")

    # Hand back the fitted estimators together with the scaler they expect
    return {
        "bayesian_ridge": bayesian_ridge,
        "ard_regression": ard_regression,
        "gpr": gpr,
        "scaler": scaler
    }

In [None]:
import numpy as np
import pandas as pd

def preprocess_dataset_test(file_path, train_columns):
    """
    Load the test CSV and shape it to match the training feature columns.

    Steps, in order:
      1. Read the CSV at ``file_path``.
      2. Fill missing ``GarageYrBlt`` / ``MasVnrArea`` with 0 and missing
         ``LotFrontage`` with the median of the row's ``Neighborhood``.
      3. Ordinal-encode the quality / condition columns that are present.
      4. One-hot encode the remaining categorical columns, keeping every level.
      5. Reindex to ``train_columns``: extra columns are dropped, columns that
         are absent are created and filled with 0.
      6. Fill remaining gaps in int64/float64 columns with the column median
         (computed on this test frame), then 0 if a column has no values at all.

    Notes:
      * No level is dropped in step 4. Dropping "the first level" here would
        drop whichever level sorts first in the *test* data; when the training
        baseline level does not occur in the test data, a real level would be
        removed and its rows would silently be encoded as the baseline. The
        training baseline is instead removed by the reindex in step 5.
      * ``pd.read_csv`` parses the literal text ``NA`` as NaN, so the ``"NA"``
        entries of the ordinal mappings are applied to missing cells explicitly.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the test
            CSV. It must contain ``GarageYrBlt``, ``LotFrontage``,
            ``MasVnrArea`` and ``Neighborhood``.
        train_columns: Ordered feature column labels produced by the training
            preprocessing (without the target).

    Returns:
        pandas.DataFrame with exactly ``train_columns``, in that order.

    Raises:
        KeyError: if one of the required columns is missing from the CSV.
    """
    # 1. Read the CSV file
    df = pd.read_csv(file_path)
    print(f"âœ… Test verisi yÃ¼klendi: {df.shape}")

    # 2. Fill the known gaps. Plain assignment is used instead of
    #    `df[col].fillna(..., inplace=True)`, which operates on a temporary
    #    under pandas Copy-on-Write and is deprecated in pandas 2.x.
    df["GarageYrBlt"] = df["GarageYrBlt"].fillna(0)
    df["LotFrontage"] = df.groupby("Neighborhood")["LotFrontage"].transform(lambda x: x.fillna(x.median()))
    df["MasVnrArea"] = df["MasVnrArea"].fillna(0)

    print("âœ… Eksik deÄŸerler dolduruldu!")

    # 3. Ordinal encoding
    ordinal_features = {
        "ExterQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "ExterCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "BsmtQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "BsmtCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "KitchenQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1},
        "FireplaceQu": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "GarageQual": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "GarageCond": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "Po": 1, "NA": 0},
        "PoolQC": {"Ex": 5, "Gd": 4, "TA": 3, "Fa": 2, "NA": 0},
        "Fence": {"GdPrv": 4, "MnPrv": 3, "GdWo": 2, "MnWw": 1, "NA": 0}
    }

    for col, mapping in ordinal_features.items():
        if col not in df.columns:
            continue
        encoded = df[col].map(mapping)
        if "NA" in mapping:
            # read_csv already turned the text "NA" into NaN, so the "NA" key can
            # never match; give missing cells the value the mapping intends.
            encoded = encoded.mask(df[col].isna(), mapping["NA"])
        df[col] = encoded

    print("âœ… Ordinal Encoding tamamlandÄ±!")

    # 4. One-hot encoding with every level kept (see Notes in the docstring)
    df = pd.get_dummies(df)
    print("âœ… One-Hot Encoding tamamlandÄ±!")

    # 5. Align with the training columns in one step: unknown columns are
    #    dropped, absent ones are added as all-zero columns, order is enforced.
    df = df.reindex(columns=train_columns, fill_value=0)

    print(f"âœ… Test seti, train setiyle uyumlu hale getirildi! Yeni ÅŸekil: {df.shape}")

    # 6. Fill the remaining numeric gaps with each column's median; a column
    #    with no observed value has a NaN median, so fall back to 0 for it.
    numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
    missing_before = df[numerical_cols].isna().sum().sum()
    df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median()).fillna(0)
    print(f"âœ… SayÄ±sal eksik deÄŸerler median ile dolduruldu! Toplam deÄŸiÅŸtirilen hÃ¼cre sayÄ±sÄ±: {missing_before}")

    return df

In [None]:
import numpy as np
import pandas as pd

def test_cnn(models, test_df, test_csv_path, output_csv="bayesian-submission.csv"):
    """
    Bayesian model kombinasyonlarÄ± ile test verisini tahmin eder.
    Log dÃ¶nÃ¼ÅŸÃ¼mÃ¼nÃ¼ ters Ã§evirerek gerÃ§ek deÄŸerleri verir ve aÅŸÄ±rÄ± bÃ¼yÃ¼k deÄŸerleri sÄ±nÄ±rlayarak `inf` hatasÄ±nÄ± engeller.
    """
    # ðŸ“Œ 1. Test verisini yÃ¼kle
    test_raw = pd.read_csv(test_csv_path)
    test_ids = test_raw["Id"]  # Orijinal test setindeki ID'leri al

    # ðŸ“Œ 2. Veriyi Ã¶lÃ§eklendir
    test_df_scaled = models["scaler"].transform(test_df)

    # ðŸ“Œ 3. Modeller ile tahmin yap
    pred_ridge = models["bayesian_ridge"].predict(test_df_scaled)
    pred_ard = models["ard_regression"].predict(test_df_scaled)
    pred_gpr = models["gpr"].predict(test_df_scaled)

    # ðŸ“Œ 4. 3 modelin ortalama tahminini hesapla
    predictions_log = (pred_ridge + pred_ard + pred_gpr) / 3

    # ðŸ“Œ 5. Log dÃ¶nÃ¼ÅŸÃ¼mÃ¼nÃ¼ geri al
    predictions = np.expm1(predictions_log)

    # ðŸ“Œ 6. `inf` hatasÄ±nÄ± Ã¶nlemek iÃ§in tahminleri sÄ±nÄ±rlÄ± aralÄ±ÄŸa Ã§ek
    predictions = np.nan_to_num(predictions, nan=0.0, posinf=700000, neginf=0.0)

    # ðŸ“Œ 7. Tahminleri kaydet
    submission = pd.DataFrame({"Id": test_ids, "SalePrice": predictions})
    submission.to_csv(output_csv, index=False)

    print(f"âœ… Tahminler {output_csv} dosyasÄ±na kaydedildi! (Ä°Ã§erik KontrolÃ¼: {submission.describe()})")


In [None]:
# Fit the regressors on the preprocessed training frame; the returned dict
# holds the fitted estimators together with the scaler used for the features.
train_cnn_model = train_cnn(train_prepared)

In [None]:
# Preprocess the test set and align it with the training feature columns
# (the SalePrice target is excluded from the column list).
test_prepared = preprocess_dataset_test("test.csv", train_prepared.drop(columns=["SalePrice"]).columns)

In [None]:
# Predict on the prepared test features and write the submission CSV;
# test.csv is read again inside test_cnn for its Id column.
test_cnn(train_cnn_model, test_prepared, "test.csv")