In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

# 📌 1. Veri Hazırlama
def preprocess_data(file_path, is_train=True, encoders=None):
    """Load a CSV file and split it into model features plus a companion series.

    The ``id`` column is removed from the features. Every object-dtype column
    is cast to ``str`` and label-encoded into integer codes.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.
        is_train: When true, the ``rainfall`` column is split off as the
            target and returned alongside the features.
        encoders: Optional dict mapping column name -> fitted ``LabelEncoder``.
            A column already present in the dict is transformed with the
            stored encoder; a column not yet present is fitted on this file
            and stored. Passing the same dict for the training call and then
            the test call keeps the integer codes consistent between the two
            files. With the default ``None`` a fresh encoder is fitted for
            every column on every call, so codes from separate calls are only
            comparable when both files contain the same set of categories.

    Returns:
        ``(X, y)`` when ``is_train`` is true, otherwise ``(X, ids)`` where
        ``ids`` is the ``id`` column of the loaded file.

    Raises:
        KeyError: If the ``id`` column (or ``rainfall`` when ``is_train`` is
            true) is missing from the file.
        ValueError: Raised by ``LabelEncoder.transform`` when a reused encoder
            meets a category it was not fitted on.
    """
    df = pd.read_csv(file_path)
    print(f"✅ Yüklendi: {file_path} - {df.shape}")

    X = df.drop(columns=["id"])
    y = X.pop("rainfall") if is_train else None

    for col in X.select_dtypes(include="object").columns:
        values = X[col].astype(str)
        encoder = None if encoders is None else encoders.get(col)
        if encoder is None:
            # First time this column is seen (or no shared dict was given):
            # learn the category -> code mapping from the current file.
            encoder = LabelEncoder().fit(values)
            if encoders is not None:
                encoders[col] = encoder
        X[col] = encoder.transform(values)

    return (X, y) if is_train else (X, df["id"])

# 📌 2. Tahmin ve Submission
def predict_and_submit(cat_model, xgb_model, test_file, output_file="catboost-submission.csv"):
    """Blend two fitted classifiers on the test file and write a submission CSV.

    The test file is loaded through ``preprocess_data`` in non-training mode.
    Each model's ``predict_proba`` output is sliced to column 1, and the two
    probability vectors are averaged with equal weight.

    Args:
        cat_model: Fitted model exposing ``predict_proba``; queried first.
        xgb_model: Fitted model exposing ``predict_proba``; queried second.
        test_file: Path of the test CSV handed to ``preprocess_data``.
        output_file: Destination path of the submission CSV.

    Returns:
        None. The result is a CSV with columns ``id`` and ``rainfall`` written
        to ``output_file`` (without the index), plus a confirmation message
        printed to stdout.
    """
    features, row_ids = preprocess_data(test_file, is_train=False)

    # Column 1 of predict_proba is taken from each model, in call order.
    cat_probs, xgb_probs = (
        model.predict_proba(features)[:, 1] for model in (cat_model, xgb_model)
    )

    # Simple unweighted average of the two models.
    blended = (cat_probs + xgb_probs) / 2

    pd.DataFrame({"id": row_ids, "rainfall": blended}).to_csv(output_file, index=False)
    print(f"✅ Tahminler kaydedildi: {output_file}")

# 📌 Ana Akış
if __name__ == "__main__":
    # Input files are given as bare names, i.e. relative paths.
    train_path, test_path = "train.csv", "test.csv"

    X_train, y_train = preprocess_data(train_path, is_train=True)

    # Both classifiers share the same seed.
    cat_model = CatBoostClassifier(verbose=0, random_state=42)
    # NOTE(review): `use_label_encoder` is reportedly deprecated in newer
    # XGBoost releases — confirm against the installed version before removing.
    xgb_model = XGBClassifier(
        use_label_encoder=False,
        eval_metric="logloss",
        random_state=42,
    )

    # Fit in a fixed order (CatBoost, then XGBoost) on identical training data.
    for model in (cat_model, xgb_model):
        model.fit(X_train, y_train)

    # Prediction: blend both models on the test file and write the submission.
    predict_and_submit(cat_model, xgb_model, test_path)


✅ Yüklendi: train.csv - (2190, 13)
✅ Yüklendi: test.csv - (730, 12)
✅ Tahminler kaydedildi: catboost-submission.csv
