In [1]:
#!pip install pytorch-tabnet

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-4.1.0-py3-none-any.whl.metadata (15 kB)
Downloading pytorch_tabnet-4.1.0-py3-none-any.whl (44 kB)
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-4.1.0


In [3]:
import numpy as np
import pandas as pd
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
import torch

# 📌 **Veri Ön İşleme**
def preprocess_data(file_path, is_train=True):
    df = pd.read_csv(file_path)
    
    if is_train:
        y = df["rainfall"]
        X = df.drop(columns=["id", "rainfall"])
    else:
        X = df.drop(columns=["id"])
        y = None
    
    # Eksik değerleri median ile doldur
    X.fillna(X.median(), inplace=True)
    
    # Kategorik değişkenleri sayısala çevir
    for col in X.select_dtypes(include=['object']).columns:
        X[col] = LabelEncoder().fit_transform(X[col])

    print(f"✅ Veri hazırlandı: {X.shape}")
    
    if is_train:
        return X, y
    return X

# 📌 **TabNet Modeli ile Eğitim**
def train_tabnet(X_train, y_train):
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    model = TabNetClassifier(
        optimizer_fn=torch.optim.Adam,
        optimizer_params={'lr': 2e-2},
        scheduler_params={"step_size":10, "gamma":0.9},
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
        seed=42,
        device_name='cuda' if torch.cuda.is_available() else 'cpu'  # GPU desteği
    )

    # Modeli eğit
    model.fit(
        X_train=X_train.values, y_train=y_train.values,
        eval_set=[(X_val.values, y_val.values)],
        eval_metric=['auc'],
        batch_size=1024, virtual_batch_size=128,
        max_epochs=100, patience=10
    )

    print("✅ TabNet modeli eğitildi!")
    return model

# 📌 **Test verisini işle ve tahmin yap**
def predict_and_save(model, test_file, output_file="submission_tabnet_fixed.csv"):
    test_df = pd.read_csv(test_file)  # Orijinal test dosyasını yükle
    test_ids = test_df["id"]  # Test dosyasındaki ID'leri al
    
    X_test = preprocess_data(test_file, is_train=False)  # Test verisini işle
    
    y_pred = model.predict_proba(X_test.values)[:, 1]  # Yağmur olma olasılığı
    
    # ID'leri olduğu gibi koruyarak tahminleri kaydet
    submission = pd.DataFrame({"id": test_ids, "rainfall": y_pred})
    submission.to_csv(output_file, index=False)
    
    print(f"✅ Tahminler {output_file} dosyasına kaydedildi!")

# 📌 **Ana Çalıştırma Kısmı**
if __name__ == "__main__":
    train_file = "train.csv"
    test_file = "test.csv"

    # Veriyi yükle
    X_train, y_train = preprocess_data(train_file, is_train=True)

    # Modeli eğit
    best_tabnet_model = train_tabnet(X_train, y_train)

    # Test seti üzerinde tahmin yap
    predict_and_save(best_tabnet_model, test_file)


✅ Veri hazırlandı: (2190, 11)




epoch 0  | loss: 1.00755 | val_0_auc: 0.49783 |  0:00:00s
epoch 1  | loss: 0.82151 | val_0_auc: 0.47154 |  0:00:01s
epoch 2  | loss: 0.64298 | val_0_auc: 0.59315 |  0:00:02s
epoch 3  | loss: 0.5722  | val_0_auc: 0.5532  |  0:00:04s
epoch 4  | loss: 0.52792 | val_0_auc: 0.70846 |  0:00:05s
epoch 5  | loss: 0.49607 | val_0_auc: 0.71369 |  0:00:07s
epoch 6  | loss: 0.46105 | val_0_auc: 0.66813 |  0:00:07s
epoch 7  | loss: 0.43713 | val_0_auc: 0.63501 |  0:00:08s
epoch 8  | loss: 0.41047 | val_0_auc: 0.55749 |  0:00:09s
epoch 9  | loss: 0.39312 | val_0_auc: 0.45293 |  0:00:09s
epoch 10 | loss: 0.38621 | val_0_auc: 0.51308 |  0:00:10s
epoch 11 | loss: 0.38376 | val_0_auc: 0.51731 |  0:00:11s
epoch 12 | loss: 0.36239 | val_0_auc: 0.52974 |  0:00:11s
epoch 13 | loss: 0.35739 | val_0_auc: 0.58152 |  0:00:12s
epoch 14 | loss: 0.35558 | val_0_auc: 0.5476  |  0:00:13s
epoch 15 | loss: 0.35973 | val_0_auc: 0.53023 |  0:00:14s

Early stopping occurred at epoch 15 with best_epoch = 5 and best_val_0_

