In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [8]:
# Load the training and test CSV files into DataFrames
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [9]:
# Feature engineering function
def apply_feature_engineering(df):
    """Return a copy of ``df`` with ``Sex`` encoded and derived features added.

    The input frame is left untouched, so the function can be called
    repeatedly on the same raw data (e.g. when a notebook cell is re-run).

    Args:
        df: DataFrame with the columns ``Sex`` ('male'/'female'), ``Age``,
            ``Height``, ``Weight``, ``Heart_Rate`` and ``Duration``.
            Height is divided by 100 before squaring, so it is presumably
            given in centimetres — TODO confirm against the data source.

    Returns:
        A new DataFrame holding the original columns (with ``Sex`` replaced by
        0 for 'male' / 1 for 'female') plus ``BMI``, ``Max_HR``,
        ``Intensity_Percent``, ``Intensity_Level_Num``, ``Age_Group``,
        ``Effort`` and ``Weight_to_Height``.

    Raises:
        ValueError: if ``Sex`` contains anything other than 'male'/'female',
            or if ``Age`` falls outside the (0, 100] range covered by the
            age-group bins (the integer cast of the resulting NaN fails).
    """
    # Work on a copy: mutating the caller's frame made a second call fail,
    # because the already-encoded Sex column no longer matched the mapping.
    df = df.copy()

    sex_codes = df['Sex'].map({'male': 0, 'female': 1})
    unmapped = sex_codes.isna()
    if unmapped.any():
        bad_values = df.loc[unmapped, 'Sex'].unique().tolist()
        raise ValueError(
            f"Sex must be 'male' or 'female'; got unexpected values: {bad_values}"
        )
    df['Sex'] = sex_codes.astype(int)

    # 1. BMI: body mass index
    df["BMI"] = df["Weight"] / ((df["Height"] / 100) ** 2)

    # 2. Max_HR: maximum heart rate, computed as 220 - age
    df["Max_HR"] = 220 - df["Age"]

    # 3. Intensity_Percent: heart rate as a percentage of Max_HR
    df["Intensity_Percent"] = (df["Heart_Rate"] / df["Max_HR"]) * 100

    # 4. Intensity_Level_Num: numeric intensity level (0-3).
    #    <50 -> 0, [50, 70) -> 1, [70, 85) -> 2, >=85 (or NaN) -> 3.
    #    np.digitize applies the same half-open thresholds as the former
    #    row-by-row if/elif chain, but in a single vectorized pass.
    df["Intensity_Level_Num"] = np.digitize(
        df["Intensity_Percent"].to_numpy(), [50, 70, 85]
    ).astype("int64")

    # 5. Age_Group: age bucket index; bins are right-inclusive:
    #    (0, 18] -> 0, (18, 30] -> 1, (30, 45] -> 2, (45, 60] -> 3, (60, 100] -> 4
    df["Age_Group"] = pd.cut(
        df["Age"],
        bins=[0, 18, 30, 45, 60, 100],
        labels=[0, 1, 2, 3, 4]
    ).astype(int)

    # 6. Effort: heart rate x duration
    df["Effort"] = df["Heart_Rate"] * df["Duration"]

    # 7. Weight_to_Height: weight / height
    df["Weight_to_Height"] = df["Weight"] / df["Height"]

    return df

In [10]:

# Dataset class
class CalorieDataset(Dataset):
    """In-memory dataset of float32 feature rows with optional regression targets.

    Args:
        features: 2-D array-like of feature rows. NumPy arrays, nested lists
            and pandas DataFrames are accepted.
        targets: optional array-like with one target per feature row. NumPy
            arrays, lists and pandas Series are accepted. When omitted the
            dataset yields features only (inference mode).

    Raises:
        ValueError: if ``targets`` is given and its number of elements does
            not match the number of feature rows.
    """

    @staticmethod
    def _as_array_like(values):
        # pandas objects cannot be passed to torch.tensor reliably (a
        # DataFrame fails outright, a Series is indexed by label), so unwrap
        # them to their underlying NumPy array first.
        return values.to_numpy() if hasattr(values, "to_numpy") else values

    def __init__(self, features, targets=None):
        self.features = torch.tensor(self._as_array_like(features), dtype=torch.float32)
        if targets is not None:
            # Targets are stored as a column vector so they line up with the
            # (batch, 1) output of the regression models.
            self.targets = torch.tensor(
                self._as_array_like(targets), dtype=torch.float32
            ).reshape(-1, 1)
            if len(self.targets) != len(self.features):
                raise ValueError(
                    f"features and targets must have the same length; "
                    f"got {len(self.features)} and {len(self.targets)}"
                )
        else:
            self.targets = None

    def __len__(self):
        """Return the number of feature rows."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(features, target)`` when targets exist, otherwise just ``features``."""
        if self.targets is not None:
            return self.features[idx], self.targets[idx]
        else:
            return self.features[idx]

In [11]:
# Simple PyTorch model
class SimpleNN(nn.Module):
    """Feed-forward regressor: three hidden blocks (128, 64, 32) and a 1-unit head.

    Each hidden block is Linear -> BatchNorm1d -> ReLU; the first two blocks
    are followed by Dropout (p=0.3 and p=0.2 respectively).
    """

    def __init__(self, input_size):
        super().__init__()

        # (hidden width, dropout probability or None) for each hidden block
        hidden_spec = ((128, 0.3), (64, 0.2), (32, None))

        modules = []
        fan_in = input_size
        for width, drop_p in hidden_spec:
            modules.extend([nn.Linear(fan_in, width), nn.BatchNorm1d(width), nn.ReLU()])
            if drop_p is not None:
                modules.append(nn.Dropout(drop_p))
            fan_in = width

        # Regression head: a single output value per sample
        modules.append(nn.Linear(fan_in, 1))

        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        prediction = self.layers(x)
        return prediction


In [12]:
# More complex model with a residual (skip) connection
class ResidualNN(nn.Module):
    """Regressor whose second block is bypassed by a linear projection shortcut.

    ``layer1`` maps the input to 256 units; ``layer2`` maps 256 -> 128 and its
    output is summed with ``shortcut`` (a Linear 256 -> 128 applied to the
    same ``layer1`` output); ``layer3`` reduces 128 -> 64 -> 32 -> 1.
    """

    @staticmethod
    def _dense_block(n_in, n_out, drop_p):
        # Linear -> BatchNorm1d -> ReLU -> Dropout, returned as a module list
        return [
            nn.Linear(n_in, n_out),
            nn.BatchNorm1d(n_out),
            nn.ReLU(),
            nn.Dropout(drop_p),
        ]

    def __init__(self, input_size):
        super().__init__()

        # First block
        self.layer1 = nn.Sequential(*self._dense_block(input_size, 256, 0.3))

        # Projection that matches dimensions for the residual sum
        self.shortcut = nn.Linear(256, 128)

        # Second block
        self.layer2 = nn.Sequential(*self._dense_block(256, 128, 0.3))

        # Final layers down to the single regression output
        head = self._dense_block(128, 64, 0.2)
        head += [nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, 1)]
        self.layer3 = nn.Sequential(*head)

    def forward(self, x):
        """Compute the prediction for a batch of feature rows ``x``."""
        hidden = self.layer1(x)
        skip = self.shortcut(hidden)
        # Residual connection: add the projected layer1 output to layer2's output
        merged = self.layer2(hidden) + skip
        return self.layer3(merged)


In [13]:
# Model training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, patience=20):
    """Train ``model`` with early stopping and restore its best-validation weights.

    Args:
        model: the ``nn.Module`` to train; it is updated in place.
        train_loader: iterable of ``(features, targets)`` training batches
            exposing a sized ``.dataset`` attribute.
        val_loader: iterable of ``(features, targets)`` validation batches
            exposing a sized ``.dataset`` attribute.
        criterion: loss callable ``criterion(outputs, targets)``. The
            per-epoch averages assume it returns the batch mean — TODO
            confirm if a non-default reduction is ever used.
        optimizer: optimizer bound to ``model``'s parameters.
        num_epochs: maximum number of epochs to run.
        device: device the batches are moved to before the forward pass.
        patience: number of consecutive epochs without a new best validation
            loss after which training stops.

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold the
        per-sample average loss of every completed epoch and ``model``
        carries the weights of the epoch with the lowest validation loss.
        If no epoch produced a best loss (e.g. ``num_epochs == 0``) the model
        is returned with its current weights.
    """
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(num_epochs):
        # Training mode
        model.train()
        running_loss = 0.0
        for features, targets in train_loader:
            features, targets = features.to(device), targets.to(device)

            # Forward pass
            outputs = model(features)
            loss = criterion(outputs, targets)

            # Backward pass and optimization step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch is averaged correctly
            running_loss += loss.item() * features.size(0)

        # Epoch training loss
        epoch_train_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_train_loss)

        # Evaluation mode
        model.eval()
        running_val_loss = 0.0
        with torch.no_grad():
            for features, targets in val_loader:
                features, targets = features.to(device), targets.to(device)
                outputs = model(features)
                val_loss = criterion(outputs, targets)
                running_val_loss += val_loss.item() * features.size(0)

        # Epoch validation loss
        epoch_val_loss = running_val_loss / len(val_loader.dataset)
        val_losses.append(epoch_val_loss)

        # Early-stopping bookkeeping
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            patience_counter = 0
            # state_dict() returns references to the live parameter tensors,
            # so a shallow dict.copy() would keep changing as training goes
            # on. Clone every tensor to take a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        # Report progress every 10 epochs
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}')

        # Stop early once patience is exhausted
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

    # Restore the best weights; there is no snapshot when no epoch ran or
    # every validation loss was NaN, in which case the model is left as is.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, train_losses, val_losses


In [14]:
# Apply feature engineering to the train and test frames
df, df_test = apply_feature_engineering(train), apply_feature_engineering(test)

# Separate the target from the features, then hold out 20% for validation.
# NOTE(review): every column except 'Calories' is used as a feature, including
# any row-identifier column the CSV may carry — confirm this is intended.
y = df['Calories']
X = df.drop(columns=['Calories'])
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [15]:
# Standardize the features: statistics are fitted on the training split only
# and then applied unchanged to the validation and test data.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, df_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, df_test)
)

In [16]:
# Wrap the scaled arrays in Dataset objects (the test set has no targets)
train_dataset, val_dataset = (
    CalorieDataset(inputs, labels.values)
    for inputs, labels in ((X_train_scaled, y_train), (X_val_scaled, y_val))
)
test_dataset = CalorieDataset(df_test_scaled)


In [17]:
# Batch size shared by all three loaders
batch_size = 32

# Only the training loader shuffles; validation and test keep dataset order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
val_loader, test_loader = (
    DataLoader(dataset=split, batch_size=batch_size)
    for split in (val_dataset, test_dataset)
)


In [18]:
# Device selection (use the GPU when one is available)
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


Using device: cpu


In [22]:
# Model hyperparameters
input_size = X_train_scaled.shape[1]
learning_rate = 0.001
num_epochs = 20

# Seed torch's global RNG so weight initialization, dropout masks and (since
# the loader was built without its own generator) the shuffle order are
# repeatable when this cell is re-run.
torch.manual_seed(42)

# Simple model training
# NOTE(review): train_model is called with its default patience of 20, which
# equals num_epochs, so early stopping is unlikely to trigger here — confirm
# whether a smaller patience was intended.
print("\n=== Training Simple Neural Network ===")
simple_model = SimpleNN(input_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(simple_model.parameters(), lr=learning_rate)

simple_model, simple_train_losses, simple_val_losses = train_model(
    simple_model, train_loader, val_loader, 
    criterion, optimizer, num_epochs, device
)


=== Training Simple Neural Network ===


KeyboardInterrupt: 

In [21]:
# Residual model training
# Seed torch's global RNG so this run is repeatable regardless of which other
# cells were executed (or interrupted) before it.
torch.manual_seed(42)

print("\n=== Training Residual Neural Network ===")
residual_model = ResidualNN(input_size).to(device)
# criterion and optimizer are rebound here for the residual model
criterion = nn.MSELoss()
optimizer = optim.Adam(residual_model.parameters(), lr=learning_rate)

residual_model, residual_train_losses, residual_val_losses = train_model(
    residual_model, train_loader, val_loader, 
    criterion, optimizer, num_epochs, device
)


=== Training Residual Neural Network ===


KeyboardInterrupt: 

In [None]:
# Visualize the training curves of both models side by side
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (panel title, training-loss history, validation-loss history)
panels = (
    ('Simple Model Loss', simple_train_losses, simple_val_losses),
    ('Residual Model Loss', residual_train_losses, residual_val_losses),
)

for ax, (title, train_curve, val_curve) in zip(axes, panels):
    ax.plot(train_curve, label='Train Loss')
    ax.plot(val_curve, label='Validation Loss')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('MSE Loss')
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Model evaluation
def evaluate_model(model, data_loader, device):
    """Score ``model`` on every batch of ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Predictions and targets from all batches are stacked and flattened
    before the metrics are computed.

    Returns:
        ``(rmse, mae, r2)`` computed over the whole loader.
    """
    model.eval()
    pred_batches, target_batches = [], []

    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            # Move results back to the CPU so they can be turned into NumPy arrays
            pred_batches.append(model(batch_x).cpu().numpy())
            target_batches.append(batch_y.cpu().numpy())

    y_pred = np.vstack(pred_batches).flatten()
    y_true = np.vstack(target_batches).flatten()

    return (
        np.sqrt(mean_squared_error(y_true, y_pred)),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Score both trained models on the validation split
simple_rmse, simple_mae, simple_r2 = evaluate_model(
    model=simple_model, data_loader=val_loader, device=device
)
residual_rmse, residual_mae, residual_r2 = evaluate_model(
    model=residual_model, data_loader=val_loader, device=device
)

# Report the metrics for each model
print("\n=== Model Performances ===")
print(f"Simple Model - RMSE: {simple_rmse:.2f}, MAE: {simple_mae:.2f}, R2: {simple_r2:.4f}")
print(f"Residual Model - RMSE: {residual_rmse:.2f}, MAE: {residual_mae:.2f}, R2: {residual_r2:.4f}")


In [None]:
# Pick the model with the lower validation RMSE (the residual model wins ties)
if simple_rmse < residual_rmse:
    best_model = simple_model
else:
    best_model = residual_model
print(f"\nBest model: {'Simple Model' if simple_rmse < residual_rmse else 'Residual Model'}")

# Prepare for inference on the test data
test_preds = []
best_model.eval()

In [None]:
# Predict on the test set.
# The prediction list and eval mode are (re)initialized in this cell so it can
# be re-run on its own: previously it appended to a list created in another
# cell and then rebound that name to an ndarray, so a second run failed.
best_model.eval()
test_preds = []
with torch.no_grad():
    for features in test_loader:
        # Tolerate loaders that wrap each batch in a list/tuple
        if isinstance(features, (list, tuple)):
            features = features[0]
        features = features.to(device)
        outputs = best_model(features)
        test_preds.append(outputs.cpu().numpy())

# Merge the per-batch predictions and build the submission frame
test_preds = np.vstack(test_preds).flatten()
submission = pd.DataFrame({
    'id': test['id'],
    'Calories': test_preds
})

# Save as a CSV file
submission.to_csv('pytorch_submission.csv', index=False)

print(f"\nPyTorch submission dosyası oluşturuldu. İlk 5 satır:")
print(submission.head())


# Summary statistics of the predicted Calories values
print("\nTahmin edilen Calories değerlerinin istatistikleri:")
print(f"Ortalama: {submission['Calories'].mean():.2f}")
print(f"Minimum: {submission['Calories'].min():.2f}")
print(f"Maksimum: {submission['Calories'].max():.2f}")
print(f"Standart Sapma: {submission['Calories'].std():.2f}")

# Save the selected model's weights
torch.save(best_model.state_dict(), 'best_pytorch_model.pth')
print("\nEn iyi model 'best_pytorch_model.pth' olarak kaydedildi.")