In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/playground-series-s5e5/sample_submission.csv
/kaggle/input/playground-series-s5e5/train.csv
/kaggle/input/playground-series-s5e5/test.csv


In [2]:
train=pd.read_csv("/kaggle/input/playground-series-s5e5/train.csv")
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750000 entries, 0 to 749999
Data columns (total 9 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   id          750000 non-null  int64  
 1   Sex         750000 non-null  object 
 2   Age         750000 non-null  int64  
 3   Height      750000 non-null  float64
 4   Weight      750000 non-null  float64
 5   Duration    750000 non-null  float64
 6   Heart_Rate  750000 non-null  float64
 7   Body_Temp   750000 non-null  float64
 8   Calories    750000 non-null  float64
dtypes: float64(6), int64(2), object(1)
memory usage: 51.5+ MB


In [3]:
test=pd.read_csv("/kaggle/input/playground-series-s5e5/test.csv")
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250000 entries, 0 to 249999
Data columns (total 8 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   id          250000 non-null  int64  
 1   Sex         250000 non-null  object 
 2   Age         250000 non-null  int64  
 3   Height      250000 non-null  float64
 4   Weight      250000 non-null  float64
 5   Duration    250000 non-null  float64
 6   Heart_Rate  250000 non-null  float64
 7   Body_Temp   250000 non-null  float64
dtypes: float64(5), int64(2), object(1)
memory usage: 15.3+ MB


In [4]:
sample_sub=pd.read_csv("/kaggle/input/playground-series-s5e5/sample_submission.csv")
sample_sub.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250000 entries, 0 to 249999
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   id        250000 non-null  int64  
 1   Calories  250000 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 3.8 MB


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [6]:

df = pd.read_csv("/kaggle/input/playground-series-s5e5/train.csv")

# Drop ID
df.drop(columns=['id'], inplace=True)

# One-hot encode 'Sex'
df = pd.get_dummies(df, columns=['Sex'], drop_first=True)

# Split features and target
X = df.drop(columns=['Calories']).values
y = df['Calories'].values.reshape(-1, 1)

# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

# DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# Feature count
input_dim = X_train_tensor.shape[1]


In [7]:
class CaloriesMLP(nn.Module):
    def __init__(self, input_dim):
        super(CaloriesMLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.ReLU()  # Ensures non-negative predictions for RMSLE
        )

    def forward(self, x):
        return self.model(x)


In [8]:
class RMSLELoss(nn.Module):
    def __init__(self):
        super(RMSLELoss, self).__init__()

    def forward(self, y_pred, y_true):
        y_pred = torch.clamp(y_pred, min=0)
        y_true = torch.clamp(y_true, min=0)
        return torch.sqrt(torch.mean((torch.log1p(y_pred) - torch.log1p(y_true)) ** 2))


In [9]:
# Instantiate
model = CaloriesMLP(input_dim)
criterion = RMSLELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * X_batch.size(0)

    train_loss /= len(train_loader.dataset)

    # Validation loss
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item() * X_batch.size(0)
    val_loss /= len(val_loader.dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train RMSLE: {train_loss:.4f}, Val RMSLE: {val_loss:.4f}")


Epoch [1/30], Train RMSLE: 0.3154, Val RMSLE: 0.0888
Epoch [2/30], Train RMSLE: 0.0752, Val RMSLE: 0.0737
Epoch [3/30], Train RMSLE: 0.0665, Val RMSLE: 0.0652
Epoch [4/30], Train RMSLE: 0.0637, Val RMSLE: 0.0621
Epoch [5/30], Train RMSLE: 0.0623, Val RMSLE: 0.0617
Epoch [6/30], Train RMSLE: 0.0613, Val RMSLE: 0.0604
Epoch [7/30], Train RMSLE: 0.0608, Val RMSLE: 0.0607
Epoch [8/30], Train RMSLE: 0.0603, Val RMSLE: 0.0596
Epoch [9/30], Train RMSLE: 0.0599, Val RMSLE: 0.0590
Epoch [10/30], Train RMSLE: 0.0596, Val RMSLE: 0.0598
Epoch [11/30], Train RMSLE: 0.0594, Val RMSLE: 0.0583
Epoch [12/30], Train RMSLE: 0.0592, Val RMSLE: 0.0590
Epoch [13/30], Train RMSLE: 0.0590, Val RMSLE: 0.0590
Epoch [14/30], Train RMSLE: 0.0588, Val RMSLE: 0.0582
Epoch [15/30], Train RMSLE: 0.0587, Val RMSLE: 0.0583
Epoch [16/30], Train RMSLE: 0.0587, Val RMSLE: 0.0581
Epoch [17/30], Train RMSLE: 0.0585, Val RMSLE: 0.0587
Epoch [18/30], Train RMSLE: 0.0585, Val RMSLE: 0.0579
Epoch [19/30], Train RMSLE: 0.0584, V

In [10]:
# Load test data
test_df = test  # Replace with your path

# Store IDs for submission
test_ids = test_df['id'].values

# Drop 'id' column
test_df = test_df.drop(columns=['id'])

# One-hot encode 'Sex' to match train columns
test_df = pd.get_dummies(test_df, columns=['Sex'], drop_first=True)


# Align column order (same as training data)
test_df = test_df[X_train_tensor.shape[1]:] if test_df.shape[1] > X_train_tensor.shape[1] else test_df
test_features = test_df.values

# Standardize using training scaler
X_test_scaled = scaler.transform(test_features)

# Convert to PyTorch tensor
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)


In [11]:
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor).squeeze().numpy()


In [12]:
submission_df = pd.DataFrame({
    'id': test_ids,
    'Calories': predictions
})

# Save to CSV
submission_df.to_csv("submission.csv", index=False)
