In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

# ======================================================
# STEP 1 — Load & Prepare Data
# ======================================================
embeddings = np.load('/kaggle/input/embeddings/train_embeddings.npy')
df = pd.read_csv('/kaggle/input/train-llm/train.csv')

print(embeddings.shape)
print(df.head())

X = embeddings
Y = df['price'].reset_index(drop=True)

# Convert to numpy if tensors
if isinstance(X, torch.Tensor):
    X = X.detach().cpu().numpy()
if isinstance(Y, torch.Tensor):
    Y = Y.detach().cpu().numpy()

# Optional: clip extreme values to reduce SMAPE impact
Y_clipped = np.clip(Y, 0, 100)  # adjust upper limit if needed
Y_log = np.log1p(Y_clipped)

# Standardize embeddings
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dimensionality Reduction (768 components)
print("Running PCA reduction...")
pca = PCA(n_components=768, random_state=42)
X_reduced = pca.fit_transform(X_scaled)
print("Reduced shape:", X_reduced.shape)

# Split data
X_train, X_val, y_train, y_val = train_test_split(
    X_reduced, Y_log, test_size=0.2, random_state=42
)

# Convert Series to numpy
y_train = np.array(y_train)
y_val = np.array(y_val)

# Convert to torch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ======================================================
# STEP 2 — Define Model (with slightly higher dropout)
# ======================================================
class ResidualBlock(nn.Module):
    def __init__(self, dim, dropout=0.35):
        super().__init__()
        self.block = nn.Sequential(
            nn.Linear(dim, dim),
            nn.LayerNorm(dim),
            nn.GELU(),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return x + self.block(x)

class OptimizedMLP(nn.Module):
    def __init__(self, input_dim, dropout=0.35):
        super().__init__()
        self.fc_in = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LayerNorm(256),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        self.res1 = ResidualBlock(256, dropout)
        self.fc_mid = nn.Sequential(
            nn.Linear(256, 128),
            nn.LayerNorm(128),
            nn.GELU(),
            nn.Dropout(dropout)
        )
        self.res2 = ResidualBlock(128, dropout)
        self.fc_out = nn.Linear(128, 1)

    def forward(self, x):
        x = self.fc_in(x)
        x = self.res1(x)
        x = self.fc_mid(x)
        x = self.res2(x)
        return self.fc_out(x)

# ======================================================
# STEP 3 — Training Function (Reusable)
# ======================================================
def train_single_model(seed):
    torch.manual_seed(seed)
    np.random.seed(seed)

    input_dim = X_train.shape[1]
    model = OptimizedMLP(input_dim, dropout=0.35).to(device)

    criterion = nn.HuberLoss()
    optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)

    epochs = 400
    batch_size = 128
    early_stop_patience = 40
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        permutation = torch.randperm(X_train.size(0))
        total_loss = 0.0

        for i in range(0, X_train.size(0), batch_size):
            idx = permutation[i:i + batch_size]
            batch_x = X_train[idx].to(device)
            batch_y = y_train[idx].to(device)

            # Slightly increased Gaussian noise
            batch_x = batch_x + 0.015 * torch.randn_like(batch_x)

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation
        model.eval()
        with torch.no_grad():
            val_preds = model(X_val.to(device))
            val_loss = criterion(val_preds, y_val.to(device))

        scheduler.step()
        print(f"[Seed {seed}] Epoch [{epoch+1}/{epochs}] "
              f"Train Loss: {total_loss/len(X_train):.6f} | Val Loss: {val_loss.item():.6f}")

        # Early stopping
        if val_loss.item() < best_val_loss - 1e-5:
            best_val_loss = val_loss.item()
            patience_counter = 0
            best_model_state = model.state_dict()
        else:
            patience_counter += 1
            if patience_counter >= early_stop_patience:
                print(f"[Seed {seed}] Early stopping at epoch {epoch+1}")
                break

    model.load_state_dict(best_model_state)
    return model

# ======================================================
# STEP 4 — Train 5 Models (Ensemble)
# ======================================================
models = []
seeds = [42, 99, 2025, 123, 777]

for s in seeds:
    print(f"\n=== Training Model (Seed {s}) ===")
    model_s = train_single_model(s)
    models.append(model_s)

# ======================================================
# STEP 5 — Inference (Averaged Ensemble)
# ======================================================
X_full = torch.tensor(X_reduced, dtype=torch.float32).to(device)

ensemble_preds = []
for model in models:
    model.eval()
    with torch.no_grad():
        pred_log = model(X_full).cpu().numpy()
        ensemble_preds.append(np.expm1(pred_log))  # inverse log-transform

# Average predictions
y_pred = np.mean(ensemble_preds, axis=0)
y_pred = np.maximum(y_pred, 0)

# Evaluation on training subset
r2 = r2_score(Y, y_pred[:len(Y)])
mae = mean_absolute_error(Y, y_pred[:len(Y)])

print("\n=====================")
print(f"Final Ensemble R²: {r2:.4f}")
print(f"Final Ensemble MAE: {mae:.4f}")
print("=====================")

# # Save predictions
# np.save("ensemble_5models_predictions.npy", y_pred)
# pd.DataFrame({"price": y_pred.flatten()}).to_csv("submission_5models.csv", index=False)
# print("Saved predictions → ensemble_5models_predictions.npy & submission_5models.csv")


(75000, 2048)
   sample_id                                    catalog_content  \
0      33127  Item Name: La Victoria Green Taco Sauce Mild, ...   
1     198967  Item Name: Salerno Cookies, The Original Butte...   
2     261251  Item Name: Bear Creek Hearty Soup Bowl, Creamy...   
3      55858  Item Name: Judee’s Blue Cheese Powder 11.25 oz...   
4     292686  Item Name: kedem Sherry Cooking Wine, 12.7 Oun...   

                                          image_link  price  
0  https://m.media-amazon.com/images/I/51mo8htwTH...   4.89  
1  https://m.media-amazon.com/images/I/71YtriIHAA...  13.12  
2  https://m.media-amazon.com/images/I/51+PFEe-w-...   1.97  
3  https://m.media-amazon.com/images/I/41mu0HAToD...  30.34  
4  https://m.media-amazon.com/images/I/41sA037+Qv...  66.49  
Running PCA reduction...
Reduced shape: (75000, 768)
Using device: cuda

=== Training Model (Seed 42) ===
[Seed 42] Epoch [1/400] Train Loss: 0.002457 | Val Loss: 0.228029
[Seed 42] Epoch [2/400] Train Loss: 0.0

In [7]:
import numpy as np
import torch
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ======================================================
# STEP 1 — Load Test Embeddings
# ======================================================
test_embeddings = np.load("/kaggle/input/embeddings/full_embeddings.npy")
print("Loaded test embeddings:", test_embeddings.shape)

# ======================================================
# STEP 2 — Apply SAME Scaler & PCA from training
# ======================================================
# ⚠️ Make sure 'scaler' and 'pca' objects are from training
X_test_scaled = scaler.transform(test_embeddings)
X_test_reduced = pca.transform(X_test_scaled)
print("Reduced test shape:", X_test_reduced.shape)

# Convert to tensor
X_test_tensor = torch.tensor(X_test_reduced, dtype=torch.float32).to(device)

# ======================================================
# STEP 3 — Ensemble Predictions (5 models)
# ======================================================
ensemble_preds = []

for idx, model in enumerate(models):
    model.eval()
    with torch.no_grad():
        pred_log = model(X_test_tensor).cpu().numpy()
        pred = np.expm1(pred_log)  # inverse log1p
        ensemble_preds.append(pred)
        print(f"Model {idx+1} done.")

# Average predictions
y_test_pred = np.mean(ensemble_preds, axis=0)
y_test_pred = np.maximum(y_test_pred, 0)  # avoid negatives

print("Final predictions shape:", y_test_pred.shape)

# ======================================================
# STEP 4 — Save Predictions
# ======================================================
# np.save("test_ensemble_5models.npy", y_test_pred)
pd.DataFrame({"price": y_test_pred.flatten()}).to_csv("test_out1.csv", index=False)
print("Saved → test_ensemble_5models.npy & submission_5models.csv")


Using device: cuda
Loaded test embeddings: (75000, 2048)
Reduced test shape: (75000, 768)
Model 1 done.
Model 2 done.
Model 3 done.
Model 4 done.
Model 5 done.
Final predictions shape: (75000, 1)
Saved → test_ensemble_5models.npy & submission_5models.csv


In [None]:
import pandas as pd

df_pred = pd.DataFrame(y_pred, columns=['Predicted_Price'])
print(df_pred.shape)  # Should print (75000, 1)
print(df_pred.head())

In [None]:
df1 = pd.read_csv('test.csv')

In [None]:
combined_df = pd.concat([df1['sample_id'], df_pred['price']],axis =1)


In [None]:
combined_df.to_csv("test_out.csv")