In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the input directory,
# then print them one per line.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/playground-series-s5e6/sample_submission.csv
/kaggle/input/playground-series-s5e6/train.csv
/kaggle/input/playground-series-s5e6/test.csv


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import top_k_accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed, to_categorical

# Read the competition's training and test tables (train first, then test)
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/playground-series-s5e6/train.csv',
        '/kaggle/input/playground-series-s5e6/test.csv',
    )
)

# Preprocessing
def preprocess_data(train, test):
    """Encode, split and scale the raw train/test frames.

    The function works on copies of its arguments, so the caller's
    DataFrames are left unmodified and the call can be repeated on the
    same inputs.

    Parameters
    ----------
    train : pd.DataFrame
        Training rows. Must contain an 'id' column and an object-dtype
        'Fertilizer Name' target column.
    test : pd.DataFrame
        Test rows. Must contain an 'id' column and the feature columns
        of ``train``.

    Returns
    -------
    X : pd.DataFrame
        Training features: object columns label-encoded, then every
        numeric column standardised.
    y : pd.Series
        Integer-encoded target.
    test : pd.DataFrame
        Test features, transformed with the encoders and scaler fitted
        on the training rows.
    test_ids : pd.Series
        The 'id' column of ``test``.
    class_names : np.ndarray
        Original target labels; position i is the label of class id i.
    le_target : LabelEncoder
        The fitted target encoder.

    Raises
    ------
    ValueError
        If 'Fertilizer Name' is not among the object-dtype columns of
        ``train``, or if a categorical column of ``test`` holds a value
        that never occurs in ``train``.
    """
    # Copy first: the column assignments below would otherwise overwrite
    # the caller's DataFrames in place.
    train = train.copy()
    test = test.copy()

    # Every object column except the target is a categorical feature.
    cat_cols = train.select_dtypes(include=['object']).columns.tolist()
    cat_cols.remove('Fertilizer Name')

    for col in cat_cols:
        # Fit on train only, then reuse the same mapping for test.
        le = LabelEncoder()
        train[col] = le.fit_transform(train[col].astype(str))
        test[col] = le.transform(test[col].astype(str))

    # Encode target
    le_target = LabelEncoder()
    train['Fertilizer Name'] = le_target.fit_transform(train['Fertilizer Name'])
    class_names = le_target.classes_

    # Split features and target
    X = train.drop(['id', 'Fertilizer Name'], axis=1)
    y = train['Fertilizer Name']
    test_ids = test['id']
    test = test.drop('id', axis=1)

    # Scale numerical features. The label-encoded categoricals are integer
    # columns by now, so they are standardised together with the rest.
    # NOTE(review): this treats nominal categories as ordinal values;
    # consider one-hot encoding or embeddings.
    scaler = StandardScaler()
    num_cols = X.select_dtypes(include=['number']).columns
    X[num_cols] = scaler.fit_transform(X[num_cols])
    test[num_cols] = scaler.transform(test[num_cols])

    return X, y, test, test_ids, class_names, le_target

# Run the preprocessing pipeline; test_data holds the transformed test features
(X, y, test_data, test_ids,
 class_names, le_target) = preprocess_data(train, test)

# One-hot encode the integer target
y_cat = to_categorical(y)

# Hold out 20% of the rows for validation, with a fixed seed for a repeatable split
split_parts = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = split_parts

# Neural Network Model
def create_model(input_shape, num_classes):
    """Build and compile a fully connected softmax classifier.

    The network is three Dense/BatchNormalization/Dropout stages
    (512, 256 and 128 units, dropout 0.3, 0.2 and 0.1) followed by a
    softmax output layer.

    Parameters
    ----------
    input_shape : int
        Number of input features per sample.
    num_classes : int
        Number of output classes (width of the softmax layer).

    Returns
    -------
    Sequential
        Model compiled with Adam (learning rate 0.001), categorical
        cross-entropy loss and an accuracy metric; expects one-hot targets.
    """
    model = Sequential([
        # An explicit Input layer replaces the `input_shape=` keyword on the
        # first Dense layer, which Keras 3 warns against in Sequential models.
        Input(shape=(input_shape,)),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.1),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialisation, dropout masks and batch shuffling start
# from the same state on every run (same seed as the train/validation split).
set_random_seed(42)

model = create_model(X_train.shape[1], y_cat.shape[1])

# Callbacks
callbacks = [
    # Stop after 10 epochs without improvement and roll back to the best weights.
    EarlyStopping(patience=10, restore_best_weights=True),
    # Cut the learning rate to a tenth after 5 epochs without improvement.
    ReduceLROnPlateau(factor=0.1, patience=5)
]

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=64,
    callbacks=callbacks,
    verbose=1
)

# Evaluate on validation set
val_probs = model.predict(X_val)
# Column indices of the three highest-probability classes per row, best first
val_top3 = np.argsort(val_probs, axis=1)[:, -3:][:, ::-1]

# Convert back to original labels
val_true = np.argmax(y_val, axis=1)
val_true_labels = le_target.inverse_transform(val_true)
# Decode every row with a single lookup into the encoder's class array instead
# of one inverse_transform call per validation row; the result is a 2-D array
# whose rows are the ranked label triples.
val_pred_labels = le_target.classes_[val_top3]

# Calculate MAP@3
def mapk(actual, predicted, k=3):
    """Mean average precision at k for rows with a single true label.

    Each row scores 1/rank, where rank is the 1-based position of the
    first occurrence of the true label within the first ``k``
    predictions, or 0 if the label is not among them. A hit in first
    place is therefore worth 1, in second place 1/2, in third place 1/3.

    Parameters
    ----------
    actual : iterable
        True label of each row.
    predicted : iterable of sequences
        Ranked predictions for each row, best guess first.
    k : int, default 3
        Number of leading predictions considered per row.

    Returns
    -------
    float
        Mean of the per-row scores; 0.0 when there are no rows.
    """
    scores = []
    for truth, ranked in zip(actual, predicted):
        row_score = 0.0
        for rank, guess in enumerate(list(ranked)[:k], start=1):
            if guess == truth:
                # Only the first hit counts, so repeated labels earn nothing extra.
                row_score = 1.0 / rank
                break
        scores.append(row_score)
    # Guard the empty case: np.mean([]) would return nan with a warning.
    return float(np.mean(scores)) if scores else 0.0

val_score = mapk(val_true_labels, val_pred_labels)
print(f"Validation MAP@3: {val_score:.4f}")

# Predict on test set
test_probs = model.predict(test_data)
# Column indices of the three highest-probability classes per row, best first
test_top3 = np.argsort(test_probs, axis=1)[:, -3:][:, ::-1]

# Create submission
# Decode all rows with one lookup into the encoder's class array rather than
# calling inverse_transform once per test row, then join each ranked triple
# into a space-separated string.
test_top3_labels = le_target.classes_[test_top3]
test_pred_labels = [' '.join(row) for row in test_top3_labels]
submission = pd.DataFrame({
    'id': test_ids,
    'Fertilizer Name': test_pred_labels
})

submission.to_csv('submission_keras.csv', index=False)

2025-06-01 13:36:45.674030: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748785005.878238      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748785005.935860      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1748785020.308229      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/100


I0000 00:00:1748785025.210031      58 service.cc:148] XLA service 0x7c3268212300 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748785025.210666      58 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1748785025.582746      58 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  65/9375[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 2ms/step - accuracy: 0.1306 - loss: 2.5410

I0000 00:00:1748785027.923463      58 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 3ms/step - accuracy: 0.1496 - loss: 2.0036 - val_accuracy: 0.1542 - val_loss: 1.9445 - learning_rate: 0.0010
Epoch 2/100
[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.1565 - loss: 1.9427 - val_accuracy: 0.1556 - val_loss: 1.9407 - learning_rate: 0.0010
Epoch 3/100
[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.1574 - loss: 1.9410 - val_accuracy: 0.1569 - val_loss: 1.9401 - learning_rate: 0.0010
Epoch 4/100
[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.1569 - loss: 1.9405 - val_accuracy: 0.1582 - val_loss: 1.9400 - learning_rate: 0.0010
Epoch 5/100
[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.1579 - loss: 1.9401 - val_accuracy: 0.1601 - val_loss: 1.9400 - learning_rate: 0.0010
Epoch 6/100
[1m9375/9375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3