In [None]:
import sys
import pandas as pd
import joblib

# Directory of the attached Kaggle dataset that holds the saved model file.
# Edit MODEL_PATH so it matches where your own dataset is mounted.
MODEL_PATH = '/kaggle/input/gradient-boosting-model'

# Make the dataset directory importable: per the original notebook it also
# ships the `transformers` module (presumably needed when the saved pipeline
# is unpickled — verify against the dataset contents).
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if MODEL_PATH not in sys.path:
    sys.path.insert(0, MODEL_PATH)

## Load the Trained Model

In [None]:
# Deserialize the trained pipeline stored in the model dataset directory.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load artifacts from a dataset you trust.
model_file = f'{MODEL_PATH}/gradient_boosting.joblib'
model = joblib.load(model_file)

# Quick confirmation of what was loaded.
print("Model loaded successfully!")
print(f"Model type: {type(model)}")

## Load Test Data

In [None]:
# Read the competition test set into a DataFrame.
# Adjust the path below if your Kaggle input data is mounted elsewhere.
# NOTE(review): this path points at `playground-series-s5e3`, while the
# submission cell names its prediction column 'diabetes' — confirm both
# refer to the same competition.
test_csv = '/kaggle/input/playground-series-s5e3/test.csv'
test_df = pd.read_csv(test_csv)

print(f"Test data shape: {test_df.shape}")
print("\nFirst few rows:")
# Bare last expression so the notebook renders the preview as a rich table.
test_df.head()

## Make Predictions

In [None]:
# Score the raw test frame with the loaded model. According to the original
# notebook the pipeline performs its own preprocessing, so no feature
# engineering is done here.
class_probabilities = model.predict_proba(test_df)

# Keep the second probability column.
# NOTE(review): presumably the positive class — verify against model.classes_.
predictions = class_probabilities[:, 1]

# Sanity summary of the predicted scores.
lowest, highest = predictions.min(), predictions.max()
print(f"Predictions shape: {predictions.shape}")
print(f"Prediction range: [{lowest:.4f}, {highest:.4f}]")
print(f"Mean prediction: {predictions.mean():.4f}")

## Create Submission File

In [None]:
# Assemble the submission: the test ids plus one column of predicted scores.
# NOTE(review): the prediction column is named 'diabetes' while the test data
# is read from `playground-series-s5e3` — confirm the column name matches that
# competition's sample_submission.csv.
submission = test_df[['id']].assign(diabetes=predictions)

# Write the file without the DataFrame index so only the two columns appear.
submission.to_csv('submission.csv', index=False)

print("Submission file created: submission.csv")
print("\nSubmission preview:")
# Bare last expression so the notebook renders the first ten rows as a table.
submission.head(10)

## Verify Submission Format

In [None]:
# Verify the submission before uploading it.
# The summary below is informational; the assertions afterwards make this cell
# fail loudly if the frame is malformed instead of relying on a visual check.
has_nulls = submission.isnull().any().any()

print(f"Submission shape: {submission.shape}")
print(f"Column names: {list(submission.columns)}")
print(f"Any null values: {has_nulls}")
print("\nFirst 5 rows:")
print(submission.head())
print("\nLast 5 rows:")
print(submission.tail())

# Hard checks: one row per test row, no missing values, and scores that are
# valid probabilities (they come from predict_proba).
scores = submission.iloc[:, -1]  # prediction column is the last column
assert len(submission) == len(test_df), "submission row count differs from test data"
assert not has_nulls, "submission contains null values"
assert scores.between(0, 1).all(), "predicted probabilities fall outside [0, 1]"