In [1]:
import os
import warnings
from itertools import combinations

import joblib
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler


In [2]:
# Authenticate this kernel with Weights & Biases.
# The recorded output of this cell shows the API key being appended to the
# user's netrc file, so review that output before sharing the notebook.
import wandb
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/aja/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Suppress warnings for cleaner output.
# NOTE: no category or module is given, so this filter ignores every warning
# raised from here on (deprecations and numerical warnings included).
warnings.filterwarnings('ignore')


In [None]:
class CFG:
    """Central configuration for the fertilizer-prediction experiment.

    Every setting is a class attribute and is read as ``CFG.NAME``; the class
    is never instantiated here. The file paths are computed once, when this
    cell is executed, from the kernel's current working directory, so the
    ``os`` module must already be imported.
    """

    # --- File Paths ---
    # '<cwd>/../data', normalised to an absolute path.
    DATA_PATH = os.path.abspath(os.path.join(os.getcwd(), '..', 'data'))
    # Original lowercase spelling, kept so existing `CFG.data_path` reads work.
    data_path = DATA_PATH
    TRAIN_PATH = os.path.join(DATA_PATH, 'train.csv')  # Path to your training data
    TEST_PATH = os.path.join(DATA_PATH, 'test.csv')    # Path to your test data

    # --- Model Settings ---
    N_SPLITS = 5              # Number of CV folds
    SEED = 3407               # Fixed random seed (original note: "Karpathy")
    TARGET_COL = 'Fertilizer Name'

    # --- Experiment Tracking ---
    PROJECT_NAME = 'fertilizer-prediction'
    EXPERIMENT_NAME = 'starter_24_6_2025'

    # --- Model Selection ---
    USE_LIGHTGBM = True       # model_1
    USE_XGBOOST = True        # model_2
    USE_CATBOOST = True       # model_3

    # --- Feature Engineering ---
    USE_TARGET_ENCODING = True
    USE_ADVANCED_FEATURES = True
    USE_INTERACTION_FEATURES = True

In [None]:
def mapk(actual, predicted, k=3):
    """Mean Average Precision at ``k`` (MAP@k).

    Parameters
    ----------
    actual : sequence
        One entry per sample. Each entry is either a collection of relevant
        labels or a single scalar label (string or number), which is treated
        as a one-item collection.
    predicted : sequence
        One ranked sequence of predicted labels per sample, best guess first.
        Entries are paired with ``actual`` via ``zip``, so surplus entries on
        the longer side are ignored.
    k : int, default 3
        Only the first ``k`` predictions of each sample are scored.

    Returns
    -------
    float
        Mean of the per-sample average-precision values.
    """
    def apk(a, p, k):
        # A bare label such as 'Urea' must count as ONE relevant item. Left as
        # a string, `pred in a` would do substring matching and `len(a)` would
        # count characters; a bare number would not support `in`/`len` at all.
        if np.isscalar(a):
            a = [a]
        # No relevant labels: score 0 rather than dividing by min(0, k) == 0.
        if len(a) == 0:
            return 0.0

        p = p[:k]
        score = 0.0
        hits = 0
        seen = set()  # a repeated prediction is only credited once
        for i, pred in enumerate(p):
            if pred in a and pred not in seen:
                hits += 1
                score += hits / (i + 1.0)  # precision at this rank
                seen.add(pred)
        return score / min(len(a), k)

    return np.mean([apk(a, p, k) for a, p in zip(actual, predicted)])

In [None]:
# TODO: unfinished note ("I wan…") — complete this thought or delete the cell.

In [None]:
def create_advanced_features(df):
    """Add hand-crafted agronomic features to a copy of ``df``.

    Reads the columns 'Temparature' (spelled as in the data), 'Humidity',
    'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Phosphorous' and
    'Potassium'. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns listed above.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these columns appended, in this order:
        temp_stress, moisture_stress, humidity_stress, drainage_category,
        crop_nutrient_demand, ph_suitability, leaching_risk, N_efficiency,
        P_efficiency, K_efficiency.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    df_copy = df.copy()

    temperature = df_copy['Temparature']
    humidity = df_copy['Humidity']
    moisture = df_copy['Moisture']
    soil = df_copy['Soil Type']
    crop = df_copy['Crop Type']

    # --- Environmental Stress Indicators (0/1 flags) ---
    df_copy['temp_stress'] = ((temperature < 15) | (temperature > 35)).astype(int)
    df_copy['moisture_stress'] = (moisture < 30).astype(int)
    df_copy['humidity_stress'] = ((humidity < 40) | (humidity > 80)).astype(int)

    # --- Soil Drainage Categories ---
    # Soil types not listed here map to NaN.
    drainage_map = {
        'Sandy': 'high_drainage',
        'Loamy': 'medium_drainage',
        'Black': 'low_drainage',
        'Red': 'medium_drainage',
        'Clayey': 'low_drainage'
    }
    df_copy['drainage_category'] = soil.map(drainage_map)

    # --- Crop Categories (based on nutrient needs) ---
    heavy_feeders = ['Sugarcane', 'Cotton', 'Maize', 'Wheat']
    # 'Ground Nuts' is accepted alongside the original 'Groundnut' spelling.
    # NOTE(review): the dataset appears to use 'Ground Nuts'; confirm with
    # df['Crop Type'].unique(). An unmatched spelling silently falls through
    # to 'medium_feeder'.
    light_feeders = ['Pulses', 'Groundnut', 'Ground Nuts']

    # Vectorised replacement for a row-wise apply: the first matching
    # condition wins, everything else (including missing values) is 'medium_feeder'.
    df_copy['crop_nutrient_demand'] = np.select(
        [crop.isin(heavy_feeders).to_numpy(), crop.isin(light_feeders).to_numpy()],
        ['heavy_feeder', 'light_feeder'],
        default='medium_feeder',
    )

    # --- pH Suitability (estimated) ---
    # Placeholder: no pH information is read, so every row gets the same value.
    df_copy['ph_suitability'] = 1.0  # Default

    # --- Leaching Risk ---
    # Flagged when the soil is sandy and moisture is above 60.
    df_copy['leaching_risk'] = ((soil == 'Sandy') & (moisture > 60)).astype(int)

    # --- Nutrient Efficiency Ratios ---
    # The +1 in each denominator avoids division by zero when the
    # environmental readings are 0.
    df_copy['N_efficiency'] = df_copy['Nitrogen'] / (temperature + humidity + 1)
    df_copy['P_efficiency'] = df_copy['Phosphorous'] / (moisture + 1)
    df_copy['K_efficiency'] = df_copy['Potassium'] / (temperature + 1)

    return df_copy

In [None]:

def create_interaction_features(df):
    """Return a copy of ``df`` extended with product-style interaction columns.

    Six pairwise products between the environmental readings and the
    nutrient levels are appended, followed by a three-way product of
    temperature, moisture and humidity divided by 1000. The input frame is
    left untouched.
    """
    out = df.copy()

    # (new column, left factor, right factor); listed in output-column order.
    pairwise_products = (
        # environmental x environmental
        ('temp_humidity', 'Temparature', 'Humidity'),
        ('temp_moisture', 'Temparature', 'Moisture'),
        ('humidity_moisture', 'Humidity', 'Moisture'),
        # nutrient x environmental
        ('N_temp', 'Nitrogen', 'Temparature'),
        ('P_moisture', 'Phosphorous', 'Moisture'),
        ('K_humidity', 'Potassium', 'Humidity'),
    )
    for new_col, left, right in pairwise_products:
        out[new_col] = out[left] * out[right]

    # Three-way product, scaled down by 1000 to keep magnitudes manageable.
    three_way = out['Temparature'] * out['Moisture'] * out['Humidity']
    out['growing_conditions'] = three_way / 1000

    return out