In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that later
# cells can read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install lightgbm pandas scikit-learn



In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Location of the training CSV on the mounted Google Drive. Edit this path if
# the dataset lives somewhere else in your Drive.
file_path = '/content/drive/MyDrive/fertilizer recommendation/fertilizer_advisory_synth_1year.csv'
df = pd.read_csv(file_path)

# Separate features (X) from the two targets (y): the fertilizer type
# (classification) and the fertilizer quantity (regression). 'Date' is
# excluded from the feature set.
X = df.drop(['Fertilizer_Recommended', 'Fertilizer_Quantity_kg_ha', 'Date'], axis=1)
y = df[['Fertilizer_Recommended', 'Fertilizer_Quantity_kg_ha']]

# Columns that are one-hot encoded by the preprocessor below.
categorical_features = ['State', 'Crop', 'Growth_Stage', 'Soil_Type', 'Previous_Crop']

# NOTE(review): this list is not referenced by the preprocessor. Every column
# that is not in `categorical_features` reaches the models unchanged through
# remainder='passthrough' -- confirm this list matches those columns.
numerical_features = [
    'Rainfall_mm',
    'Soil_pH',
    'Soil_Nitrogen_kg_ha',
    'Soil_Phosphorus_kg_ha',
    'Soil_Potassium_kg_ha'
]

# One-hot encode the categorical columns and pass every other column through
# untouched. handle_unknown='ignore' encodes a category that was not seen
# during fitting as all zeros instead of raising an error.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Split the raw rows BEFORE fitting the preprocessor, so that the held-out
# rows play no part in what the preprocessor learns (avoids data leakage).
# The row assignment depends only on the number of rows and random_state, so
# it is the same 80/20 partition as splitting the encoded matrix would give.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training partition only, then apply the fitted transformer to
# the test partition.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Encoded version of the full dataset, kept so that any code that still
# refers to `X_processed` continues to work.
X_processed = preprocessor.transform(X)

In [14]:
import lightgbm as lgb

# Pull the two target columns out of the train and test target frames:
# the fertilizer type feeds the classifier, the quantity feeds the regressor.
y_type_train, y_quantity_train = (
    y_train['Fertilizer_Recommended'],
    y_train['Fertilizer_Quantity_kg_ha'],
)
y_type_test, y_quantity_test = (
    y_test['Fertilizer_Recommended'],
    y_test['Fertilizer_Quantity_kg_ha'],
)

# Build both estimators up front, using the same seed for each.
# Model 1 predicts the recommended fertilizer (multiclass classification).
lgb_classifier = lgb.LGBMClassifier(
    objective='multiclass',
    random_state=42,
)
# Model 2 predicts the fertilizer quantity (regression, RMSE metric).
lgb_regressor = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    random_state=42,
)

# Train on the shared training features: classifier first, then regressor.
lgb_classifier.fit(X_train, y_type_train)
lgb_regressor.fit(X_train, y_quantity_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004829 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1403
[LightGBM] [Info] Number of data points in the train set: 189216, number of used features: 89
[LightGBM] [Info] Start training from score -1.416489
[LightGBM] [Info] Start training from score -1.415771
[LightGBM] [Info] Start training from score -2.793523
[LightGBM] [Info] Start training from score -2.798457
[LightGBM] [Info] Start training from score -0.934994
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025272 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1403
[LightGBM] [Info] Number of data points in the train set: 189216, number of used features: 89
[LightGBM] [Info] Start training from score 59.533284


In [15]:
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
import numpy as np

# Run both fitted models on the held-out features.
y_type_pred = lgb_classifier.predict(X_test)
y_quantity_pred = lgb_regressor.predict(X_test)

# Classifier metric: share of test rows whose predicted fertilizer matches.
accuracy = accuracy_score(y_type_test, y_type_pred)

# Regressor metrics: root of the mean squared error, and R-squared.
quantity_mse = mean_squared_error(y_quantity_test, y_quantity_pred)
rmse = np.sqrt(quantity_mse)
r2 = r2_score(y_quantity_test, y_quantity_pred)

# Report all three figures, rounded to two decimals.
print(f"Fertilizer Recommendation Accuracy: {accuracy:.2f}")

print(f"Fertilizer Quantity Regressor RMSE: {rmse:.2f} kg/ha")
print(f"Fertilizer Quantity Regressor R-squared: {r2:.2f}")



Fertilizer Recommendation Accuracy: 0.75
Fertilizer Quantity Regressor RMSE: 18.72 kg/ha
Fertilizer Quantity Regressor R-squared: 0.44




In [8]:
import joblib

# Write each fitted object to its own .joblib file in the current working
# directory: the classifier, the regressor, and the fitted preprocessor.
for artifact_file, fitted_object in (
    ('fertilizer_type_model.joblib', lgb_classifier),
    ('fertilizer_quantity_model.joblib', lgb_regressor),
    ('data_preprocessor.joblib', preprocessor),
):
    joblib.dump(fitted_object, artifact_file)

print("Models and preprocessor saved.")

Models and preprocessor saved.


In [17]:
import joblib
import os
# Directory on the mounted Google Drive where the trained artifacts are stored.
save_path = '/content/drive/MyDrive/fertilizer recommendation/models/'

# Create the directory (and any missing parents). exist_ok=True makes this
# safe to re-run; the message is printed only when the directory was new.
directory_already_present = os.path.isdir(save_path)
os.makedirs(save_path, exist_ok=True)
if not directory_already_present:
    print(f"Created directory: {save_path}")

# Save the trained models and the fitted preprocessor, one file per object.
# os.path.join builds each path, so the result does not depend on `save_path`
# ending with a slash.
artifacts_to_save = {
    'fertilizer_type_model.joblib': lgb_classifier,
    'fertilizer_quantity_model.joblib': lgb_regressor,
    'data_preprocessor.joblib': preprocessor,
}
for artifact_filename, artifact in artifacts_to_save.items():
    joblib.dump(artifact, os.path.join(save_path, artifact_filename))

print(f"Models and preprocessor saved to: {save_path}")

Models and preprocessor saved to: /content/drive/MyDrive/fertilizer recommendation/models/
