In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score

In [23]:
data = pd.read_csv('Fertilizer Prediction.csv')
data.head(5)

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [55]:
data.columns = data.columns.str.strip()
data.rename(columns={'Temparature': 'Temperature'}, inplace=True)
print("Cleaned column names:", data.columns)

Cleaned column names: Index(['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')


In [25]:
data.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [27]:
data.duplicated().sum()

0

In [57]:
data= pd.DataFrame(data)

In [59]:
X = data.drop('Fertilizer Name', axis=1)
y = data['Fertilizer Name']

In [61]:
# Define categorical and numerical columns
numerical_columns = ['Temperature','Humidity','Moisture','Nitrogen','Potassium','Phosphorous']
categorical_columns = ['Soil Type', 'Crop Type']

# Create transformers
ohe = OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False)
scaler = StandardScaler()

# Define the column transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', scaler, numerical_columns),  # Scale numerical columns
        ('cat', ohe, categorical_columns)   # One-hot encode categorical columns
    ]
)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [51]:
print("Column names in the dataset:", data.columns)

# Define features and target variable
X = data.drop('Fertilizer Name', axis=1)
y = data['Fertilizer Name']

# Define numerical and categorical columns
numerical_columns = ['Temperature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
categorical_columns = ['Soil Type', 'Crop Type']

# Debug: Check if columns are correct
print("Numerical columns:", numerical_columns)
print("Categorical columns:", categorical_columns)

# Check if all the numerical columns exist in the dataset
assert all(col in data.columns for col in numerical_columns), "Some numerical columns are missing!"
assert all(col in data.columns for col in categorical_columns), "Some categorical columns are missing!"

Column names in the dataset: Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')
Numerical columns: ['Temperature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
Categorical columns: ['Soil Type', 'Crop Type']


AssertionError: Some numerical columns are missing!

In [63]:
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42, n_estimators=100))
])

In [65]:
model_pipeline.fit(X_train, y_train)

In [67]:
y_pred = model_pipeline.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Classification Report:
              precision    recall  f1-score   support

    10-26-26       1.00      0.50      0.67         2
    14-35-14       1.00      1.00      1.00         1
    17-17-17       0.00      0.00      0.00         0
       20-20       1.00      1.00      1.00         1
       28-28       1.00      1.00      1.00         5
         DAP       1.00      1.00      1.00         5
        Urea       1.00      1.00      1.00         6

    accuracy                           0.95        20
   macro avg       0.86      0.79      0.81        20
weighted avg       1.00      0.95      0.97        20

Accuracy: 95.00%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [85]:
def predict_fertilizer(temp, humidity, moisture, soil_type, crop_type, nitrogen, potassium, phosphorous):
    input_data = pd.DataFrame([[temp, humidity, moisture, soil_type, crop_type, nitrogen, potassium, phosphorous]],
                              columns=['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 
                                       'Nitrogen', 'Potassium', 'Phosphorous'])
    prediction = model_pipeline.predict(input_data)
    return prediction[0]

# Example prediction
predicted_fertilizer = predict_fertilizer(50, 30, 50, 'Black', 'Cotton', 0, 10, 20)
print(f"Predicted Fertilizer: {predicted_fertilizer}")

Predicted Fertilizer: 14-35-14


In [87]:
joblib.dump(model_pipeline, 'fertilizer_recommendation_model.pkl')

print("Model and pipeline saved successfully!")

Model and pipeline saved successfully!
