In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [20]:
# Load the fertilizer dataset from a CSV file in the working directory.
# Column labels are kept verbatim from the file, including the dataset's
# 'Temparature' spelling and the trailing space in 'Humidity ' (visible in
# the `fertilizer.columns` output further down).
fertilizer = pd.read_csv('Fertilizer Prediction.csv')

In [22]:
# Preview the first rows to confirm the file parsed as expected.
fertilizer.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [23]:
# Inspect the last rows as well, to check the end of the file.
fertilizer.tail()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
94,25,50,32,Clayey,Pulses,24,0,19,28-28
95,30,60,27,Red,Tobacco,4,17,17,10-26-26
96,38,72,51,Loamy,Wheat,39,0,0,Urea
97,36,60,43,Sandy,Millets,15,0,41,DAP
98,29,58,57,Black,Sugarcane,12,0,10,20-20


In [5]:
# Summary statistics for the numeric columns.
fertilizer.describe()

Unnamed: 0,Temparature,Humidity,Moisture,Nitrogen,Potassium,Phosphorous
count,99.0,99.0,99.0,99.0,99.0,99.0
mean,30.282828,59.151515,43.181818,18.909091,3.383838,18.606061
std,3.502304,5.840331,11.271568,11.599693,5.814667,13.476978
min,25.0,50.0,25.0,4.0,0.0,0.0
25%,28.0,54.0,34.0,10.0,0.0,9.0
50%,30.0,60.0,41.0,13.0,0.0,19.0
75%,33.0,64.0,50.5,24.0,7.5,30.0
max,38.0,72.0,65.0,42.0,19.0,42.0


In [8]:
# (rows, columns) of the loaded frame.
fertilizer.shape

(99, 9)

In [9]:
# Column dtypes and non-null counts.
fertilizer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [10]:
# Count missing values per column.
fertilizer.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [14]:
# One boolean per row: True where the row repeats an earlier row.
fertilizer.duplicated()

0     False
1     False
2     False
3     False
4     False
      ...  
94    False
95    False
96    False
97    False
98    False
Length: 99, dtype: bool

In [15]:
# Total number of duplicated rows.
fertilizer.duplicated().sum()

0

In [16]:
# Show the raw column labels; the repr exposes details that tabular displays
# hide, such as the trailing space in 'Humidity '.
fertilizer.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [24]:
# Print the per-column missing-value counts under a heading.
print("Missing Values:\n", fertilizer.isnull().sum())

Missing Values:
 Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64


In [33]:
# Integer-encode the categorical feature columns, keeping one fitted encoder
# per column so the same mapping can be reused at prediction time.
# NOTE: the columns are overwritten in place, so re-running this cell without
# reloading the CSV would refit the encoders on the integer codes.
categorical_columns = ['Soil Type', 'Crop Type']
label_encoders = {column: LabelEncoder() for column in categorical_columns}

for column, le in label_encoders.items():
    fertilizer[column] = le.fit_transform(fertilizer[column])

In [34]:
# Separate the target column from the feature matrix.
y = fertilizer['Fertilizer Name']
X = fertilizer.drop('Fertilizer Name', axis=1)

In [35]:
# Encode the fertilizer names as integer class labels.
# Fit from the source column rather than from `y`: this cell rebinds `y` to
# the encoded labels, so fitting on `y` would, on a re-run, fit the encoder on
# integers and lose the fertilizer names needed by inverse_transform.
le_fertilizer = LabelEncoder()
y = le_fertilizer.fit_transform(fertilizer['Fertilizer Name'])

In [36]:
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
# NOTE(review): the split is not stratified, and the classification report
# further down shows class '17-17-17' with zero test support. Consider
# stratify=y, provided every class has at least two samples (TODO confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [37]:
# Fit a random forest with default hyperparameters on the training split;
# random_state is fixed so the fit is repeatable.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [38]:
# Predict encoded class labels for the held-out rows.
y_pred = model.predict(X_test)

In [39]:
# Score the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)

# Pin `labels` to every encoded class so the report rows always line up with
# `target_names`. Without it, a class absent from both y_test and y_pred makes
# the label count differ from len(target_names) and the call raises ValueError.
# zero_division=0 keeps the 0.00 scores for classes with no support or no
# predictions, without emitting the undefined-metric warnings.
report = classification_report(
    y_test,
    y_pred,
    labels=list(range(len(le_fertilizer.classes_))),
    target_names=le_fertilizer.classes_,
    zero_division=0,
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [40]:
# Report accuracy as a percentage, followed by the per-class metrics table.
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", report)


Model Accuracy: 95.00%

Classification Report:
               precision    recall  f1-score   support

    10-26-26       1.00      0.50      0.67         2
    14-35-14       1.00      1.00      1.00         1
    17-17-17       0.00      0.00      0.00         0
       20-20       1.00      1.00      1.00         1
       28-28       1.00      1.00      1.00         5
         DAP       1.00      1.00      1.00         5
        Urea       1.00      1.00      1.00         6

    accuracy                           0.95        20
   macro avg       0.86      0.79      0.81        20
weighted avg       1.00      0.95      0.97        20



In [41]:
# Persist the trained model together with the feature and target encoders;
# all three are needed to turn raw inputs into a fertilizer name later.
artifacts = {
    'fertilizer_prediction_model.pkl': model,
    'label_encoders.pkl': label_encoders,
    'fertilizer_label_encoder.pkl': le_fertilizer,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)
print("Model and encoders saved successfully!")


Model and encoders saved successfully!


In [42]:
# Reload the persisted model and encoders from disk, rebinding the same names
# that the prediction helper below reads.
# NOTE: joblib.load unpickles its input; only load files from a trusted source.
model = joblib.load('fertilizer_prediction_model.pkl')
label_encoders = joblib.load('label_encoders.pkl')
le_fertilizer = joblib.load('fertilizer_label_encoder.pkl')
print("Model and encoders loaded successfully!")

Model and encoders loaded successfully!


In [49]:
def predict_fertilizer(input_data):
    """
    Predict the fertilizer based on input features.

    Categorical features that have a fitted encoder in ``label_encoders`` may
    be supplied either as raw strings (encoded here) or as already-encoded
    numbers (left untouched). When the loaded model recorded its training
    column names, the input keys are matched to them ignoring surrounding
    whitespace and the columns are put in the training order.

    :param input_data: Dictionary with feature values, keyed by feature name
    :return: Predicted fertilizer name
    :raises ValueError: If a feature the model was trained on is missing
    """
    # Convert input dictionary to a single-row DataFrame
    input_df = pd.DataFrame([input_data])

    # Feature names recorded at fit time (absent if the model was fit on a
    # plain array, in which case the input is used as given).
    expected = getattr(model, 'feature_names_in_', None)
    if expected is not None:
        expected = list(expected)
        # Map whitespace-stripped names to the exact training labels so that
        # e.g. 'Humidity' is accepted for a training column 'Humidity '.
        canonical = {str(name).strip(): name for name in expected}
        input_df = input_df.rename(
            columns=lambda name: canonical.get(str(name).strip(), name)
        )
        missing = [name for name in expected if name not in input_df.columns]
        if missing:
            raise ValueError(f"Missing input features: {missing}")

    # Apply the training-time encoders to raw (non-numeric) categorical values;
    # numeric values are assumed to be already encoded by the caller.
    for column, encoder in label_encoders.items():
        if column in input_df.columns and not pd.api.types.is_numeric_dtype(input_df[column]):
            input_df[column] = encoder.transform(input_df[column])

    if expected is not None:
        # Keep only the trained features, in the order used during fit
        input_df = input_df[expected]

    # Predict using the loaded model
    predicted_class = model.predict(input_df)[0]

    # Decode the predicted class to fertilizer name
    fertilizer_name = le_fertilizer.inverse_transform([predicted_class])[0]
    return fertilizer_name

In [51]:
# Example input covering every training feature, in training column order.
# Keys must match the dataset's column labels exactly: 'Temparature' (the
# dataset's spelling) and 'Humidity ' (with its trailing space).
# Categorical values are encoded with the fitted encoders so the row can be
# fed to the model directly. Values other than temperature, humidity and
# phosphorous are illustrative placeholders.
input_data = {
    'Temparature': 30,
    'Humidity ': 60,
    'Moisture': 40,
    'Soil Type': label_encoders['Soil Type'].transform(['Loamy'])[0],
    'Crop Type': label_encoders['Crop Type'].transform(['Wheat'])[0],
    'Nitrogen': 20,
    'Potassium': 0,
    'Phosphorous': 15,
}

input_df = pd.DataFrame([input_data])

# Run the example through the prediction helper and display the result.
predict_fertilizer(input_data)