In [6]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the data
data = pd.read_csv('FP.csv')

# One label encoder per categorical column, so each mapping can be saved
# and reused independently at prediction time.
le_fertilizer = LabelEncoder()
le_crop = LabelEncoder()
le_soil = LabelEncoder()

# Replace the categorical string columns with their integer codes
data['Fertilizer'] = le_fertilizer.fit_transform(data['Fertilizer'])
data['Crop_Type'] = le_crop.fit_transform(data['Crop_Type'])
data['Soil_Type'] = le_soil.fit_transform(data['Soil_Type'])

# Prepare features and target
X = data.drop('Fertilizer', axis=1)  # every column except the target
y = data['Fertilizer']  # encoded fertilizer class (the target)

# Split BEFORE scaling so that test-set statistics never influence the scaler.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# 'Fertilizer' is a nominal class label: its integer codes carry no order or
# distance, so this is a classification problem. A regressor would average
# class indices and produce meaningless in-between values.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions (encoded class labels)
y_pred = model.predict(X_test)

# Evaluate the model with a classification metric
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


# Save the model and the preprocessing objects needed to reproduce the
# exact same feature pipeline at prediction time.
joblib.dump(model, 'random_forest_fertilizer_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(le_fertilizer, 'le_fertilizer.pkl')
joblib.dump(le_crop, 'le_crop.pkl')
joblib.dump(le_soil, 'le_soil.pkl')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   T           99 non-null     int64 
 1   Hum         99 non-null     int64 
 2   Moisture    99 non-null     int64 
 3   Soil_Type   99 non-null     object
 4   Crop_Type   99 non-null     object
 5   N           99 non-null     int64 
 6   K           99 non-null     int64 
 7   P           99 non-null     int64 
 8   Fertilizer  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [15]:
import numpy as np
import pandas as pd
import joblib

# Restore the trained model and its preprocessing companions from disk,
# in the same order they are unpacked on the left-hand side.
# NOTE: these .pkl artifacts are deserialised by joblib.load; only load
# files that come from a trusted source.
model, scaler, le_fertilizer, le_crop, le_soil = (
    joblib.load(artifact_path)
    for artifact_path in (
        'random_forest_fertilizer_model.pkl',
        'scaler.pkl',
        'le_fertilizer.pkl',
        'le_crop.pkl',
        'le_soil.pkl',
    )
)

# Show which category labels each feature encoder recognises.
print("Known Soil Types:", le_soil.classes_)
print("Known Crop Types:", le_crop.classes_)

# Function to safely encode categorical variables
def safe_transform(encoder, values):
    """Encode ``values`` with a fitted label encoder, tolerating unseen labels.

    Parameters
    ----------
    encoder : object
        Fitted encoder exposing ``classes_`` and ``transform`` (e.g. a
        ``LabelEncoder``).
    values : iterable
        Category labels to encode.

    Returns
    -------
    numpy.ndarray
        Integer codes. Labels present in ``encoder.classes_`` receive the
        encoder's own code; every other label receives
        ``len(encoder.classes_)``, i.e. one past the largest known code.

    Warns
    -----
    UserWarning
        When at least one label is not in ``encoder.classes_``. Such labels
        get a code that was never present when the encoder was fitted, so
        anything computed from them should be treated with suspicion.
    """
    try:
        # Fast path: every label is known to the encoder.
        return encoder.transform(values)
    except ValueError:
        import warnings

        labels = list(values)
        known_labels = set(encoder.classes_)
        unknown_code = len(encoder.classes_)
        is_known = np.array([label in known_labels for label in labels], dtype=bool)

        # Start with the fallback code everywhere, then overwrite the known
        # positions using a single transform call instead of one per element.
        codes = np.full(len(labels), unknown_code, dtype=int)
        if is_known.any():
            codes[is_known] = encoder.transform(
                [label for label, ok in zip(labels, is_known) if ok]
            )

        unseen = sorted({str(label) for label, ok in zip(labels, is_known) if not ok})
        if unseen:
            warnings.warn(
                f"Unknown categories {unseen} encoded as {unknown_code}; "
                f"known categories are {list(encoder.classes_)}",
                UserWarning,
                stacklevel=2,
            )
        return codes

# Create new data for prediction. Column names must be the same as the
# feature columns of the training CSV.
new_data = pd.DataFrame({
    'T': [11],
    'Hum': [54],
    'Moisture': [15],
    'Soil_Type': ['Black'],
    'Crop_Type': ['Barley'],
    'N': [9],
    'K': [1],
    'P': [10]
})

print("\nNew data:")
print(new_data)

# Remove stray whitespace around column names
new_data.columns = new_data.columns.str.strip()

# Feature columns in the order used during training
expected_columns = ['T', 'Hum', 'Moisture', 'Soil_Type', 'Crop_Type', 'N', 'K', 'P']

# Fail loudly on a missing feature: reindex() below would otherwise insert
# an all-NaN column without any error.
missing_columns = [col for col in expected_columns if col not in new_data.columns]
if missing_columns:
    raise ValueError(f"new_data is missing required columns: {missing_columns}")

# Encode the categorical columns under the SAME names used during training
# ('Soil_Type' / 'Crop_Type'), so the frame's feature names match the ones
# the scaler was fitted with.
new_data['Soil_Type'] = safe_transform(le_soil, new_data['Soil_Type'])
new_data['Crop_Type'] = safe_transform(le_crop, new_data['Crop_Type'])

# Ensure the order of columns matches the order used during training
new_data = new_data.reindex(columns=expected_columns)

# Scale the new data with the scaler fitted at training time
new_data_scaled = scaler.transform(new_data)

# Make predictions (encoded fertilizer labels)
predictions_encoded = model.predict(new_data_scaled)

# Decode the predicted labels to get the actual fertilizer names.
# Round to the nearest integer code first: a plain astype(int) truncates
# toward zero, so a float prediction such as 3.9 would decode as class 3.
# For predictions that are already integers this is a no-op.
predictions = le_fertilizer.inverse_transform(np.rint(predictions_encoded).astype(int))

# Show the predictions
print("\nPredicted Fertilizer Name:", predictions)

Known Soil Types: ['Black' 'Clayey' 'Loamy' 'Red' 'Sandy']
Known Crop Types: ['Barley' 'Cotton' 'Ground Nuts' 'Maize' 'Millets' 'Oil seeds' 'Paddy'
 'Pulses' 'Sugarcane' 'Tobacco' 'Wheat']

New data:
    T  Hum  Moisture Soil_Type Crop_Type  N  K   P
0  11   54        15     Black    Barley  9  1  10

Predicted Fertilizer Name: ['20-20']
