In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import numpy as np

# Load the CSV file
data = pd.read_csv('/content/crop_fertilizers.csv')

# Print column names and a sample of the data (raw, before any clean-up)
print("Column names:", data.columns)
print(data.head())

# Strip any extra spaces from column names and correct typos
# (the raw header contains 'Temparature' and 'Humidity ' with a trailing space)
data.columns = [col.strip().replace('Temparature', 'Temperature') for col in data.columns]

# Verify column names: fail early with the full list of missing columns
required_columns = ['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise KeyError(f"Missing columns: {missing_columns}")

# Encode categorical features as integer codes. Each fitted encoder is kept so
# the codes can be mapped back to the original strings later.
label_encoders = {}
for column in ['Soil Type', 'Crop Type', 'Fertilizer Name']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Define features and target
X = data[['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous']]
y = data['Fertilizer Name']

# Split data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions (integer class codes)
y_pred = model.predict(X_test)

# Decode the numeric predictions and actual values back to fertilizer names so
# the results table is readable instead of showing opaque integer codes.
y_test_labels = label_encoders['Fertilizer Name'].inverse_transform(y_test)
y_pred_labels = label_encoders['Fertilizer Name'].inverse_transform(y_pred)

# Accuracy (percentage of test instances classified correctly)
accuracy = accuracy_score(y_test, y_pred) * 100

# Per-instance error as zero-one loss: 0 for a correct prediction, 1 for a wrong one.
# The fertilizer classes are nominal and their integer codes are arbitrary, so the
# numeric distance between two codes carries no meaning; subtracting codes would
# make MAE/RMSE depend on how the labels happen to be numbered. The zero-one loss
# is invariant to the encoding.
errors = (y_test.values != y_pred).astype(int)
squared_errors = errors ** 2

# Per-instance MAE and RMSE of the zero-one loss. For a single instance both are
# simply the 0/1 error indicator; the aggregate values printed below differ
# (MAE = misclassification rate, RMSE = its square root).
mae_per_instance = errors
rmse_per_instance = np.sqrt(squared_errors)

# Compute accuracy for each instance (1 if correct, 0 if incorrect)
accuracy_per_instance = [1 if pred == actual else 0 for pred, actual in zip(y_pred, y_test)]

# Display the results
print(f'Accuracy: {accuracy:.2f}%')
print(f'Mean Absolute Error (MAE): {np.mean(mae_per_instance):.2f}')
print(f'Root Mean Square Error (RMSE): {np.sqrt(np.mean(squared_errors)):.2f}')

# Build the per-instance results table for the whole test set
results = pd.DataFrame({
    'Instance': range(1, len(y_test) + 1),
    'Predicted Fertilizer': y_pred_labels,
    'Actual Fertilizer': y_test_labels,
    'MAE': mae_per_instance,
    'RMSE': rmse_per_instance,
    'Accuracy': accuracy_per_instance
})

# Display first 10 instances
print(results.head(10))

Column names: Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')
   Temparature  Humidity   Moisture Soil Type  Crop Type  Nitrogen  Potassium  \
0           26         52        38     Sandy      Maize        37          0   
1           29         52        45     Loamy  Sugarcane        12          0   
2           34         65        62     Black     Cotton         7          9   
3           32         62        34       Red    Tobacco        22          0   
4           28         54        46    Clayey      Paddy        35          0   

   Phosphorous Fertilizer Name  
0            0            Urea  
1           36             DAP  
2           30        14-35-14  
3           20           28-28  
4            0            Urea  
Accuracy: 97.56%
Mean Absolute Error (MAE): 0.05
Root Mean Square Error (RMSE): 0.31
   Instance  Predicted Fertilizer  Actual Fertilizer  

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import numpy as np

# Load the CSV file
data = pd.read_csv('/content/crop_fertilizers.csv')

# Print column names and a sample of the data (raw, before any clean-up)
print("Column names:", data.columns)
print(data.head())

# Strip any extra spaces from column names and correct typos
# (the raw header contains 'Temparature' and 'Humidity ' with a trailing space)
data.columns = [col.strip().replace('Temparature', 'Temperature') for col in data.columns]

# Verify column names: fail early with the full list of missing columns
required_columns = ['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise KeyError(f"Missing columns: {missing_columns}")

# Encode categorical features as integer codes. Each fitted encoder is kept so
# the codes can be mapped back to the original strings later.
label_encoders = {}
for column in ['Soil Type', 'Crop Type', 'Fertilizer Name']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Define features and target
X = data[['Temperature', 'Humidity', 'Moisture', 'Soil Type', 'Crop Type', 'Nitrogen', 'Potassium', 'Phosphorous']]
y = data['Fertilizer Name']

# Split data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions (integer class codes)
y_pred = model.predict(X_test)

# Decode the numeric predictions and actual values back to original categorical values
y_test_labels = label_encoders['Fertilizer Name'].inverse_transform(y_test)
y_pred_labels = label_encoders['Fertilizer Name'].inverse_transform(y_pred)

# Accuracy (percentage of test instances classified correctly)
accuracy = accuracy_score(y_test, y_pred) * 100

# Per-instance error as zero-one loss: 0 for a correct prediction, 1 for a wrong one.
# The fertilizer classes are nominal and their integer codes are arbitrary, so the
# numeric distance between two codes carries no meaning; subtracting codes would
# make MAE/RMSE depend on how the labels happen to be numbered. The zero-one loss
# is invariant to the encoding.
errors = (y_test.values != y_pred).astype(int)
squared_errors = errors ** 2

# Per-instance MAE and RMSE of the zero-one loss. For a single instance both are
# simply the 0/1 error indicator; the aggregate values printed below differ
# (MAE = misclassification rate, RMSE = its square root).
mae_per_instance = errors
rmse_per_instance = np.sqrt(squared_errors)

# Compute accuracy for each instance (1 if correct, 0 if incorrect)
accuracy_per_instance = [1 if pred == actual else 0 for pred, actual in zip(y_pred_labels, y_test_labels)]

# Display the results
print(f'Accuracy: {accuracy:.2f}%')
print(f'Mean Absolute Error (MAE): {np.mean(mae_per_instance):.2f}')
print(f'Root Mean Square Error (RMSE): {np.sqrt(np.mean(squared_errors)):.2f}')

# Build the per-instance results table for the whole test set
results = pd.DataFrame({
    'Instance': range(1, len(y_test) + 1),
    'Predicted Fertilizer': y_pred_labels,
    'Actual Fertilizer': y_test_labels,
    'MAE': mae_per_instance,
    'RMSE': rmse_per_instance,
    'Accuracy': accuracy_per_instance
})

# Display first 10 instances
print(results.head(10))


Column names: Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')
   Temparature  Humidity   Moisture Soil Type  Crop Type  Nitrogen  Potassium  \
0           26         52        38     Sandy      Maize        37          0   
1           29         52        45     Loamy  Sugarcane        12          0   
2           34         65        62     Black     Cotton         7          9   
3           32         62        34       Red    Tobacco        22          0   
4           28         54        46    Clayey      Paddy        35          0   

   Phosphorous Fertilizer Name  
0            0            Urea  
1           36             DAP  
2           30        14-35-14  
3           20           28-28  
4            0            Urea  
Accuracy: 97.56%
Mean Absolute Error (MAE): 0.05
Root Mean Square Error (RMSE): 0.31
   Instance Predicted Fertilizer Actual Fertilizer  MA