In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

In [3]:
# Read the crop-recommendation dataset and preview its first five rows
# (five is the default row count for `head`).
data = pd.read_csv('Crop_recommendation.csv')
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [7]:
# Per-column count of missing values (`isna` is the method `isnull` aliases).
data.isna().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [9]:
# Count rows that exactly repeat an earlier row; the first occurrence of
# each row is not counted as a duplicate.
data.duplicated(keep='first').sum()

0

In [11]:
# `data` is already a DataFrame (it comes straight from `pd.read_csv`), so no
# conversion is needed. Instead, fail fast with a clear message if the file
# lacks any column that the modelling cells select by name.
REQUIRED_COLUMNS = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label']
missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
assert not missing_columns, f"Dataset is missing expected columns: {missing_columns}"

In [13]:
# Single source of truth for the model inputs: the same list drives both the
# feature selection and the preprocessing step, so the two cannot drift apart.
FEATURE_COLUMNS = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
TARGET_COLUMN = 'label'

X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Train/test split: hold out 20% of the rows; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: standard scaling for the numerical features
scaler = StandardScaler()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', scaler, FEATURE_COLUMNS)
    ]
)

# Build the pipeline so scaling and classification are fitted and applied
# together as one estimator.
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),  # Preprocessing step
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))  # RandomForestClassifier
])

In [15]:
# Fit every pipeline step (scaler, then classifier) on the training split.
model_pipeline.fit(X=X_train, y=y_train)

In [17]:
# Score the fitted pipeline on the held-out test split and report it as a
# percentage with two decimals.
accuracy = model_pipeline.score(X=X_test, y=y_test)
accuracy_message = f"Model Accuracy: {accuracy * 100:.2f}%"
print(accuracy_message)

Model Accuracy: 99.32%


In [19]:
# Persist the whole fitted pipeline (preprocessing + classifier) to disk.
model_file = 'crop_recommendation_model.pkl'
joblib.dump(model_pipeline, model_file)

print("Model and pipeline saved successfully!")

Model and pipeline saved successfully!


In [25]:
import pandas as pd
import joblib

def predict_crop(N, P, K, temperature, humidity, ph, rainfall,
                 model_path='crop_recommendation_model.pkl', model=None):
    """Predict the crop label for one set of feature values.

    Parameters
    ----------
    N, P, K, temperature, humidity, ph, rainfall : numeric
        Feature values, named after the dataset columns the pipeline was
        trained on. They are placed in a one-row DataFrame in that order.
    model_path : str, optional
        File to load the fitted pipeline from when ``model`` is not given.
        Defaults to the file written by the training cells.
    model : object with a ``predict`` method, optional
        An already-loaded pipeline. Pass it to avoid re-reading the file on
        every call (e.g. when predicting in a loop).

    Returns
    -------
    The first (and only) element of the model's prediction for the row.
    """
    feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

    if model is None:
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. Only point `model_path` at files you trust.
        model = joblib.load(model_path)

    # Build a one-row DataFrame with named columns so the pipeline's
    # column-based preprocessing can select features by name.
    input_data = pd.DataFrame(
        [[N, P, K, temperature, humidity, ph, rainfall]],
        columns=feature_names,
    )

    # Predict the label
    predicted_label = model.predict(input_data)

    return predicted_label[0]


In [27]:
# Example input: one set of readings, unpacked into the feature variables
N, P, K = 85, 58, 41
temperature, humidity = 21.5, 80.3
ph, rainfall = 6.9, 230

# Predict the crop, passing each feature by name
predicted_crop = predict_crop(
    N=N, P=P, K=K,
    temperature=temperature, humidity=humidity,
    ph=ph, rainfall=rainfall,
)
print(f"Predicted Crop: {predicted_crop}")


Predicted Crop: rice
