In [1]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem; the dataset and the saved
# model artifacts used by this notebook are addressed under this path.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


# Load the raw dataset from the mounted Drive.
file_path = '/content/drive/MyDrive/crop_disease_risk_dataset.csv'
df = pd.read_csv(file_path)

# Clean up column names: strip surrounding whitespace from the CSV header so
# the column lookups below match.
df.columns = df.columns.str.strip()

# Separate features (X) and target (y)
X = df.drop('Disease_Risk', axis=1)
y = df['Disease_Risk']

# Identify categorical and numerical columns
categorical_features = ['Crop', 'Location', 'Sowing_Season']
numerical_features = ['Temperature_Max_C', 'Humidity_Avg_%', 'Rainfall_mm']

# Create a preprocessor with OneHotEncoder.
# handle_unknown='ignore' encodes categories not seen during fit as all zeros
# instead of raising at transform time.
# NOTE(review): `numerical_features` is not passed to the transformer; the
# numeric columns reach the model only through remainder='passthrough', which
# also forwards EVERY other column left in X. If the CSV has columns derived
# from Disease_Risk they would leak into the model — worth confirming.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Split the RAW rows first so the preprocessor never sees test data while
# fitting. Stratify on the target to keep the class ratio the same in both
# splits; stratification needs at least two rows per class, so fall back to a
# plain random split if some class is rarer than that.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Fit the encoder on the training rows only, then apply it to the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Kept for any later cell that uses the fully transformed frame; produced with
# the train-fitted preprocessor so it matches what the model was trained on.
X_processed = preprocessor.transform(X)

In [3]:
from sklearn.ensemble import RandomForestClassifier

# Hyperparameters for the forest, gathered in one place:
# 100 trees, a fixed seed, and n_jobs=-1 to use all available cores.
forest_settings = {
    'n_estimators': 100,
    'random_state': 42,
    'n_jobs': -1,
}

# Build the classifier and train it on the training split.
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(**forest_settings).fit(X_train, y_train)

print("Random Forest model trained successfully!")

Random Forest model trained successfully!


In [4]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Overall fraction of correct predictions, plus the per-class
# precision / recall / F1 table.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Emit the headline number first, then the detailed table under its heading.
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:", report, sep="\n")

Model Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

         Low       1.00      1.00      1.00     90378
      Medium       1.00      1.00      1.00      4230

    accuracy                           1.00     94608
   macro avg       1.00      1.00      1.00     94608
weighted avg       1.00      1.00      1.00     94608



In [6]:
import joblib
import os

# Directory on the mounted Drive where the trained artifacts are written.
save_path = '/content/drive/MyDrive/models/'

# Create the directory (and any missing parents) if needed. exist_ok=True makes
# this one idempotent call, instead of a separate exists() check followed by
# makedirs(), which fails if the directory appears between the two steps.
os.makedirs(save_path, exist_ok=True)

# Save the trained model and the preprocessor side by side: the preprocessor
# is required to transform new inputs the same way before calling the model.
model_file = os.path.join(save_path, 'pest_advisory_model.joblib')
preprocessor_file = os.path.join(save_path, 'data_preprocessor_pest.joblib')
joblib.dump(model, model_file)
joblib.dump(preprocessor, preprocessor_file)

print("Model and preprocessor saved successfully.")

Model and preprocessor saved successfully.
