In [23]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import joblib

# Load the data
data = pd.read_csv('cp.csv')
data.info()
# Create separate label encoders for each categorical column
le_crop = LabelEncoder()


# Encode categorical columns
data['label'] = le_crop.fit_transform(data['label'])

# # Prepare features and target
X = data.drop('label', axis=1)  # All columns except 'label' as features
y = data['label']  # 'label' as the target

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")


# # Save the model and preprocessing objects (uncomment if needed)
joblib.dump(model, 'random_forest_model.pkl')
joblib.dump(scaler, 'scaler1.pkl')
joblib.dump(le_crop, 'crop.pkl')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB
Mean Squared Error: 2.40
R-squared: 0.94


['crop.pkl']

In [21]:
import numpy as np
import pandas as pd
import joblib

# Load the model and preprocessing objects
model = joblib.load('random_forest_model.pkl')
scaler = joblib.load('scaler.pkl')
le_crop = joblib.load('crop.pkl')

# Example test data (replace with your actual test data)
test_data = pd.DataFrame({
    'N': [2],  # Example value for Nitrogen
    'P': [3],  # Example value for Phosphorus
    'K': [4],  # Example value for Potassium
    'temperature': [25.0],  # Example value for Temperature
    'humidity': [60.0],     # Example value for Humidity
    'ph': [6.5],            # Example value for pH
    'rainfall': [100]       # Example value for Rainfall
})

# Preprocess the test data
X_test = test_data

# Scale the test data
X_test_scaled = scaler.transform(X_test)

# Make predictions
predictions = model.predict(X_test_scaled)

# Convert numerical predictions back to labels
# Assuming predictions are integer encoded labels
predicted_labels = le_crop.inverse_transform(predictions.astype(int))

# Print the predictions in terms of strings
print("Predicted Labels:", predicted_labels)


Predicted Labels: ['mothbeans']
