In [82]:
from sklearn.ensemble import RandomForestClassifier

class CropPredictor:
    """Train and query a Random Forest crop classifier from tabular soil data.

    The predictor label-encodes the categorical soil columns, converts the
    remaining textual measurements (e.g. ``'30°'``, ``'10%'``, ``'10–15'``)
    to floats, and uses the ``'Crop'`` column as the target.

    Attributes:
        crop_data: The DataFrame passed to the constructor.
        label_encoders: Maps a categorical column name to its LabelEncoder.
        feature_columns_: Feature names in the order produced by
            ``prepare_data_for_ml`` (``None`` until that method has run).
    """

    def __init__(self, crop_data):
        """Store the raw data; encoders are created in ``prepare_data_for_ml``.

        Args:
            crop_data: DataFrame with one row per sample and a ``'Crop'`` column.
        """
        self.crop_data = crop_data
        self.label_encoders = {}
        self.feature_columns_ = None

    def clean_value(self, value):
        """Convert a raw cell value to a float, or NaN when it is not numeric.

        Args:
            value: A string such as ``'30°'``, ``'12%'`` or ``'10-15'``, or an
                int/float that is already numeric.

        Returns:
            A float. Percentages are returned as fractions (``'12%'`` -> 0.12).
            When the text holds exactly two numbers (a range) their mean is
            returned; otherwise the first number is used. NaN is returned for
            descriptive soil words, text without digits, and non-numeric types.
            A leading minus sign is not preserved.
        """
        if isinstance(value, bool):
            return np.nan
        if isinstance(value, (int, float, np.integer, np.floating)):
            # Already numeric: pass it through instead of discarding it.
            return float(value)
        if not isinstance(value, str):
            return np.nan

        lowered = value.lower()
        descriptive_words = ('loamy', 'granular', 'sandy', 'reddish', 'dark', 'blocky')
        if any(word in lowered for word in descriptive_words):
            return np.nan

        # Keep digits, unit markers and both dash styles; the dashes stay so
        # the endpoints of a range such as '10-15' are not glued into '1015'.
        stripped = re.sub(r'[^0-9%–°.\-]', '', value)
        found = re.findall(r'\d+\.?\d*', stripped)
        if not found:
            # Covers inputs like '%' or '°' that carry a unit but no digits.
            return np.nan

        if len(found) == 2:
            number = sum(map(float, found)) / 2
        else:
            number = float(found[0])
        return number / 100 if '%' in stripped else number

    def prepare_data_for_ml(self):
        """Build the feature matrix and target list from ``self.crop_data``.

        Fits one LabelEncoder per categorical column, then walks the rows:
        ``'Crop'`` goes to the target, dict values contribute their ``'min'``
        and ``'max'`` entries, categorical columns are label-encoded and every
        other value goes through ``clean_value``. The resulting feature order
        is stored in ``self.feature_columns_`` so ``predict`` can mirror it.

        Returns:
            A ``(X, y)`` tuple: ``X`` is a DataFrame of features, ``y`` the
            list of ``'Crop'`` values.
        """
        categorical_columns = ['Soil Type', 'Soil Color', 'Soil Structure']

        for col in categorical_columns:
            if col not in self.label_encoders:
                self.label_encoders[col] = LabelEncoder()
            # A missing column is fitted on an empty list, as before.
            if col in self.crop_data.columns:
                observed = list(self.crop_data[col].unique())
            else:
                observed = []
            self.label_encoders[col].fit(observed)

        X = []
        y = []
        feature_columns = None
        for _, row in self.crop_data.iterrows():
            X_row = []
            names = []
            for prop, value in row.items():
                if prop == 'Crop':
                    y.append(value)
                elif isinstance(value, dict):
                    X_row.extend((value['min'], value['max']))
                    names.extend((f'{prop}_min', f'{prop}_max'))
                elif prop in categorical_columns:
                    X_row.append(self.label_encoders[prop].transform([value])[0])
                    names.append(prop)
                else:
                    X_row.append(self.clean_value(value))
                    names.append(prop)
            X.append(X_row)
            if feature_columns is None:
                # The first row defines the layout used later by predict().
                feature_columns = names

        self.feature_columns_ = feature_columns
        return pd.DataFrame(X), y

    def train_model(self, X, y):
        """Fit a Random Forest on an 80/20 split and print the test accuracy.

        Args:
            X: Feature matrix as returned by ``prepare_data_for_ml``.
            y: Target labels aligned with ``X``.

        Returns:
            The fitted ``RandomForestClassifier``.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = RandomForestClassifier(n_estimators=100, random_state=42)  # Random Forest model
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Model Accuracy: {accuracy:.2f}")
        return model

    def predict(self, model, input_data):
        """Predict the crop for one sample given in training feature order.

        Categorical positions are encoded with the encoders fitted in
        ``prepare_data_for_ml`` so the sample matches the training features;
        all other positions go through ``clean_value``. If no feature layout
        is known, or its length differs from ``input_data``, every value is
        passed through ``clean_value``.

        Args:
            model: A fitted estimator exposing ``predict``.
            input_data: Raw feature values for a single sample.

        Returns:
            Whatever ``model.predict`` returns for the single-row input.

        Raises:
            ValueError: If a categorical value was not seen when the encoders
                were fitted (raised by ``LabelEncoder.transform``).
        """
        input_data = list(input_data)
        # getattr keeps instances created before this attribute existed usable.
        columns = getattr(self, 'feature_columns_', None)

        if columns is None or len(columns) != len(input_data):
            features = [self.clean_value(value) for value in input_data]
        else:
            features = []
            for col, value in zip(columns, input_data):
                if col in self.label_encoders:
                    features.append(self.label_encoders[col].transform([value])[0])
                else:
                    features.append(self.clean_value(value))

        prediction = model.predict([features])
        return prediction


# # Usage Example with More Data

# crop_data = pd.DataFrame({
#     'Soil Type': ['Loamy', 'Sandy', 'Clayey', 'Loamy', 'Sandy'],
#     'Soil Color': ['Dark Brown', 'Light Brown', 'Reddish-Brown', 'Dark Brown', 'Light Brown'],
#     'Soil Structure': ['Granular', 'Loose', 'Blocky', 'Granular', 'Loose'],
#     'Temperature': ['30°', '25°', '28°', '32°', '27°'],
#     'Moisture': ['10%', '15%', '12%', '8%', '11%'],
#     'Crop': ['Rice', 'Wheat', 'Maize', 'Rice', 'Wheat']
# })

# Initialize CropPredictor.
# NOTE(review): `crop_data` is not defined in this cell (the example frame
# above is commented out), so it must already exist in the session.
crop_predictor = CropPredictor(crop_data)

# Prepare data for machine learning: X is the feature DataFrame, y the list
# of 'Crop' labels.
X, y = crop_predictor.prepare_data_for_ml()

# Train the model; train_model prints the held-out accuracy and returns the
# fitted classifier.
model = crop_predictor.train_model(X, y)

# Predict a crop based on input data.
# Presumably the values follow the feature column order (Soil Type,
# Soil Color, Soil Structure, Temperature, Moisture) — confirm against crop_data.
input_data = ['Sandy', 'Light Brown', 'Loose', '28°', '12%']
predicted_crop = crop_predictor.predict(model, input_data)

# Print the predicted crop (the value returned by model.predict for one row)
print(f"Predicted Crop: {predicted_crop}")


Model Accuracy: 1.00
Predicted Crop: ['Rice']
