In [368]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from river import tree, metrics, stream
from datetime import datetime
import time
import os

In [369]:
print("="*55)
print('Step 1:DATA LOADING')
print("="*55)

Step 1:DATA LOADING


In [370]:
# Load dataset
df = pd.read_csv('Crop_recommendation.csv')

In [371]:
print("="*55)
print('Step 2:DATA EXPLORATION')
print("="*55)

Step 2:DATA EXPLORATION


In [372]:
# Display dataset info
print("Dataset shape:", df.shape)
print("\nFirst 5 rows:")
display(df.head())
print("\nLabel distribution:")
print(df['label'].value_counts())

Dataset shape: (2200, 8)

First 5 rows:


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice



Label distribution:
label
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: count, dtype: int64


In [373]:
print("="*55)
print('Step 3:DATA PREPROCESSING')
print("="*55)

Step 3:DATA PREPROCESSING


In [374]:
# Preprocessing
# Encode labels
le = LabelEncoder()
df['label_encoded'] = le.fit_transform(df['label'])
joblib.dump(le, 'label_encoder.joblib')

# Split features and target
X = df.drop(['label', 'label_encoded'], axis=1)
y = df['label_encoded']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2
)

# Scale features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save scaler
joblib.dump(scaler, 'scaler.joblib')
print("Data preprocessing complete!")
print(f"Training set: {X_train_scaled.shape}, Test set: {X_test_scaled.shape}")


Data preprocessing complete!
Training set: (1760, 7), Test set: (440, 7)


In [375]:
print("="*55)
print('Step 4:MODEL TRAINING')
print("="*55)

Step 4:MODEL TRAINING


In [376]:
# Model Training
# Initialize and train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Save initial model
joblib.dump(model, 'initial_model.joblib')
joblib.dump(X_train.columns.tolist(),'original_columns.joblib')

# Evaluate model
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

print(f"Model trained successfully! Accuracy: {acc:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Create historical dataset
df.to_csv('sensor_history.csv', index=False)
print("Historical dataset saved for continuous learning")


Model trained successfully! Accuracy: 0.9886

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        18
   blackgram       1.00      0.94      0.97        17
    chickpea       1.00      1.00      1.00        22
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        21
      cotton       1.00      1.00      1.00        19
      grapes       1.00      1.00      1.00        25
        jute       0.86      1.00      0.92        18
 kidneybeans       1.00      1.00      1.00        21
      lentil       0.94      1.00      0.97        17
       maize       0.94      1.00      0.97        15
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.93      0.97        15
    mungbean       1.00      1.00      1.00        18
   muskmelon       1.00      1.00      1.00        17
      orange

In [377]:
print("="*55)
print('Step 5:CONTINUOUS LEARNING SYSTEM SETUPS')
print("="*55)

Step 5:CONTINUOUS LEARNING SYSTEM SETUPS


In [378]:
print("="*55)
print('Step 5:INITIALIZE LEARNING SYSTEM')
print("="*55)

Step 5:INITIALIZE LEARNING SYSTEM


In [379]:
from warnings import filterwarnings
filterwarnings('ignore')  # Temporarily suppress warnings for cleaner output
from river import tree, metrics, stream

class ContinuousLearningSystem:
    def __init__(self):
        # Load preprocessing objects
        self.scaler = joblib.load('scaler.joblib')
        self.label_encoder = joblib.load('label_encoder.joblib')
        
        # Initialize online model
        self.online_model = tree.HoeffdingTreeClassifier()
        self.metric = metrics.Accuracy()
        self.last_retrain = datetime.now()
        
        # Initialize with historical data if available
        if os.path.exists('sensor_history.csv'):
            self.initialize_with_historical_data()
    
    def initialize_with_historical_data(self):
        """Load historical data and train model incrementally"""
        df_hist = pd.read_csv('sensor_history.csv')
        X_hist = df_hist.drop(['label', 'label_encoded'], axis=1, errors='ignore')
        y_hist = df_hist['label_encoded'] if 'label_encoded' in df_hist else self.label_encoder.transform(df_hist['label'])
        
        # Train online model incrementally
        for i, (xi, yi) in enumerate(stream.iter_pandas(X_hist, y_hist)):
            # Ensure xi is in correct format (dict)
            if not isinstance(xi, dict):
                xi = xi.to_dict() if hasattr(xi, 'to_dict') else dict(zip(X_hist.columns, xi))
            
            xi_scaled = self._scale_features(xi)
            self.online_model.learn_one(xi_scaled, yi)
            
            if i % 100 == 0:
                print(f"Processed {i+1} historical samples")
        
        print(f"Online model initialized with {len(X_hist)} historical samples")
    
    def _scale_features(self, features):
        """Convert features to dict and scale them"""
        # Ensure features is a dictionary
        if not isinstance(features, dict):
            if hasattr(features, 'to_dict'):
                features = features.to_dict()
            else:
                features = dict(zip(self.scaler.feature_names_in_, features))
        
        # Scale features and return as dict
        scaled_values = self.scaler.transform([[features[col] for col in self.scaler.feature_names_in_]])[0]
        return {col: scaled_values[i] for i, col in enumerate(self.scaler.feature_names_in_)}
    
    def predict(self, sensor_data):
        """Make prediction based on sensor input"""
        scaled_features = self._scale_features(sensor_data)
        prediction = self.online_model.predict_one(scaled_features)
        return self.label_encoder.inverse_transform([prediction])[0]
    
    def update_model(self, sensor_data, actual_crop):
        """Update model with new labeled data"""
        # Encode crop label
        actual_label = self.label_encoder.transform([actual_crop])[0]
        
        # Scale features
        scaled_features = self._scale_features(sensor_data)
        
        # Update model
        self.online_model.learn_one(scaled_features, actual_label)
        
        # Update history
        self._update_history(sensor_data, actual_crop, actual_label)
        
        # Periodic retraining
        if (datetime.now() - self.last_retrain).days >= 7:
            self.full_retrain()
            self.last_retrain = datetime.now()
    
    def _update_history(self, sensor_data, actual_crop, actual_label):
        """Append new data to historical dataset"""
        # Convert to dictionary format
        if isinstance(sensor_data, dict):
            new_entry = sensor_data.copy()
        elif hasattr(sensor_data, 'to_dict'):
            new_entry = sensor_data.to_dict()
        else:
            new_entry = dict(zip(self.scaler.feature_names_in_, sensor_data))
            
        new_entry.update({
            'label': actual_crop,
            'label_encoded': actual_label
        })
        
        # Append to history
        if os.path.exists('sensor_history.csv'):
            hist_df = pd.read_csv('sensor_history.csv')
            updated_df = pd.concat([hist_df, pd.DataFrame([new_entry])], ignore_index=True)
        else:
            updated_df = pd.DataFrame([new_entry])
        
        updated_df.to_csv('sensor_history.csv', index=False)
    
    def full_retrain(self):
        """Periodic full retraining with all historical data"""
        print("Starting full retraining...")
        df_hist = pd.read_csv('sensor_history.csv')
        X_hist = df_hist.drop(['label', 'label_encoded'], axis=1)
        y_hist = df_hist['label_encoded']
        
        # Reinitialize model
        self.online_model = tree.HoeffdingTreeClassifier()
        
        # Retrain with all data
        for xi, yi in stream.iter_pandas(X_hist, y_hist):
            # Ensure xi is in correct format (dict)
            if not isinstance(xi, dict):
                xi = xi.to_dict() if hasattr(xi, 'to_dict') else dict(zip(X_hist.columns, xi))
            
            xi_scaled = self._scale_features(xi)
            self.online_model.learn_one(xi_scaled, yi)
        
        print(f"Model retrained with {len(X_hist)} samples")

In [380]:
cl_system = ContinuousLearningSystem()
print("Continuous learning system initialized!")

Processed 1 historical samples
Processed 101 historical samples
Processed 201 historical samples
Processed 301 historical samples
Processed 401 historical samples
Processed 501 historical samples
Processed 601 historical samples
Processed 701 historical samples
Processed 801 historical samples
Processed 901 historical samples
Processed 1001 historical samples
Processed 1101 historical samples
Processed 1201 historical samples
Processed 1301 historical samples
Processed 1401 historical samples
Processed 1501 historical samples
Processed 1601 historical samples
Processed 1701 historical samples
Processed 1801 historical samples
Processed 1901 historical samples
Processed 2001 historical samples
Processed 2101 historical samples
Online model initialized with 2200 historical samples
Continuous learning system initialized!


In [381]:
class VirtualIoTDevice:
    def __init__(self, learning_system):
        self.learning_system = learning_system
        self.sensors = {
            'N': np.random.randint(0, 140),
            'P': np.random.randint(5, 145),
            'K': np.random.randint(5, 205),
            'temperature': np.random.uniform(8, 44),
            'humidity': np.random.uniform(14, 99),
            'ph': np.random.uniform(3.5, 9.9),
            'rainfall': np.random.uniform(20, 300)
        }
    
    def read_sensors(self):
        """Simulate sensor readings with small random variations"""
        new_readings = {}
        for key, value in self.sensors.items():
            # Add ±5% variation to simulate real readings
            variation = np.random.uniform(-0.05, 0.05)
            new_value = max(0, value * (1 + variation))
            new_readings[key] = round(new_value, 2)
        return new_readings
    
    def display_recommendation(self, recommendation):
        """Display recommendation nicely"""
        print("\n" + "="*50)
        print(f"Recommended crop: {recommendation}")
        print("="*50)
    
    def run(self, days=30):
        """Simulate device operation over time"""
        for day in range(1, days+1):
            print(f"\n{'='*30}")
            print(f"DAY {day} - MORNING READING")
            print(f"{'='*30}")
            
            # Morning reading
            sensor_data = self.read_sensors()
            print("Sensor Readings:")
            for k, v in sensor_data.items():
                print(f"- {k}: {v}")
            
            # Get recommendation
            recommendation = self.learning_system.predict(sensor_data)
            self.display_recommendation(recommendation)
            
            # Simulate farmer feedback (once every 3 days)
            if day % 3 == 0:
                print("\nFARMER FEEDBACK RECEIVED")
                # In real system, this would come from farmer input
                # Here we'll use a simple simulation
                actual_crop = 'rice'  # 
                #Simulated farmer feedback
                print(f"Farmer planted: {actual_crop}")
                # Update model
                self.learning_system.update_model(sensor_data, actual_crop)
                print("Model updated with new data!")
            # Wait until next day
            time.sleep(1)  # Simulate daily interval



In [397]:
print("="*55)
print('Step 6:MAIN DEPLOYMENT')
print("="*55)

Step 6:MAIN DEPLOYMENT


In [399]:
def main():
    # Initialize system
    cl_system = ContinuousLearningSystem()
    device = VirtualIoTDevice(cl_system)
    
    print("\n" + "="*50)
    print("IoT CROP RECOMMENDATION SYSTEM ACTIVATED")
    print("Initial Model Loaded | Continuous Learning Active")
    print("="*50)
    
    # Run simulation
    try:
        device.run(days=7)  # Simulate 7 days
    except KeyboardInterrupt:
        print("\nSystem interrupted by user")
    
    # Save final state
    joblib.dump(cl_system.online_model, 'online_model.joblib')
    print("\nSystem shutdown. Model saved for next session.")


In [401]:
print("="*55)
print('Step 7:RUN THE SYSTEM')
print("="*55)

Step 7:RUN THE SYSTEM


In [407]:
if __name__ == "__main__":
    main()
    """Scale sensor features using the fitted scaler"""
        # Convert to 2D array for scaler

Processed 1 historical samples
Processed 101 historical samples
Processed 201 historical samples
Processed 301 historical samples
Processed 401 historical samples
Processed 501 historical samples
Processed 601 historical samples
Processed 701 historical samples
Processed 801 historical samples
Processed 901 historical samples
Processed 1001 historical samples
Processed 1101 historical samples
Processed 1201 historical samples
Processed 1301 historical samples
Processed 1401 historical samples
Processed 1501 historical samples
Processed 1601 historical samples
Processed 1701 historical samples
Processed 1801 historical samples
Processed 1901 historical samples
Processed 2001 historical samples
Processed 2101 historical samples
Online model initialized with 2200 historical samples

IoT CROP RECOMMENDATION SYSTEM ACTIVATED
Initial Model Loaded | Continuous Learning Active

DAY 1 - MORNING READING
Sensor Readings:
- N: 81.6
- P: 116.93
- K: 97.39
- temperature: 37.39
- humidity: 81.21
- ph