# Puviyan Soil Detection - Realistic Training v5.0

GPU-accelerated training with **realistic soil colors** and multiple dataset options:
- **Synthetic Data**: Generated with accurate soil colors
- **Real Data**: Upload your own dataset
- **Hybrid**: Combination of both

**Setup:** In Google Colab, open Runtime > Change runtime type and select a GPU before running the cells below.

In [None]:
# Setup Environment
# Announce the run, then install the packages this notebook depends on.
print('Setting up Puviyan Soil Detection Training v5.0 - Realistic Colors...')
# The leading "!" is a notebook shell escape, so this line is only valid when
# the cell runs under IPython/Jupyter/Colab; -q keeps pip's output quiet.
!pip install -q tensorflow matplotlib numpy tqdm pillow scikit-learn

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from google.colab import files
import json
import os
import zipfile
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Human-readable class names. The rest of the notebook indexes this list with
# the integer class id, so the order here defines the label encoding.
SOIL_LABELS = [
    'Alluvial Soil',
    'Black Soil',
    'Red Soil',
    'Laterite Soil',
    'Desert Soil',
    'Saline/Alkaline Soil',
    'Peaty/Marshy Soil',
    'Forest/Hill Soil',
]

# Look for a GPU; when one is present, report it and turn on memory growth
# for the first device. Without one, just warn that training will be slow.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print('⚠️ No GPU detected - Training will be slow')
else:
    print(f'✅ GPU Available: {gpus[0]}')
    tf.config.experimental.set_memory_growth(gpus[0], True)

print(f'📊 Soil types: {len(SOIL_LABELS)}')
print('🚀 Environment ready!')

In [None]:
# Realistic soil data generation
def _paint_square(img, x_pos, y_pos, half_size, color):
    """Fill the square of half-width *half_size* centred on (x_pos, y_pos), in place.

    Start indices are clamped to 0 because a negative slice start would wrap
    around to the opposite edge and select nothing, silently dropping the
    patch. End indices past the border need no clamp: slicing clips them.
    """
    img[max(0, y_pos - half_size):y_pos + half_size,
        max(0, x_pos - half_size):x_pos + half_size] = color


def generate_realistic_synthetic_data(num_samples=1000, seed=None):
    """Generate synthetic soil images with realistic colors based on actual soil types.

    Every image starts as a flat fill of its class's base colour, receives
    20-49 square patches in that class's variation colours, then Gaussian
    pixel noise (mean 0, standard deviation 15), and finally class-specific
    details for black, desert and saline soil.

    Args:
        num_samples: Number of images to create. Classes are assigned
            round-robin, so sample ``i`` gets class ``i % 8``.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` drives all randomness so the
            result is reproducible. When ``None`` (default) the global
            ``np.random`` state is used, exactly as before.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a ``uint8`` array of shape
        ``(num_samples, 224, 224, 3)`` and ``y`` is an ``int32`` array of
        shape ``(num_samples,)`` holding the class ids.
    """
    print(f'🎨 Generating {num_samples} realistic synthetic soil samples...')

    image_size = 224
    # np.random and RandomState expose the same randint/normal methods, so the
    # rest of the function does not care which one it was given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # RGB colours per class id: one base fill plus three patch variations.
    realistic_soil_colors = {
        0: {  # Alluvial Soil - Rich brown, fertile
            'base': [101, 67, 33],
            'variations': [[120, 85, 50], [85, 55, 25], [110, 75, 40]]
        },
        1: {  # Black Soil - Dark brown to black, clay-rich
            'base': [45, 35, 25],
            'variations': [[60, 45, 30], [30, 25, 15], [55, 40, 25]]
        },
        2: {  # Red Soil - Reddish brown, iron-rich
            'base': [139, 69, 19],
            'variations': [[160, 82, 35], [120, 55, 15], [145, 75, 25]]
        },
        3: {  # Laterite Soil - Reddish yellow, weathered
            'base': [184, 134, 11],
            'variations': [[200, 150, 30], [165, 115, 5], [190, 140, 20]]
        },
        4: {  # Desert Soil - Light brown, sandy
            'base': [194, 178, 128],
            'variations': [[210, 195, 145], [180, 160, 110], [200, 185, 135]]
        },
        5: {  # Saline/Alkaline Soil - Grayish white, salt deposits
            'base': [169, 169, 169],
            'variations': [[190, 190, 190], [150, 150, 150], [180, 180, 180]]
        },
        6: {  # Peaty/Marshy Soil - Dark neutral gray (equal R, G and B values)
            'base': [64, 64, 64],
            'variations': [[80, 80, 80], [50, 50, 50], [70, 70, 70]]
        },
        7: {  # Forest/Hill Soil - Dark brown with organic matter
            'base': [83, 53, 10],
            'variations': [[100, 70, 25], [65, 40, 5], [90, 60, 15]]
        }
    }
    num_classes = len(realistic_soil_colors)

    X = np.zeros((num_samples, image_size, image_size, 3), dtype=np.uint8)
    y = np.zeros(num_samples, dtype=np.int32)

    for i in range(num_samples):
        # Assign soil type (balanced distribution)
        soil_type = i % num_classes
        y[i] = soil_type

        soil_info = realistic_soil_colors[soil_type]
        variations = soil_info['variations']

        # Create base image
        img = np.full((image_size, image_size, 3), soil_info['base'], dtype=np.uint8)

        # 1. Add subtle color variations as randomly placed square patches
        for _ in range(rng.randint(20, 50)):
            x_pos = rng.randint(0, image_size)
            y_pos = rng.randint(0, image_size)
            size = rng.randint(5, 15)
            var_color = variations[rng.randint(0, len(variations))]
            _paint_square(img, x_pos, y_pos, size, var_color)

        # 2. Add noise for natural texture (done in float to avoid uint8
        #    wrap-around, then clipped back into the valid pixel range)
        noise = rng.normal(0, 15, (image_size, image_size, 3))
        img = np.clip(img.astype(np.float32) + noise, 0, 255).astype(np.uint8)

        # 3. Add soil-specific features on top of the noise
        if soil_type == 1:  # Black soil - add clay-like smooth areas
            for _ in range(5):
                x_pos = rng.randint(20, image_size - 20)
                y_pos = rng.randint(20, image_size - 20)
                size = rng.randint(15, 30)
                # size can exceed the 20-pixel margin, hence the clamped helper
                _paint_square(img, x_pos, y_pos, size, [35, 25, 15])

        elif soil_type == 4:  # Desert soil - add sandy particles
            for _ in range(100):
                x_pos = rng.randint(0, image_size)
                y_pos = rng.randint(0, image_size)
                # Small light particles; positions too close to the
                # right/bottom edge for a full 2x2 dot are skipped
                if x_pos < image_size - 2 and y_pos < image_size - 2:
                    img[y_pos:y_pos + 2, x_pos:x_pos + 2] = [220, 210, 180]

        elif soil_type == 5:  # Saline soil - add white salt crystals
            for _ in range(30):
                x_pos = rng.randint(0, image_size - 4)
                y_pos = rng.randint(0, image_size - 4)
                size = rng.randint(2, 6)
                # White salt deposits
                img[y_pos:y_pos + size, x_pos:x_pos + size] = [255, 255, 255]

        X[i] = img

    print(f'✅ Generated {num_samples} realistic synthetic samples')
    return X, y

def _match_soil_type(folder_name):
    """Return the SOIL_LABELS index whose keyword occurs in *folder_name*, else None.

    Labels are split on spaces and "/" and the word "soil" is ignored: it is
    part of every label, so treating it as a keyword would map every
    "<something>_soil" folder to the first class.
    """
    folder_name = folder_name.lower()
    for index, label in enumerate(SOIL_LABELS):
        keywords = [
            word for word in label.lower().replace('/', ' ').split()
            if word != 'soil'
        ]
        if any(word in folder_name for word in keywords):
            return index
    return None


def load_real_data():
    """Load real soil images from an uploaded zip file.

    Prompts for an upload through ``files.upload()``, extracts the first
    uploaded file into ``dataset/`` and walks that tree. Every .png/.jpg/.jpeg
    file is converted to RGB, resized to 224x224 and labelled from the name of
    its containing folder. Folders that match no soil type keep the historical
    fallback of class 0, but a warning is printed so the mislabelling is
    visible. Images that fail to load are reported and skipped.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays (images and integer class ids), or
        ``(None, None)`` when nothing was uploaded, the upload is not a valid
        zip archive, or no image could be loaded.
    """
    print('📤 Please upload your soil dataset zip file')
    print('Expected structure: soil_dataset.zip containing folders for each soil type')

    uploaded = files.upload()

    if not uploaded:
        print('⚠️ No files uploaded')
        return None, None

    zip_name = next(iter(uploaded))
    print(f'📂 Extracting {zip_name}...')

    dataset_path = 'dataset'

    # NOTE(review): opening the archive by name presumes files.upload() also
    # saved the upload into the working directory - confirm for your runtime.
    try:
        with zipfile.ZipFile(zip_name, 'r') as zip_ref:
            zip_ref.extractall(dataset_path)
    except zipfile.BadZipFile:
        print(f'❌ {zip_name} is not a valid zip file')
        return None, None

    X_list = []
    y_list = []

    # The loop variable must not be called "files": that would make the name
    # local to this function and break the files.upload() call above.
    for root, _dirs, filenames in os.walk(dataset_path):
        image_names = [
            name for name in filenames
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
        if not image_names:
            continue

        # Determine the soil type once per folder from the folder name
        folder_name = os.path.basename(root)
        soil_type = _match_soil_type(folder_name)
        if soil_type is None:
            soil_type = 0  # Default to first type
            print(f'⚠️ Folder "{folder_name}" matches no soil type - '
                  f'labelling its images as {SOIL_LABELS[0]}')

        for name in image_names:
            file_path = os.path.join(root, name)
            try:
                # Load and resize image; the context manager closes the file
                with Image.open(file_path) as img:
                    img_array = np.array(img.convert('RGB').resize((224, 224)))
            except Exception as e:
                # Best effort: one unreadable file must not abort the import
                print(f'⚠️ Error loading {file_path}: {e}')
                continue

            X_list.append(img_array)
            y_list.append(soil_type)

    if not X_list:
        print('❌ No valid images found')
        return None, None

    X = np.array(X_list)
    y = np.array(y_list)

    print(f'✅ Loaded {len(X)} real images')
    # minlength keeps one count per soil type even when trailing classes are empty
    print(f'📊 Class distribution: {np.bincount(y, minlength=len(SOIL_LABELS))}')

    return X, y

print('📋 Realistic data loading functions ready!')

In [None]:
# Dataset selection
# Ask which data source to train on, build X_all / y_all accordingly (falling
# back to synthetic data whenever real data is unavailable), split off 20% for
# validation and preview one training image per class.
print('📊 Select your training dataset:')
print('1. Realistic synthetic data (improved colors)')
print('2. Real data only (upload your dataset)')
print('3. Hybrid (realistic synthetic + real data)')

# "choice" is kept as a global: the export cell records it in labels.json.
choice = input('Enter your choice (1-3): ').strip()

if choice == '1':
    print('🎨 Using realistic synthetic data')
    X_all, y_all = generate_realistic_synthetic_data(5000)

elif choice == '2':
    print('📂 Using real data only')
    X_all, y_all = load_real_data()

    if X_all is None:
        print('⚠️ Falling back to realistic synthetic data')
        X_all, y_all = generate_realistic_synthetic_data(5000)

elif choice == '3':
    print('🔄 Using hybrid data (realistic synthetic + real)')

    # Generate realistic synthetic data
    X_syn, y_syn = generate_realistic_synthetic_data(2500)

    # Load real data
    X_real, y_real = load_real_data()

    if X_real is not None:
        # Combine datasets
        X_all = np.concatenate([X_syn, X_real], axis=0)
        y_all = np.concatenate([y_syn, y_real], axis=0)
        print(f'✅ Combined dataset: {len(X_all)} samples')
    else:
        print('⚠️ Using realistic synthetic data only')
        X_all, y_all = X_syn, y_syn

else:
    print('❌ Invalid choice, using realistic synthetic data')
    X_all, y_all = generate_realistic_synthetic_data(5000)

# Split data. Stratifying keeps the class proportions equal in both splits,
# but scikit-learn raises ValueError when it cannot (for example an uploaded
# class with a single image), so fall back to a plain random split then.
try:
    X_train, X_val, y_train, y_val = train_test_split(
        X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
    )
except ValueError as err:
    print(f'⚠️ Stratified split not possible ({err}); using a random split instead')
    X_train, X_val, y_train, y_val = train_test_split(
        X_all, y_all, test_size=0.2, random_state=42
    )

print(f'📊 Final dataset:')
print(f'   Training: {X_train.shape[0]} samples')
print(f'   Validation: {X_val.shape[0]} samples')
print(f'   Classes: {len(np.unique(y_all))}')

# Show sample images with realistic colors (2 x 4 grid, one cell per class)
plt.figure(figsize=(16, 4))
for i, label in enumerate(SOIL_LABELS):
    plt.subplot(2, 4, i+1)
    # Find first sample of each class; the cell stays empty if there is none
    idx = np.where(y_train == i)[0]
    if len(idx) > 0:
        plt.imshow(X_train[idx[0]])
        plt.title(label, fontsize=10, pad=5)
    plt.axis('off')
plt.suptitle('Realistic Soil Sample Images', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

print('🎨 Notice the more realistic soil colors!')

In [None]:
# Create enhanced model
def create_enhanced_model(num_classes=8, input_shape=(224, 224, 3)):
    """Build the (uncompiled) soil-classification CNN.

    The network rescales raw 0-255 pixels to [0, 1], applies random
    augmentation, extracts features with one regular and three separable
    convolution stages, and classifies with a two-layer dense head ending in
    a softmax.

    Args:
        num_classes: Number of output classes (units of the softmax layer).
        input_shape: Shape of one input image as (height, width, channels).

    Returns:
        A ``keras.Sequential`` model; the caller is expected to compile it.
    """
    model = keras.Sequential([
        # Explicit Input object instead of passing input_shape= to the first
        # layer, which newer Keras versions warn about.
        keras.Input(shape=input_shape),

        # Input preprocessing
        layers.Rescaling(1./255),

        # Data augmentation (only during training)
        layers.RandomFlip('horizontal'),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
        # The pixels are already in [0, 1] here, so the layer must be told:
        # with its default value_range of (0, 255) the brightness shift is
        # scaled to 0-255 and would swamp the rescaled image.
        layers.RandomBrightness(0.1, value_range=(0.0, 1.0)),

        # Feature extraction
        layers.Conv2D(32, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2),

        layers.SeparableConv2D(64, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2),

        layers.SeparableConv2D(128, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(2),

        layers.SeparableConv2D(256, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),

        # Classification head
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')
    ])

    return model

# Instantiate the network and configure training: Adam at a 1e-3 learning
# rate, sparse categorical cross-entropy (labels are integer class ids, not
# one-hot vectors), with accuracy reported alongside the loss.
model = create_enhanced_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
)

print('🏗️ Enhanced model created!')
model.summary()

In [None]:
# Train model
print('🚀 Starting training with realistic soil colors...')

# Stop when validation accuracy has not improved for 10 epochs and roll the
# model back to its best weights.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate when validation loss has stalled for 5 epochs.
lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1,
)

callbacks = [early_stopping, lr_on_plateau]

# Up to 50 epochs in mini-batches of 32, validating on the held-out split
# after every epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

print('✅ Training completed!')

In [None]:
# Plot training history
# Two side-by-side panels (accuracy, then loss), each comparing the training
# curve with the validation curve across epochs.
plt.figure(figsize=(12, 4))

history_panels = [
    # (training key, validation key, panel title, y-axis label)
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]

for position, (train_key, val_key, panel_title, y_label) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label='Training')
    plt.plot(history.history[val_key], label='Validation')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# Convert to TFLite
# Convert the trained Keras model with the converter's default optimizations,
# write it next to a labels.json describing the classes, then hand both files
# to the browser for download.
print('🔄 Converting to TFLite...')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save TFLite model (convert() returned the serialized model as bytes)
model_filename = 'puviyan_soil_model_realistic.tflite'
with open(model_filename, 'wb') as model_file:
    model_file.write(tflite_model)

# Create labels JSON file for Flutter: class names plus model metadata.
# "dataset_type" stores the raw menu answer typed in the selection cell.
model_info = {
    "input_size": 224,
    "num_classes": 8,
    "model_type": "soil_classification",
    "version": "5.0_realistic",
    "dataset_type": choice,
    "features": "realistic_soil_colors",
}
labels_data = {"labels": SOIL_LABELS, "model_info": model_info}

labels_filename = 'labels.json'
with open(labels_filename, 'w') as labels_file:
    labels_file.write(json.dumps(labels_data, indent=2))

print(f'✅ Model saved as {model_filename}')
print(f'📏 Model size: {len(tflite_model) / 1024:.1f} KB')
print(f'📋 Labels saved as {labels_filename}')

# Download both files
for export_name in (model_filename, labels_filename):
    files.download(export_name)

print('🎉 Training complete with realistic soil colors!')
print('📱 Ready for Flutter integration!')