# BaggageLens - Siamese Network Training (Google Colab)

This notebook trains the CNN + Siamese network for luggage image matching.

**Steps:**
1. Install dependencies
2. Prepare dataset
3. Train model with GPU
4. Save model
5. Download model files

## Step 1: Install Dependencies

In [None]:
!pip install tensorflow==2.14.0 numpy opencv-python Pillow scikit-learn scipy -q
print("âœ… Dependencies installed")

## Step 2: Import Libraries

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import cv2
from pathlib import Path
import os
from sklearn.model_selection import train_test_split

# Report the runtime: the installed TensorFlow version and whether TensorFlow
# can see at least one GPU (a non-empty device list is truthy).
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {bool(tf.config.list_physical_devices('GPU'))}")

## Step 3: Define CNN Encoder

In [None]:
def create_cnn_encoder(input_shape=(256, 256, 3)):
    """Build the CNN encoder that turns one image into a feature vector.

    Args:
        input_shape: Shape of a single input image, without the batch axis.

    Returns:
        A ``keras.Model`` named ``'encoder'`` whose output is the activation
        of the final 256-unit dense layer.
    """
    inputs = keras.Input(shape=input_shape)

    # Four conv/pool stages that differ only in their filter count. Layers are
    # created in the same order as before, so auto-generated layer names and
    # the weight layout are unchanged.
    x = inputs
    for filters in (32, 64, 128, 256):
        x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = layers.MaxPooling2D((2, 2))(x)

    # Dense head: flatten the feature maps and project down to 256 features,
    # with dropout between the two dense layers.
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(256, activation='relu')(x)

    return keras.Model(inputs, x, name='encoder')

print("✅ CNN encoder defined")

## Step 4: Define L2 Distance Layer

In [None]:
class L2Distance(layers.Layer):
    """Custom layer computing the Euclidean distance between two tensors.

    The layer is called on a pair ``[x1, x2]``. Squared differences are summed
    over axis 1 and that axis is kept (``keepdims=True``), so each sample
    yields a single distance value.
    """

    def call(self, x):
        x1, x2 = x
        sum_squared = tf.reduce_sum(tf.square(x1 - x2), axis=1, keepdims=True)
        # The derivative of sqrt(s) is infinite at s == 0, so two identical
        # embeddings would produce NaN gradients and corrupt the weights.
        # Clamping to a tiny positive epsilon keeps the gradient finite while
        # changing the distance only negligibly.
        return tf.math.sqrt(tf.maximum(sum_squared, keras.backend.epsilon()))

print("✅ L2Distance layer defined")

## Step 5: Create Siamese Network

In [None]:
def create_siamese_network(input_shape=(256, 256, 3)):
    """Create the full Siamese network for comparing two images.

    Both inputs go through the *same* encoder instance, so the two branches
    share weights. The distance between the two embeddings is then mapped to
    a similarity score.

    Args:
        input_shape: Shape of a single input image, without the batch axis.

    Returns:
        A ``keras.Model`` named ``'siamese_network'`` that takes
        ``[image_1, image_2]`` and outputs ``1 / (1 + distance)``.
    """
    encoder = create_cnn_encoder(input_shape)

    # Input layers for the two images being compared
    input_1 = keras.Input(shape=input_shape)
    input_2 = keras.Input(shape=input_shape)

    # Encode both images with the shared encoder
    encoded_1 = encoder(input_1)
    encoded_2 = encoder(input_2)

    # Euclidean distance between the two embeddings
    distance = L2Distance()([encoded_1, encoded_2])

    # Map distance to a similarity score: distance 0 -> 1, large distance -> 0.
    # Plain tensor arithmetic is used instead of `Lambda(lambda ...)` because
    # a Lambda layer is saved as marshalled Python bytecode, which is not
    # portable between Python versions; this model is meant to be downloaded
    # and loaded outside Colab.
    similarity = 1.0 / (1.0 + distance)

    return keras.Model([input_1, input_2], similarity, name='siamese_network')

model = create_siamese_network()
model.summary()

## Step 6: Generate Sample Training Data

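**For production:** Upload your dataset folder containing `lost/` and `found/` subdirectories, and build the training pairs from those images instead of the synthetic data generated below.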

In [None]:
# Create sample training data for demo
IMAGE_SIZE = (256, 256)

# Generate synthetic training pairs
def generate_sample_data(num_samples=100, image_size=IMAGE_SIZE):
    """Generate synthetic image pairs for training.

    Args:
        num_samples: Number of image pairs to generate.
        image_size: ``(height, width)`` of each generated image. Defaults to
            ``IMAGE_SIZE`` so the data matches the model's input size.

    Returns:
        A tuple ``(X1, X2, y)``:
        ``X1`` and ``X2`` are float32 arrays of shape
        ``(num_samples, height, width, 3)`` with values in ``[0, 1]``;
        ``y`` is a float32 array of shape ``(num_samples,)`` holding 1 for
        similar pairs and 0 for dissimilar pairs.
    """
    height, width = image_size
    shape = (num_samples, height, width, 3)

    X1 = np.random.rand(*shape).astype(np.float32)
    X2 = np.random.rand(*shape).astype(np.float32)

    # Create labels: 1 for similar pairs, 0 for dissimilar
    y = np.random.randint(0, 2, num_samples).astype(np.float32)

    # For similar pairs, the second image is the first plus small noise.
    # A boolean mask handles all of them at once instead of a Python loop.
    similar = y == 1
    noise = np.random.rand(int(similar.sum()), height, width, 3) * 0.1
    # Clip so the noisy copy stays in the same [0, 1] range as every other
    # image; without it, values could reach 1.1.
    X2[similar] = np.clip(X1[similar] + noise, 0.0, 1.0)

    return X1, X2, y

# Build two independent synthetic sets: 500 pairs for training and 100 pairs
# for validation, then report the shape of the first-image array of each.
print("Generating sample training data...")
X1_train, X2_train, y_train = generate_sample_data(num_samples=500)
X1_val, X2_val, y_val = generate_sample_data(num_samples=100)

print(
    f"Training data: {X1_train.shape}\n"
    f"Validation data: {X1_val.shape}"
)

## Step 7: Compile Model

In [None]:
# Binary cross-entropy compares the network's similarity output with the
# pair label (1 = similar, 0 = dissimilar); accuracy is tracked as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy']
)
print("✅ Model compiled")

## Step 8: Train Model

In [None]:
# Fit on the training pairs and evaluate on the validation pairs after every
# epoch; `history` keeps the per-epoch loss/metric values.
print("Training model...")
history = model.fit(
    [X1_train, X2_train], y_train,
    batch_size=32,
    epochs=20,
    validation_data=([X1_val, X2_val], y_val),
    verbose=1
)
print("✅ Training complete")

## Step 9: Evaluate Model

In [None]:
# evaluate() returns the loss followed by each compiled metric; the model was
# compiled with a single metric ('accuracy'), so two values come back.
loss, accuracy = model.evaluate([X1_val, X2_val], y_val, verbose=0)
print("\n📊 Model Performance:")
print(f"   Loss: {loss:.4f}")
print(f"   Accuracy: {accuracy:.4f}")

## Step 10: Save Model

In [None]:
# Create models directory if it doesn't exist
os.makedirs('models', exist_ok=True)

# Save the full model as a single HDF5 file (this is the file that the
# download step fetches)
model.save('models/siamese_model.h5')
print("✅ Model saved as siamese_model.h5")

# Also save in TensorFlow SavedModel format (recommended); a path without a
# file extension produces a directory rather than a single file
model.save('models/siamese_model')
print("✅ Model saved in TensorFlow SavedModel format")

## Step 11: Download Model Files

Run the cell below to download the trained model file (`siamese_model.h5`) to your computer.

In [None]:
# google.colab is only importable inside a Colab runtime, so it is imported
# here rather than with the other libraries.
from google.colab import files

# Download the .h5 model
print("Downloading model files...")
files.download('models/siamese_model.h5')
print("✅ Downloaded: siamese_model.h5")

## Instructions for Local Use

1. Download the trained model from Colab
2. Place `siamese_model.h5` in the `models/` folder
3. The API will automatically load it
4. Run: `python api.py`