<a href="https://colab.research.google.com/github/chaitanyaj2121/Comp-Multiservises-mini-project/blob/main/ca1_deeplearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Question 1: Build a Simple Neural Network
Implement a single-layer neural network using NumPy.
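Train it on a small dataset (e.g., the XOR function). Note that XOR is not linearly separable, so a network without a hidden layer cannot fit it exactly.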


In [None]:
import numpy as np

# XOR truth table: each row of X is one (a, b) input pair and the matching
# row of y holds the expected a XOR b.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Hyperparameters
input_size, output_size = 2, 1
learning_rate = 0.1
iterations = 10_000

# Starting parameters, drawn uniformly from [-1, 1): one weight per
# (input, output) pair and one bias per output.
weights = np.random.uniform(low=-1, high=1, size=(input_size, output_size))
bias = np.random.uniform(low=-1, high=1, size=(1, output_size))

# Sigmoid activation function
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through ``np.logaddexp``
    so that large negative inputs do not overflow ``np.exp``; the direct
    formula triggers an overflow RuntimeWarning for them.

    Args:
        x: Scalar or NumPy array of pre-activation values.

    Returns:
        Sigmoid of ``x``, in [0, 1], with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

# Derivative of the sigmoid function
def sigmoid_derivative(x):
    """Return the sigmoid's slope, given the sigmoid's *output*.

    ``x`` is expected to already be an activation s = sigmoid(z), not the
    pre-activation z; the slope with respect to z is then s * (1 - s).
    """
    complement = 1 - x
    return x * complement

# Full-batch gradient descent on the squared error.
# NOTE: the model is sigmoid(X . weights + bias) with no hidden layer, so its
# decision boundary is a single line. XOR is not linearly separable, so the
# loss is expected to plateau instead of approaching zero.
for epoch in range(iterations):
    # Forward pass over all four samples at once
    weighted_sum = X.dot(weights) + bias
    predictions = sigmoid(weighted_sum)

    # Residual between targets and current outputs
    error = y - predictions

    # Residual scaled by the sigmoid slope at each output
    d_pred = error * sigmoid_derivative(predictions)

    # In-place parameter step
    weights += learning_rate * X.T.dot(d_pred)
    bias += learning_rate * d_pred.sum(axis=0, keepdims=True)

    # Report the mean squared error on every 1000th epoch (including epoch 0)
    if not epoch % 1000:
        loss = np.square(error).mean()
        print(f'Epoch {epoch}, Loss: {loss}')

# Outputs of the last forward pass, rounded to the nearest class label
print("\nFinal Predictions:")
print(predictions.round())


# Question 2: Gradient Descent Visualization
Implement gradient descent for a simple function.
Plot the loss function over iterations to show convergence.



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Objective for the demo: f(x) = x^2
def function(x):
    """Return the square of ``x`` (the quantity gradient descent minimises)."""
    return pow(x, 2)

# Analytic derivative of the objective: f'(x) = 2x
def gradient(x):
    """Return the slope of f(x) = x^2 at ``x``."""
    return x * 2

# Hyperparameters and starting point
learning_rate = 0.1
iterations = 50
initial_x = 5

# History of visited points and their objective values (for plotting)
x_values, y_values = [], []

# Gradient descent: record the state *before* each update, then step
# against the gradient.
x = initial_x
for i in range(iterations):
    x_values.append(x)
    y_values.append(function(x))
    x -= learning_rate * gradient(x)

# Loss curve: objective value at the start of each iteration
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(iterations), y_values, 'b-', marker='o')
ax.set_title('Gradient Descent Convergence')
ax.set_xlabel('Iterations')
ax.set_ylabel('Loss (f(x) = x^2)')
ax.grid(True)
plt.show()


# Question 3: Image Classification Using CNN
Train a CNN model on a small dataset (e.g., CIFAR-10 or MNIST).
Compare performance with and without data augmentation.



In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST and prepare it for the CNN:
#   images -> float32, divided by 255, reshaped to (N, 28, 28, 1)
#   labels -> one-hot vectors over 10 classes
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Define CNN model
def create_model(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile a small convolutional classifier.

    The architecture is two Conv2D + MaxPooling2D stages followed by a
    64-unit dense layer and a softmax output. The input shape is declared
    with an explicit ``tf.keras.Input`` rather than an ``input_shape=``
    argument on the first layer, which current Keras releases warn about
    inside ``Sequential`` models.

    Args:
        input_shape: Shape of one input image (height, width, channels).
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled Keras model (Adam optimizer, categorical cross-entropy
        loss, accuracy metric).
    """
    model = Sequential([
        tf.keras.Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Use one mini-batch size for both runs so that augmentation is the only
# difference between them; without an explicit value the first fit() call
# would use its own default instead of the 64 used by the generator.
BATCH_SIZE = 64

# NOTE: the test split doubles as validation data in both runs, so
# val_accuracy is not an independent estimate if it is used for model choice.

# Model without Data Augmentation
model_no_aug = create_model()
history_no_aug = model_no_aug.fit(x_train, y_train,
                                  batch_size=BATCH_SIZE,
                                  epochs=10,
                                  validation_data=(x_test, y_test))

# Data Augmentation: small random rotations, shifts and zooms
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)
# NOTE(review): fit() computes dataset statistics for feature-wise
# normalisation / whitening, none of which are enabled above, so this call
# looks redundant -- confirm before removing.
datagen.fit(x_train)

# Model with Data Augmentation
model_with_aug = create_model()
history_with_aug = model_with_aug.fit(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
                                      epochs=10,
                                      validation_data=(x_test, y_test))

# Compare Performance
import matplotlib.pyplot as plt

# One panel per metric; each panel overlays the two training runs.
panels = [
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
]
runs = [
    (history_no_aug, 'No Augmentation'),
    (history_with_aug, 'With Augmentation'),
]

plt.figure(figsize=(12, 5))

for position, (metric, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for run_history, run_label in runs:
        plt.plot(run_history.history[metric], label=run_label)
    plt.title(panel_title)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

plt.show()


In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, applications, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import cv2
import time
from sklearn.model_selection import train_test_split

# Pipeline-wide settings read by the functions below.
CONFIG = {
    # Data source. Options: 'coco', 'voc', 'custom'
    "dataset": "custom",
    # Detector architecture. Options: 'yolo', 'ssd', 'faster_rcnn', 'custom'
    "model_type": "ssd",
    # Input geometry and optimisation settings
    "image_size": (300, 300),
    "batch_size": 16,
    "epochs": 50,
    "learning_rate": 0.001,
    # Feature switches
    "data_augmentation": True,
    "pretrained": True,
    # Number of object classes, excluding the background class
    "num_classes": 3,
}

# 1. Dataset Preprocessing
def _synthetic_detection_samples(image_size, num_classes, num_samples):
    """Generate random images with random box annotations (demo stand-in for real data)."""
    # randint's upper bound is exclusive, so 256 is needed to cover 0..255
    images = np.random.randint(0, 256, (num_samples, *image_size, 3), dtype=np.uint8)

    annotations = []
    for _ in range(num_samples):
        num_objects = np.random.randint(1, 4)
        sample_boxes = []

        for _ in range(num_objects):
            class_id = np.random.randint(0, num_classes)
            # Clamp the box so it always fits; without this the position draw
            # below fails for images smaller than the random box size.
            width = min(np.random.randint(50, 150), image_size[0] - 1)
            height = min(np.random.randint(50, 150), image_size[1] - 1)
            x_min = np.random.randint(0, image_size[0] - width)
            y_min = np.random.randint(0, image_size[1] - height)

            # Format: [class_id, x_min, y_min, x_max, y_max, confidence],
            # with coordinates normalised by the image size.
            sample_boxes.append([
                class_id,
                x_min / image_size[0],
                y_min / image_size[1],
                (x_min + width) / image_size[0],
                (y_min + height) / image_size[1],
                1.0,  # Confidence
            ])

        annotations.append(np.array(sample_boxes))

    return images, annotations


def preprocess_dataset(dataset_path, image_size, dataset=None, num_classes=None,
                       num_samples=100):
    """
    Produce images and box annotations for the object detection demo

    No files are read: for every supported dataset name the function returns
    synthetic random images and random boxes.

    Args:
        dataset_path: Path to the dataset (only echoed in the log message)
        image_size: Two-element image size; used for the two spatial axes of
            the image array, with index 0 normalising x and index 1
            normalising y coordinates.
            NOTE(review): for non-square sizes confirm which entry is meant
            to be the width.
        dataset: 'custom', 'coco' or 'voc'; defaults to CONFIG['dataset']
        num_classes: Number of object classes; defaults to CONFIG['num_classes']
        num_samples: Number of synthetic samples to generate

    Returns:
        (images, annotations): a uint8 array of shape
        (num_samples, *image_size, 3) and a list with one (num_boxes, 6)
        array per image. For an unrecognised dataset name two empty lists
        are returned.
    """
    # Fall back to the module configuration only when not given explicitly
    if dataset is None:
        dataset = CONFIG['dataset']
    if num_classes is None:
        num_classes = CONFIG['num_classes']

    print(f"Loading dataset from {dataset_path}...")

    if dataset == 'custom':
        # In a real scenario, you would parse annotation files (XML, JSON, etc.)
        print("Creating synthetic data for demonstration...")
    elif dataset in ('coco', 'voc'):
        # You would use libraries like pycocotools for COCO
        # or parse XML annotations for VOC
        print(f"In a real implementation, you would load the {dataset} dataset here.")
    else:
        # Unrecognised dataset name: nothing is generated
        print("Dataset loaded and preprocessed")
        return [], []

    return _synthetic_detection_samples(image_size, num_classes, num_samples)

# 2. Build or Load Model
def _build_backbone_detector(backbone, preprocess, input_shape, num_classes,
                             num_anchors, head_kernel, pretrained):
    """Attach a simplified two-layer conv detection head to a frozen backbone."""
    full_shape = (*input_shape, 3)
    base_model = backbone(
        input_shape=full_shape,
        include_top=False,
        weights='imagenet' if pretrained else None
    )

    # Freeze the base model layers
    base_model.trainable = False

    inputs = layers.Input(shape=full_shape)
    x = preprocess(inputs)
    x = base_model(x)

    # Detection head: one feature conv, then (num_classes + 5) outputs per anchor
    x = layers.Conv2D(256, (3, 3), padding='same', activation='relu')(x)
    output = layers.Conv2D((num_classes + 5) * num_anchors, head_kernel, padding='same')(x)

    return models.Model(inputs=inputs, outputs=output)


def get_model(model_type, input_shape, num_classes, pretrained=True):
    """
    Build a simplified object detection model

    Every variant is a demonstration placeholder, not a faithful
    implementation of the named architecture.

    Args:
        model_type: Type of model ('yolo', 'ssd', 'faster_rcnn', 'custom')
        input_shape: Two-element spatial input size; a channel axis of 3 is
            appended for every model type
        num_classes: Number of object classes to detect
        pretrained: Whether the backbone loads ImageNet weights (ignored for
            'custom', which has no backbone)

    Returns:
        Object detection model

    Raises:
        ValueError: If model_type is not one of the supported names
    """
    if model_type == 'custom':
        # Build a custom CNN for object detection
        model = models.Sequential([
            # Channel axis added here so that all model types accept the
            # same two-element input_shape.
            layers.Input(shape=(*input_shape, 3)),

            # Feature extraction layers
            layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
            layers.MaxPooling2D((2, 2)),

            # Detection layers - simplified for demonstration
            layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
            # 5 anchor boxes, each with (x,y,w,h,conf,classes)
            layers.Conv2D((num_classes + 5) * 5, (1, 1), padding='same')
        ])

        print("Custom CNN model created")
        return model

    if model_type == 'ssd':
        # A real SSD would add multiple detection heads on feature maps of
        # different scales; this uses a single head on MobileNetV2.
        model = _build_backbone_detector(
            applications.MobileNetV2, preprocess_input, input_shape, num_classes,
            num_anchors=6, head_kernel=(3, 3), pretrained=pretrained
        )
        print("SSD model (simplified) with MobileNetV2 backbone created")
        return model

    if model_type == 'yolo':
        print("In a real implementation, you would load or implement YOLO here")
        model = _build_backbone_detector(
            applications.MobileNetV2, preprocess_input, input_shape, num_classes,
            num_anchors=3, head_kernel=(1, 1), pretrained=pretrained
        )
        print("YOLO-style model (simplified) with MobileNetV2 backbone created")
        return model

    if model_type == 'faster_rcnn':
        print("In a real implementation, you would load or implement Faster R-CNN here")
        print("Faster R-CNN is more complex and requires region proposal network")
        print("Using a simplified placeholder model for demonstration")
        # ResNet50 has its own input preprocessing; the MobileNetV2 function
        # imported at module level does not match its pretrained weights.
        model = _build_backbone_detector(
            applications.ResNet50, applications.resnet50.preprocess_input,
            input_shape, num_classes,
            num_anchors=9, head_kernel=(1, 1), pretrained=pretrained
        )
        print("Faster R-CNN-style model (simplified) with ResNet50 backbone created")
        return model

    # Fail loudly instead of returning None for a misspelled model type
    raise ValueError(
        f"Unsupported model_type {model_type!r}; "
        "expected 'yolo', 'ssd', 'faster_rcnn' or 'custom'"
    )

# 3. Data Augmentation
def create_data_generators(X, y, image_size, batch_size, augmentation=True):
    """
    Split the data into training and validation parts

    Placeholder for real generators: the data is only split, and when
    augmentation is requested the planned techniques are printed but no
    transformation is applied. image_size and batch_size are accepted but
    not used.

    Args:
        X: Images
        y: Annotations
        image_size: Target image size (unused)
        batch_size: Batch size (unused)
        augmentation: Whether to announce the data augmentation steps

    Returns:
        ((X_train, y_train), (X_val, y_val)) from an 80/20 split
    """
    # Fixed random_state, so the partition is the same on every call
    parts = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = parts

    print(f"Training set: {len(X_train)} samples")
    print(f"Validation set: {len(X_val)} samples")

    if augmentation:
        # A real generator would also have to transform the bounding boxes
        # together with the images.
        announcements = (
            "Applying data augmentation techniques:",
            "- Random horizontal flip",
            "- Random brightness and contrast adjustment",
            "- Random translation",
        )
        for line in announcements:
            print(line)

    training_pair = (X_train, y_train)
    validation_pair = (X_val, y_val)
    return training_pair, validation_pair

# 4. Custom Loss Function
def detection_loss(y_true, y_pred):
    """
    Placeholder object detection loss; returns y_pred unchanged

    This is not a usable training loss. A real implementation (SSD, YOLO or
    Faster R-CNN loss) would split y_pred into class scores and box
    coordinates and combine a classification term (e.g. sparse categorical
    cross-entropy) with a box regression term (e.g. mean squared error).

    Args:
        y_true: Ground truth annotations (unused)
        y_pred: Model predictions

    Returns:
        y_pred, unmodified
    """
    print("In a real implementation, you would use a proper object detection loss")

    # No loss objects are built here: the placeholder never applied them.
    return y_pred  # Placeholder

# 5. Training Function
def train_model(model, train_data, val_data, epochs, learning_rate):
    """
    Compile the model and return a simulated training history

    No fitting takes place: train_data and val_data are not read, and the
    returned history holds random numbers.

    Args:
        model: Object detection model (compiled in place)
        train_data: Training data (unused)
        val_data: Validation data (unused)
        epochs: Number of training epochs; the simulated curves hold
            min(epochs, 10) points
        learning_rate: Learning rate for the Adam optimizer

    Returns:
        (model, history), where history is a plain dict with the keys
        'loss', 'val_loss', 'accuracy' and 'val_accuracy'
    """
    print(f"Training model for {epochs} epochs with learning rate {learning_rate}")

    # Placeholder compile settings: plain MSE stands in for a detection loss
    # and accuracy is kept only for the demo output.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['accuracy'],
    )

    # Prepared for the real fit() call sketched below; the simulation does
    # not use them.
    callbacks = [
        ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss'),
        EarlyStopping(patience=10, monitor='val_loss'),
        ReduceLROnPlateau(factor=0.1, patience=5, monitor='val_loss'),
    ]

    print("In a real implementation, you would train with actual data")
    print("Simulating training process...")

    # Random curves, drawn in the order loss, val_loss, accuracy, val_accuracy
    steps = range(min(epochs, 10))
    history = {
        'loss': [0.5 * np.random.random() for _ in steps],
        'val_loss': [0.6 * np.random.random() for _ in steps],
        'accuracy': [0.4 * np.random.random() + 0.5 for _ in steps],
        'val_accuracy': [0.3 * np.random.random() + 0.5 for _ in steps],
    }

    # Real training would look like:
    #     history = model.fit(train_generator, validation_data=val_generator,
    #                         epochs=epochs, callbacks=callbacks)

    print("Model training completed (simulated)")
    return model, history

# 6. Evaluation Function
def evaluate_model(model, test_data):
    """
    Report simulated evaluation metrics

    Neither argument is read: the mAP and the three per-class AP values are
    random numbers from fixed ranges, printed and then returned.

    Args:
        model: Trained object detection model (unused)
        test_data: Test data (unused)

    Returns:
        Dict with 'mAP' (float) and 'class_APs' (dict of class name to AP)
    """
    for message in (
        "Evaluating model performance...",
        "In a real implementation, you would calculate mAP",
        "Simulating evaluation process...",
    ):
        print(message)

    # Overall score first, then one draw per class in class order
    mean_ap = 0.65 + np.random.random() * 0.2

    class_labels = ('class_0', 'class_1', 'class_2')
    ap_ranges = ((0.7, 0.2), (0.6, 0.2), (0.5, 0.3))  # (floor, spread)
    per_class = {}
    for label, (floor, spread) in zip(class_labels, ap_ranges):
        per_class[label] = floor + np.random.random() * spread

    print(f"Simulated mAP: {mean_ap:.4f}")
    for label, score in per_class.items():
        print(f"AP for {label}: {score:.4f}")

    return {'mAP': mean_ap, 'class_APs': per_class}

# 7. Inference Function
def perform_inference(model, image, image_size, conf_threshold=0.5):
    """
    Return simulated detections for a single image

    The model is never called: the image is resized and batched, but the
    detections are random boxes.

    Args:
        model: Trained object detection model (unused)
        image: Input image
        image_size: Target image size
        conf_threshold: Minimum confidence a detection needs to be kept

    Returns:
        Array with one row per kept detection:
        [class_id, x_min, y_min, x_max, y_max, confidence],
        with coordinates normalised by image_size
    """
    # Resize and add a leading batch axis; the simulation below does not
    # read this tensor.
    img = np.expand_dims(cv2.resize(image, image_size), axis=0)

    print("In a real implementation, you would run the model on the input image")
    print("Simulating inference process...")

    detections = []
    for _ in range(np.random.randint(1, 5)):
        class_id = np.random.randint(0, CONFIG['num_classes'])
        conf = 0.5 + np.random.random() * 0.5

        # Drop candidates that do not reach the threshold
        if not conf >= conf_threshold:
            continue

        box_w = np.random.randint(50, 150)
        box_h = np.random.randint(50, 150)
        left = np.random.randint(0, image_size[0] - box_w)
        top = np.random.randint(0, image_size[1] - box_h)

        detections.append([
            class_id,
            left / image_size[0],
            top / image_size[1],
            (left + box_w) / image_size[0],
            (top + box_h) / image_size[1],
            conf,
        ])

    return np.array(detections)

# 8. Visualization Function
def visualize_detections(image, detections, class_names, image_size):
    """
    Draw labelled bounding boxes on a copy of the image

    Args:
        image: Input image (not modified)
        detections: Iterable of rows
            [class_id, x_min, y_min, x_max, y_max, confidence] with
            coordinates normalised to [0, 1]
        class_names: Names of the object classes, indexed by class_id
        image_size: Image size used for resizing and for scaling the
            normalised coordinates back to pixels

    Returns:
        Image with drawn bounding boxes
    """
    # Compare as tuples so that a list-valued image_size does not always
    # count as a mismatch and force a resize.
    if tuple(image.shape[:2]) != tuple(image_size):
        image = cv2.resize(image, image_size)

    # Draw on a copy so the caller's image stays untouched
    img = image.copy()

    # One colour per class, reused cyclically.
    # NOTE(review): the names hold for RGB channel order; OpenCV's own
    # display/IO functions assume BGR -- confirm how the result is shown.
    colors = [
        (255, 0, 0),    # Red
        (0, 255, 0),    # Green
        (0, 0, 255),    # Blue
        (255, 255, 0),  # Yellow
        (255, 0, 255),  # Magenta
        (0, 255, 255),  # Cyan
    ]

    for det in detections:
        class_id, x_min, y_min, x_max, y_max, conf = det

        # Convert normalized coordinates to pixel coordinates
        x_min = int(x_min * image_size[0])
        y_min = int(y_min * image_size[1])
        x_max = int(x_max * image_size[0])
        y_max = int(y_max * image_size[1])

        color = colors[int(class_id) % len(colors)]

        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)

        class_name = class_names[int(class_id)]
        label = f"{class_name}: {conf:.2f}"
        # Keep the text baseline inside the image: for boxes that touch the
        # top edge, y_min - 10 would place the label out of view.
        label_y = max(y_min - 10, 15)
        cv2.putText(img, label, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return img

# 9. Main Function
def main():
    """
    Run the demo pipeline end to end

    Steps: synthetic dataset, train/validation split, model construction,
    simulated training, simulated evaluation, and one simulated inference
    pass with box drawing. All settings come from CONFIG.
    """
    print("Object Detection using CNN")
    print(f"Configuration: {CONFIG}")

    # --- 1. Dataset ---
    print("\n1. Dataset Preprocessing")
    image_size = CONFIG['image_size']
    images, annotations = preprocess_dataset("./data", image_size)  # placeholder path

    # --- 2. Split (and announced augmentation) ---
    print("\n2. Data Augmentation")
    train_data, val_data = create_data_generators(
        images,
        annotations,
        image_size,
        CONFIG['batch_size'],
        augmentation=CONFIG['data_augmentation'],
    )

    # --- 3. Model ---
    print("\n3. Model Implementation")
    model = get_model(
        CONFIG['model_type'],
        image_size,
        CONFIG['num_classes'],
        pretrained=CONFIG['pretrained'],
    )

    print("\nModel Summary:")
    model.summary(line_length=80)

    # --- 4. Training ---
    print("\n4. Training and Evaluation")
    trained_model, history = train_model(
        model,
        train_data,
        val_data,
        CONFIG['epochs'],
        CONFIG['learning_rate'],
    )

    # --- 5. Evaluation on the validation split ---
    print("\n5. Model Evaluation")
    metrics = evaluate_model(trained_model, val_data)

    # --- 6. Inference on one random test image ---
    print("\n6. Inference on Test Images")
    test_image = np.random.randint(0, 255, (*image_size, 3), dtype=np.uint8)
    class_names = [f"Class_{i}" for i in range(CONFIG['num_classes'])]
    detections = perform_inference(trained_model, test_image, image_size)

    if len(detections) == 0:
        print("No objects detected in the test image")
    else:
        # The annotated image is produced but neither shown nor saved here
        result_image = visualize_detections(test_image, detections, class_names, image_size)
        print(f"Detected {len(detections)} objects in the test image")

    print("\nObject Detection Project Completed")

if __name__ == "__main__":
    main()