# 🚀 Oil Spill Detection Internship Project
### Combined Weeks 1–4 Notebook (Updated with Real Training)
---

## ✅ Week 1: Data Collection

In [None]:

from google.colab import drive
drive.mount('/content/drive')

!unzip -q "/content/drive/MyDrive/Oil_Spill_Project/dataset.zip" -d /content/oil_spill_data
!ls /content/oil_spill_data


In [None]:

import os

# Root folder of the extracted dataset; later cells build their paths from it.
base_dir = "/content/oil_spill_data"

# List what was extracted at the top level.
top_level_entries = os.listdir(base_dir)
print("Top-level folders:", top_level_entries)

# List the contents of the train split.
train_entries = os.listdir(os.path.join(base_dir, "train"))
print("\nTrain folder structure:", train_entries)


In [None]:

def count_files(path):
    """Return the number of non-hidden files found anywhere under ``path``.

    The directory tree is walked recursively with ``os.walk``; files whose
    names start with a dot are not counted.
    """
    return sum(
        1
        for _root, _dirs, filenames in os.walk(path)
        for filename in filenames
        if not filename.startswith('.')
    )

# Print the file count of the images and masks folder of every split.
for split in ("Train", "Val", "Test"):
    for kind in ("Images", "Masks"):
        label = f"{split} {kind}"
        folder = os.path.join(base_dir, f"{split.lower()}/{kind.lower()}")
        # Left-pad the label to 12 characters so the colons line up.
        print(f"{label:<12}:", count_files(folder))


In [None]:

import cv2, matplotlib.pyplot as plt, random

img_dir = os.path.join(base_dir, "train/images")
mask_dir = os.path.join(base_dir, "train/masks")

# Choose a random training image, ignoring hidden files (e.g. .DS_Store)
# that os.listdir also returns but that are not images.
candidates = sorted(f for f in os.listdir(img_dir) if not f.startswith('.'))
random_img = random.choice(candidates)

# The mask has the image's base name with a .png extension. splitext swaps
# only the real extension, whatever it is, instead of every ".jpg" substring.
mask_name = os.path.splitext(random_img)[0] + ".png"

img = cv2.imread(os.path.join(img_dir, random_img))
mask = cv2.imread(os.path.join(mask_dir, mask_name), cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None (it does not raise) when a file is missing or
# unreadable; fail here with a clear message instead of inside cvtColor/imshow.
if img is None:
    raise FileNotFoundError(f"Could not read image: {os.path.join(img_dir, random_img)}")
if mask is None:
    raise FileNotFoundError(f"Could not read mask: {os.path.join(mask_dir, mask_name)}")

plt.figure(figsize=(10,5))
# OpenCV loads colour images as BGR; matplotlib expects RGB.
plt.subplot(1,2,1); plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)); plt.title("Image")
plt.subplot(1,2,2); plt.imshow(mask, cmap="gray"); plt.title("Mask")
plt.show()


## ✅ Week 2: Data Exploration & Preprocessing

In [None]:

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def preprocess_image(image, size=(256,256)):
    """Resize ``image`` to ``size`` and divide its values by 255.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Target size passed straight to ``cv2.resize``.

    Returns:
        The resized image divided by 255.0 (a floating-point array).
    """
    resized = cv2.resize(image, size)
    return resized / 255.0

def preprocess_mask(mask, size=(256,256)):
    """Resize ``mask`` to ``size`` and divide its values by 255.

    Nearest-neighbour interpolation is used so resizing copies existing mask
    values and never blends neighbouring labels into new intermediate values.

    Args:
        mask: Mask array accepted by ``cv2.resize``.
        size: Target size passed straight to ``cv2.resize``.

    Returns:
        The resized mask divided by 255.0 (a floating-point array).
    """
    resized = cv2.resize(mask, size, interpolation=cv2.INTER_NEAREST)
    return resized / 255.0

def speckle_reduction(image):
    """Return ``image`` filtered with a median blur of aperture size 3."""
    aperture = 3
    return cv2.medianBlur(image, aperture)

# Run the preprocessing helpers on the sample image/mask loaded earlier.
img_pre = preprocess_image(img)
mask_pre = preprocess_mask(mask)

# Scale the normalized image back to 8-bit before median filtering. Round
# first: x / 255.0 * 255 can land just below an integer, and a bare
# astype(uint8) would truncate that to the next lower pixel value.
img_denoised = speckle_reduction(np.round(img_pre * 255).astype(np.uint8))

plt.figure(figsize=(15,5))
plt.subplot(1,3,1); plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)); plt.title("Original")
# img_pre and img_denoised come from a cv2.imread image and are still in BGR
# order. Convert them to RGB for matplotlib, as is done for the original,
# so all three panels use the same colours.
plt.subplot(1,3,2); plt.imshow(img_pre[..., ::-1]); plt.title("Resized + Normalized")
plt.subplot(1,3,3); plt.imshow(cv2.cvtColor(img_denoised, cv2.COLOR_BGR2RGB)); plt.title("After Noise Reduction")
plt.show()


In [None]:

# Augmentation settings used for the preview below.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=[0.8,1.2]
)

# The sample image was loaded with cv2.imread and is therefore BGR; convert it
# to RGB once so the augmented previews are shown with the right colours.
img_resized = cv2.cvtColor(cv2.resize(img, (256,256)), cv2.COLOR_BGR2RGB)
# flow() expects a batch dimension: (1, height, width, channels).
img_resized = np.expand_dims(img_resized, 0)

plt.figure(figsize=(12,8))
# flow() yields batches forever, so stop after six samples (a 2x3 grid).
# A fixed seed makes the preview identical on every run.
for i, batch in enumerate(datagen.flow(img_resized, batch_size=1, seed=42)):
    plt.subplot(2,3,i+1)
    plt.imshow(batch[0].astype("uint8"))
    if i == 5: break
plt.suptitle("Augmented Samples")
plt.show()


## ✅ Week 3: Model Development (U-Net)

In [None]:

import tensorflow as tf
from tensorflow.keras import layers, models

def unet_model(input_size=(256,256,3)):
    """Build a small U-Net with two down-sampling and two up-sampling stages.

    The encoder applies two 3x3 ReLU convolutions per stage (64, then 128
    filters), each followed by 2x2 max pooling. The bottleneck uses 256
    filters. The decoder upsamples with stride-2 transposed convolutions and
    concatenates the matching encoder output before each convolution pair.
    A final 1x1 convolution with a sigmoid produces a single-channel output.

    Args:
        input_size: Shape of one input sample, passed to ``layers.Input``.

    Returns:
        An uncompiled ``models.Model`` mapping the input to the sigmoid output.
    """
    def double_conv(tensor, filters):
        # Two successive 3x3 'same'-padded ReLU convolutions.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    inputs = layers.Input(input_size)

    # Encoder
    enc1 = double_conv(inputs, 64)
    pool1 = layers.MaxPooling2D((2, 2))(enc1)

    enc2 = double_conv(pool1, 128)
    pool2 = layers.MaxPooling2D((2, 2))(enc2)

    # Bottleneck
    bottleneck = double_conv(pool2, 256)

    # Decoder: upsample, merge with the encoder output of the same stage, convolve
    up1 = layers.Conv2DTranspose(128, (2,2), strides=(2,2), padding='same')(bottleneck)
    dec1 = double_conv(layers.concatenate([up1, enc2]), 128)

    up2 = layers.Conv2DTranspose(64, (2,2), strides=(2,2), padding='same')(dec1)
    dec2 = double_conv(layers.concatenate([up2, enc1]), 64)

    # Per-pixel probability map
    outputs = layers.Conv2D(1, (1,1), activation='sigmoid')(dec2)

    return models.Model(inputs=[inputs], outputs=[outputs])

# Seed Python, NumPy and TensorFlow before the layers are created so weight
# initialisation (and therefore training) is repeatable across runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = unet_model()

# Pixel accuracy alone is misleading when most pixels are background: a model
# that predicts "no spill" everywhere still scores high. Also track IoU of the
# positive (spill) class, thresholding the sigmoid output at 0.5.
# 'accuracy' is kept so code that reads history['accuracy'] keeps working.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.BinaryIoU(target_class_ids=[1], threshold=0.5, name='iou'),
    ],
)
model.summary()


## ✅ Week 4: Training & Validation (Updated with Real Dataset)

In [None]:

import glob

IMG_SIZE = 256

def load_images_and_masks(img_dir, mask_dir, size=(IMG_SIZE, IMG_SIZE)):
    """Load every ``*.jpg`` image in ``img_dir`` together with its mask.

    The mask for ``name.jpg`` is expected at ``mask_dir/name.png``. Pairs whose
    mask is missing, or whose image or mask cannot be decoded, are skipped and
    the number of skipped pairs is printed.

    Args:
        img_dir: Directory containing the ``.jpg`` images.
        mask_dir: Directory containing the ``.png`` masks.
        size: Target size passed to ``cv2.resize`` for both image and mask.

    Returns:
        Tuple ``(images, masks)`` of float32 arrays. ``images`` holds the
        resized images scaled to [0, 1], in OpenCV's BGR channel order (no
        colour conversion is applied). ``masks`` holds binary masks (1.0 where
        the mask pixel is > 127) with a trailing channel axis of length 1.
        When nothing could be loaded, both arrays have a first dimension of 0.
    """
    images, masks = [], []
    skipped = 0

    # glob returns files in filesystem order; sort so the sample order is
    # the same on every run and machine.
    for img_path in sorted(glob.glob(os.path.join(img_dir, "*.jpg"))):
        # Swap only the real extension, not every ".jpg" substring in the name.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        mask_path = os.path.join(mask_dir, stem + ".png")
        if not os.path.exists(mask_path):
            skipped += 1
            continue

        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for corrupt/unreadable files instead of
        # raising; skip those rather than crashing inside cv2.resize.
        if img is None or mask is None:
            skipped += 1
            continue

        # Cast to float32 before scaling: half the memory of the float64 that
        # plain "/ 255.0" yields, and the same dtype as the masks.
        img = cv2.resize(img, size).astype(np.float32) / 255.0

        # Nearest-neighbour keeps mask values unblended; then binarise.
        mask = cv2.resize(mask, size, interpolation=cv2.INTER_NEAREST)
        mask = (mask > 127).astype(np.float32)

        images.append(img)
        masks.append(np.expand_dims(mask, axis=-1))

    if skipped:
        print(f"Skipped {skipped} image(s) in {img_dir}: mask missing or file unreadable")

    if not images:
        # Keep the (N, H, W, C) layout even when nothing was loaded, so the
        # problem is obvious from the printed shapes. cv2 sizes are (width, height).
        width, height = size
        return (np.empty((0, height, width, 3), dtype=np.float32),
                np.empty((0, height, width, 1), dtype=np.float32))

    return np.stack(images), np.stack(masks)

# Load the training split.
train_img_dir = os.path.join(base_dir, "train/images")
train_mask_dir = os.path.join(base_dir, "train/masks")
X_train, y_train = load_images_and_masks(train_img_dir, train_mask_dir)

# Load the validation split.
val_img_dir = os.path.join(base_dir, "val/images")
val_mask_dir = os.path.join(base_dir, "val/masks")
X_val, y_val = load_images_and_masks(val_img_dir, val_mask_dir)

# Show the array shapes of both splits.
print("Train set:", X_train.shape, y_train.shape)
print("Val set  :", X_val.shape, y_val.shape)


In [None]:

# Training hyper-parameters, named so they are easy to find and tune.
EPOCHS = 10
BATCH_SIZE = 4

# Train on the training split and evaluate on the validation split each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)


In [None]:

import matplotlib.pyplot as plt

# One figure per tracked quantity, each showing the training and validation curves.
for metric_key, metric_label in (("loss", "Loss"), ("accuracy", "Accuracy")):
    train_curve = history.history[metric_key]
    val_curve = history.history[f"val_{metric_key}"]

    plt.plot(train_curve, label=f"Train {metric_label}")
    plt.plot(val_curve, label=f"Val {metric_label}")
    plt.legend()
    plt.title(f"Training vs Validation {metric_label}")
    plt.show()


In [None]:

# Show at most three validation samples; a smaller validation set would
# otherwise raise an IndexError in the loop below.
n_samples = min(3, len(X_val))
if n_samples == 0:
    raise ValueError("X_val is empty; there are no validation samples to visualise")

preds = model.predict(X_val[:n_samples])

plt.figure(figsize=(12,8))
for i in range(n_samples):
    # X_val was loaded with cv2.imread and is in BGR order; reverse the channel
    # axis so matplotlib (which expects RGB) shows the right colours.
    plt.subplot(n_samples,3,3*i+1); plt.imshow(X_val[i][..., ::-1]); plt.title("Image")
    plt.subplot(n_samples,3,3*i+2); plt.imshow(y_val[i].squeeze(), cmap="gray"); plt.title("True Mask")
    # Threshold the sigmoid output at 0.5 to get a binary mask.
    plt.subplot(n_samples,3,3*i+3); plt.imshow((preds[i].squeeze()>0.5).astype("uint8"), cmap="gray"); plt.title("Predicted Mask")
plt.show()


## 🎯 Summary
- **Week 1:** Dataset collected and verified.
- **Week 2:** Data preprocessed + augmented.
- **Week 3:** U-Net model built and compiled.
- **Week 4:** Model trained on **real dataset**, validated, and predictions visualized.
---
✅ Data and model are now ready for **Weeks 5–6: Evaluation & Deployment**.